Home | History | Annotate | Download | only in svga
      1 /**********************************************************
      2  * Copyright 1998-2013 VMware, Inc.  All rights reserved.
      3  *
      4  * Permission is hereby granted, free of charge, to any person
      5  * obtaining a copy of this software and associated documentation
      6  * files (the "Software"), to deal in the Software without
      7  * restriction, including without limitation the rights to use, copy,
      8  * modify, merge, publish, distribute, sublicense, and/or sell copies
      9  * of the Software, and to permit persons to whom the Software is
     10  * furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be
     13  * included in all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22  * SOFTWARE.
     23  *
     24  **********************************************************/
     25 
     26 /**
     27  * @file svga_tgsi_vgpu10.c
     28  *
     29  * TGSI -> VGPU10 shader translation.
     30  *
     31  * \author Mingcheng Chen
     32  * \author Brian Paul
     33  */
     34 
     35 #include "pipe/p_compiler.h"
     36 #include "pipe/p_shader_tokens.h"
     37 #include "pipe/p_defines.h"
     38 #include "tgsi/tgsi_build.h"
     39 #include "tgsi/tgsi_dump.h"
     40 #include "tgsi/tgsi_info.h"
     41 #include "tgsi/tgsi_parse.h"
     42 #include "tgsi/tgsi_scan.h"
     43 #include "tgsi/tgsi_two_side.h"
     44 #include "tgsi/tgsi_aa_point.h"
     45 #include "tgsi/tgsi_util.h"
     46 #include "util/u_math.h"
     47 #include "util/u_memory.h"
     48 #include "util/u_bitmask.h"
     49 #include "util/u_debug.h"
     50 #include "util/u_pstipple.h"
     51 
     52 #include "svga_context.h"
     53 #include "svga_debug.h"
     54 #include "svga_link.h"
     55 #include "svga_shader.h"
     56 #include "svga_tgsi.h"
     57 
     58 #include "VGPU10ShaderTokens.h"
     59 
     60 
     61 #define INVALID_INDEX 99999
     62 #define MAX_INTERNAL_TEMPS 3
     63 #define MAX_SYSTEM_VALUES 4
     64 #define MAX_IMMEDIATE_COUNT \
     65         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
     66 #define MAX_TEMP_ARRAYS 64  /* Enough? */
     67 
     68 
     69 /**
     70  * Clipping is complicated.  There's four different cases which we
     71  * handle during VS/GS shader translation:
     72  */
     73 enum clipping_mode
     74 {
     75    CLIP_NONE,     /**< No clipping enabled */
     76    CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
     77                    * one or more user-defined clip planes are enabled.  We
     78                    * generate extra code to emit clip distances.
     79                    */
     80    CLIP_DISTANCE, /**< The shader already declares clip distance output
     81                    * registers and has code to write to them.
     82                    */
     83    CLIP_VERTEX    /**< The shader declares a clip vertex output register and
     84                   * has code that writes to the register.  We convert the
     85                   * clipvertex position into one or more clip distances.
     86                   */
     87 };
     88 
     89 
     90 struct svga_shader_emitter_v10
     91 {
     92    /* The token output buffer */
     93    unsigned size;
     94    char *buf;
     95    char *ptr;
     96 
     97    /* Information about the shader and state (does not change) */
     98    struct svga_compile_key key;
     99    struct tgsi_shader_info info;
    100    unsigned unit;
    101 
    102    unsigned inst_start_token;
    103    boolean discard_instruction; /**< throw away current instruction? */
    104 
    105    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
    106    unsigned num_immediates;      /**< Number of immediates emitted */
    107    unsigned common_immediate_pos[8];  /**< literals for common immediates */
    108    unsigned num_common_immediates;
    109    boolean immediates_emitted;
    110 
    111    unsigned num_outputs;      /**< include any extra outputs */
    112                               /**  The first extra output is reserved for
    113                                *   non-adjusted vertex position for
    114                                *   stream output purpose
    115                                */
    116 
    117    /* Temporary Registers */
    118    unsigned num_shader_temps; /**< num of temps used by original shader */
    119    unsigned internal_temp_count;  /**< currently allocated internal temps */
    120    struct {
    121       unsigned start, size;
    122    } temp_arrays[MAX_TEMP_ARRAYS];
    123    unsigned num_temp_arrays;
    124 
    125    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
    126    struct {
    127       unsigned arrayId, index;
    128    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
    129 
    130    /** Number of constants used by original shader for each constant buffer.
    131     * The size should probably always match with that of svga_state.constbufs.
    132     */
    133    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
    134 
    135    /* Samplers */
    136    unsigned num_samplers;
    137    boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
    138    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
    139    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
    140 
    141    /* Address regs (really implemented with temps) */
    142    unsigned num_address_regs;
    143    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
    144 
    145    /* Output register usage masks */
    146    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
    147 
    148    /* To map TGSI system value index to VGPU shader input indexes */
    149    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
    150 
    151    struct {
    152       /* vertex position scale/translation */
    153       unsigned out_index;  /**< the real position output reg */
    154       unsigned tmp_index;  /**< the fake/temp position output reg */
    155       unsigned so_index;   /**< the non-adjusted position output reg */
    156       unsigned prescale_scale_index, prescale_trans_index;
    157       boolean  need_prescale;
    158    } vposition;
    159 
    160    /* For vertex shaders only */
    161    struct {
    162       /* viewport constant */
    163       unsigned viewport_index;
    164 
    165       /* temp index of adjusted vertex attributes */
    166       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
    167    } vs;
    168 
    169    /* For fragment shaders only */
    170    struct {
    171       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
    172       unsigned num_color_outputs;
    173       unsigned color_tmp_index;  /**< fake/temp color output reg */
    174       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
    175 
    176       /* front-face */
    177       unsigned face_input_index; /**< real fragment shader face reg (bool) */
    178       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
    179 
    180       unsigned pstipple_sampler_unit;
    181 
    182       unsigned fragcoord_input_index;  /**< real fragment position input reg */
    183       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
    184 
    185       /** Which texture units are doing shadow comparison in the FS code */
    186       unsigned shadow_compare_units;
    187    } fs;
    188 
    189    /* For geometry shaders only */
    190    struct {
    191       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
    192       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
    193       unsigned input_size;       /**< size of input arrays */
    194       unsigned prim_id_index;    /**< primitive id register index */
    195       unsigned max_out_vertices; /**< maximum number of output vertices */
    196    } gs;
    197 
    198    /* For vertex or geometry shaders */
    199    enum clipping_mode clip_mode;
    200    unsigned clip_dist_out_index; /**< clip distance output register index */
    201    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
    202    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
    203 
    204    /** Index of temporary holding the clipvertex coordinate */
    205    unsigned clip_vertex_out_index; /**< clip vertex output register index */
    206    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
    207 
    208    /* user clip plane constant slot indexes */
    209    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
    210 
    211    unsigned num_output_writes;
    212    boolean constant_color_output;
    213 
    214    boolean uses_flat_interp;
    215 
    216    /* For all shaders: const reg index for RECT coord scaling */
    217    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
    218 
    219    /* For all shaders: const reg index for texture buffer size */
    220    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
    221 
    222    /* VS/GS/FS Linkage info */
    223    struct shader_linkage linkage;
    224 
    225    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
    226 };
    227 
    228 
    229 static boolean
    230 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
    231 
    232 static boolean
    233 emit_vertex(struct svga_shader_emitter_v10 *emit,
    234             const struct tgsi_full_instruction *inst);
    235 
    236 static char err_buf[128];
    237 
    238 static boolean
    239 expand(struct svga_shader_emitter_v10 *emit)
    240 {
    241    char *new_buf;
    242    unsigned newsize = emit->size * 2;
    243 
    244    if (emit->buf != err_buf)
    245       new_buf = REALLOC(emit->buf, emit->size, newsize);
    246    else
    247       new_buf = NULL;
    248 
    249    if (!new_buf) {
    250       emit->ptr = err_buf;
    251       emit->buf = err_buf;
    252       emit->size = sizeof(err_buf);
    253       return FALSE;
    254    }
    255 
    256    emit->size = newsize;
    257    emit->ptr = new_buf + (emit->ptr - emit->buf);
    258    emit->buf = new_buf;
    259    return TRUE;
    260 }
    261 
    262 /**
    263  * Create and initialize a new svga_shader_emitter_v10 object.
    264  */
    265 static struct svga_shader_emitter_v10 *
    266 alloc_emitter(void)
    267 {
    268    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
    269 
    270    if (!emit)
    271       return NULL;
    272 
    273    /* to initialize the output buffer */
    274    emit->size = 512;
    275    if (!expand(emit)) {
    276       FREE(emit);
    277       return NULL;
    278    }
    279    return emit;
    280 }
    281 
    282 /**
    283  * Free an svga_shader_emitter_v10 object.
    284  */
    285 static void
    286 free_emitter(struct svga_shader_emitter_v10 *emit)
    287 {
    288    assert(emit);
    289    FREE(emit->buf);    /* will be NULL if translation succeeded */
    290    FREE(emit);
    291 }
    292 
    293 static inline boolean
    294 reserve(struct svga_shader_emitter_v10 *emit,
    295         unsigned nr_dwords)
    296 {
    297    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
    298       if (!expand(emit))
    299          return FALSE;
    300    }
    301 
    302    return TRUE;
    303 }
    304 
    305 static boolean
    306 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
    307 {
    308    if (!reserve(emit, 1))
    309       return FALSE;
    310 
    311    *(uint32 *)emit->ptr = dword;
    312    emit->ptr += sizeof dword;
    313    return TRUE;
    314 }
    315 
    316 static boolean
    317 emit_dwords(struct svga_shader_emitter_v10 *emit,
    318             const uint32 *dwords,
    319             unsigned nr)
    320 {
    321    if (!reserve(emit, nr))
    322       return FALSE;
    323 
    324    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
    325    emit->ptr += nr * sizeof *dwords;
    326    return TRUE;
    327 }
    328 
    329 /** Return the number of tokens in the emitter's buffer */
    330 static unsigned
    331 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
    332 {
    333    return (emit->ptr - emit->buf) / sizeof(unsigned);
    334 }
    335 
    336 
    337 /**
    338  * Check for register overflow.  If we overflow we'll set an
    339  * error flag.  This function can be called for register declarations
    340  * or use as src/dst instruction operands.
    341  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
    342                 or VGPU10_OPCODE_DCL_x
    343  * \param index  the register index
    344  */
    345 static void
    346 check_register_index(struct svga_shader_emitter_v10 *emit,
    347                      unsigned operandType, unsigned index)
    348 {
    349    bool overflow_before = emit->register_overflow;
    350 
    351    switch (operandType) {
    352    case VGPU10_OPERAND_TYPE_TEMP:
    353    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
    354    case VGPU10_OPCODE_DCL_TEMPS:
    355       if (index >= VGPU10_MAX_TEMPS) {
    356          emit->register_overflow = TRUE;
    357       }
    358       break;
    359    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
    360    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
    361       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
    362          emit->register_overflow = TRUE;
    363       }
    364       break;
    365    case VGPU10_OPERAND_TYPE_INPUT:
    366    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
    367    case VGPU10_OPCODE_DCL_INPUT:
    368    case VGPU10_OPCODE_DCL_INPUT_SGV:
    369    case VGPU10_OPCODE_DCL_INPUT_SIV:
    370    case VGPU10_OPCODE_DCL_INPUT_PS:
    371    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
    372    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
    373       if ((emit->unit == PIPE_SHADER_VERTEX &&
    374            index >= VGPU10_MAX_VS_INPUTS) ||
    375           (emit->unit == PIPE_SHADER_GEOMETRY &&
    376            index >= VGPU10_MAX_GS_INPUTS) ||
    377           (emit->unit == PIPE_SHADER_FRAGMENT &&
    378            index >= VGPU10_MAX_FS_INPUTS)) {
    379          emit->register_overflow = TRUE;
    380       }
    381       break;
    382    case VGPU10_OPERAND_TYPE_OUTPUT:
    383    case VGPU10_OPCODE_DCL_OUTPUT:
    384    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
    385    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
    386       if ((emit->unit == PIPE_SHADER_VERTEX &&
    387            index >= VGPU10_MAX_VS_OUTPUTS) ||
    388           (emit->unit == PIPE_SHADER_GEOMETRY &&
    389            index >= VGPU10_MAX_GS_OUTPUTS) ||
    390           (emit->unit == PIPE_SHADER_FRAGMENT &&
    391            index >= VGPU10_MAX_FS_OUTPUTS)) {
    392          emit->register_overflow = TRUE;
    393       }
    394       break;
    395    case VGPU10_OPERAND_TYPE_SAMPLER:
    396    case VGPU10_OPCODE_DCL_SAMPLER:
    397       if (index >= VGPU10_MAX_SAMPLERS) {
    398          emit->register_overflow = TRUE;
    399       }
    400       break;
    401    case VGPU10_OPERAND_TYPE_RESOURCE:
    402    case VGPU10_OPCODE_DCL_RESOURCE:
    403       if (index >= VGPU10_MAX_RESOURCES) {
    404          emit->register_overflow = TRUE;
    405       }
    406       break;
    407    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
    408       if (index >= MAX_IMMEDIATE_COUNT) {
    409          emit->register_overflow = TRUE;
    410       }
    411       break;
    412    default:
    413       assert(0);
    414       ; /* nothing */
    415    }
    416 
    417    if (emit->register_overflow && !overflow_before) {
    418       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
    419                    operandType, index);
    420    }
    421 }
    422 
    423 
    424 /**
    425  * Examine misc state to determine the clipping mode.
    426  */
    427 static void
    428 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
    429 {
    430    if (emit->info.num_written_clipdistance > 0) {
    431       emit->clip_mode = CLIP_DISTANCE;
    432    }
    433    else if (emit->info.writes_clipvertex) {
    434       emit->clip_mode = CLIP_VERTEX;
    435    }
    436    else if (emit->key.clip_plane_enable) {
    437       emit->clip_mode = CLIP_LEGACY;
    438    }
    439    else {
    440       emit->clip_mode = CLIP_NONE;
    441    }
    442 }
    443 
    444 
    445 /**
    446  * For clip distance register declarations and clip distance register
    447  * writes we need to mask the declaration usage or instruction writemask
    448  * (respectively) against the set of the really-enabled clipping planes.
    449  *
    450  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
    451  * has a VS that writes to all 8 clip distance registers, but the plane enable
    452  * flags are a subset of that.
    453  *
    454  * This function is used to apply the plane enable flags to the register
    455  * declaration or instruction writemask.
    456  *
    457  * \param writemask  the declaration usage mask or instruction writemask
    458  * \param clip_reg_index  which clip plane register is being declared/written.
    459  *                        The legal values are 0 and 1 (two clip planes per
    460  *                        register, for a total of 8 clip planes)
    461  */
    462 static unsigned
    463 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
    464                       unsigned writemask, unsigned clip_reg_index)
    465 {
    466    unsigned shift;
    467 
    468    assert(clip_reg_index < 2);
    469 
    470    /* four clip planes per clip register: */
    471    shift = clip_reg_index * 4;
    472    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
    473 
    474    return writemask;
    475 }
    476 
    477 
    478 /**
    479  * Translate gallium shader type into VGPU10 type.
    480  */
    481 static VGPU10_PROGRAM_TYPE
    482 translate_shader_type(unsigned type)
    483 {
    484    switch (type) {
    485    case PIPE_SHADER_VERTEX:
    486       return VGPU10_VERTEX_SHADER;
    487    case PIPE_SHADER_GEOMETRY:
    488       return VGPU10_GEOMETRY_SHADER;
    489    case PIPE_SHADER_FRAGMENT:
    490       return VGPU10_PIXEL_SHADER;
    491    default:
    492       assert(!"Unexpected shader type");
    493       return VGPU10_VERTEX_SHADER;
    494    }
    495 }
    496 
    497 
    498 /**
    499  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
    500  * Note: we only need to translate the opcodes for "simple" instructions,
    501  * as seen below.  All other opcodes are handled/translated specially.
    502  */
    503 static VGPU10_OPCODE_TYPE
    504 translate_opcode(unsigned opcode)
    505 {
    506    switch (opcode) {
    507    case TGSI_OPCODE_MOV:
    508       return VGPU10_OPCODE_MOV;
    509    case TGSI_OPCODE_MUL:
    510       return VGPU10_OPCODE_MUL;
    511    case TGSI_OPCODE_ADD:
    512       return VGPU10_OPCODE_ADD;
    513    case TGSI_OPCODE_DP3:
    514       return VGPU10_OPCODE_DP3;
    515    case TGSI_OPCODE_DP4:
    516       return VGPU10_OPCODE_DP4;
    517    case TGSI_OPCODE_MIN:
    518       return VGPU10_OPCODE_MIN;
    519    case TGSI_OPCODE_MAX:
    520       return VGPU10_OPCODE_MAX;
    521    case TGSI_OPCODE_MAD:
    522       return VGPU10_OPCODE_MAD;
    523    case TGSI_OPCODE_SQRT:
    524       return VGPU10_OPCODE_SQRT;
    525    case TGSI_OPCODE_FRC:
    526       return VGPU10_OPCODE_FRC;
    527    case TGSI_OPCODE_FLR:
    528       return VGPU10_OPCODE_ROUND_NI;
    529    case TGSI_OPCODE_FSEQ:
    530       return VGPU10_OPCODE_EQ;
    531    case TGSI_OPCODE_FSGE:
    532       return VGPU10_OPCODE_GE;
    533    case TGSI_OPCODE_FSNE:
    534       return VGPU10_OPCODE_NE;
    535    case TGSI_OPCODE_DDX:
    536       return VGPU10_OPCODE_DERIV_RTX;
    537    case TGSI_OPCODE_DDY:
    538       return VGPU10_OPCODE_DERIV_RTY;
    539    case TGSI_OPCODE_RET:
    540       return VGPU10_OPCODE_RET;
    541    case TGSI_OPCODE_DIV:
    542       return VGPU10_OPCODE_DIV;
    543    case TGSI_OPCODE_IDIV:
    544       return VGPU10_OPCODE_IDIV;
    545    case TGSI_OPCODE_DP2:
    546       return VGPU10_OPCODE_DP2;
    547    case TGSI_OPCODE_BRK:
    548       return VGPU10_OPCODE_BREAK;
    549    case TGSI_OPCODE_IF:
    550       return VGPU10_OPCODE_IF;
    551    case TGSI_OPCODE_ELSE:
    552       return VGPU10_OPCODE_ELSE;
    553    case TGSI_OPCODE_ENDIF:
    554       return VGPU10_OPCODE_ENDIF;
    555    case TGSI_OPCODE_CEIL:
    556       return VGPU10_OPCODE_ROUND_PI;
    557    case TGSI_OPCODE_I2F:
    558       return VGPU10_OPCODE_ITOF;
    559    case TGSI_OPCODE_NOT:
    560       return VGPU10_OPCODE_NOT;
    561    case TGSI_OPCODE_TRUNC:
    562       return VGPU10_OPCODE_ROUND_Z;
    563    case TGSI_OPCODE_SHL:
    564       return VGPU10_OPCODE_ISHL;
    565    case TGSI_OPCODE_AND:
    566       return VGPU10_OPCODE_AND;
    567    case TGSI_OPCODE_OR:
    568       return VGPU10_OPCODE_OR;
    569    case TGSI_OPCODE_XOR:
    570       return VGPU10_OPCODE_XOR;
    571    case TGSI_OPCODE_CONT:
    572       return VGPU10_OPCODE_CONTINUE;
    573    case TGSI_OPCODE_EMIT:
    574       return VGPU10_OPCODE_EMIT;
    575    case TGSI_OPCODE_ENDPRIM:
    576       return VGPU10_OPCODE_CUT;
    577    case TGSI_OPCODE_BGNLOOP:
    578       return VGPU10_OPCODE_LOOP;
    579    case TGSI_OPCODE_ENDLOOP:
    580       return VGPU10_OPCODE_ENDLOOP;
    581    case TGSI_OPCODE_ENDSUB:
    582       return VGPU10_OPCODE_RET;
    583    case TGSI_OPCODE_NOP:
    584       return VGPU10_OPCODE_NOP;
    585    case TGSI_OPCODE_END:
    586       return VGPU10_OPCODE_RET;
    587    case TGSI_OPCODE_F2I:
    588       return VGPU10_OPCODE_FTOI;
    589    case TGSI_OPCODE_IMAX:
    590       return VGPU10_OPCODE_IMAX;
    591    case TGSI_OPCODE_IMIN:
    592       return VGPU10_OPCODE_IMIN;
    593    case TGSI_OPCODE_UDIV:
    594    case TGSI_OPCODE_UMOD:
    595    case TGSI_OPCODE_MOD:
    596       return VGPU10_OPCODE_UDIV;
    597    case TGSI_OPCODE_IMUL_HI:
    598       return VGPU10_OPCODE_IMUL;
    599    case TGSI_OPCODE_INEG:
    600       return VGPU10_OPCODE_INEG;
    601    case TGSI_OPCODE_ISHR:
    602       return VGPU10_OPCODE_ISHR;
    603    case TGSI_OPCODE_ISGE:
    604       return VGPU10_OPCODE_IGE;
    605    case TGSI_OPCODE_ISLT:
    606       return VGPU10_OPCODE_ILT;
    607    case TGSI_OPCODE_F2U:
    608       return VGPU10_OPCODE_FTOU;
    609    case TGSI_OPCODE_UADD:
    610       return VGPU10_OPCODE_IADD;
    611    case TGSI_OPCODE_U2F:
    612       return VGPU10_OPCODE_UTOF;
    613    case TGSI_OPCODE_UCMP:
    614       return VGPU10_OPCODE_MOVC;
    615    case TGSI_OPCODE_UMAD:
    616       return VGPU10_OPCODE_UMAD;
    617    case TGSI_OPCODE_UMAX:
    618       return VGPU10_OPCODE_UMAX;
    619    case TGSI_OPCODE_UMIN:
    620       return VGPU10_OPCODE_UMIN;
    621    case TGSI_OPCODE_UMUL:
    622    case TGSI_OPCODE_UMUL_HI:
    623       return VGPU10_OPCODE_UMUL;
    624    case TGSI_OPCODE_USEQ:
    625       return VGPU10_OPCODE_IEQ;
    626    case TGSI_OPCODE_USGE:
    627       return VGPU10_OPCODE_UGE;
    628    case TGSI_OPCODE_USHR:
    629       return VGPU10_OPCODE_USHR;
    630    case TGSI_OPCODE_USLT:
    631       return VGPU10_OPCODE_ULT;
    632    case TGSI_OPCODE_USNE:
    633       return VGPU10_OPCODE_INE;
    634    case TGSI_OPCODE_SWITCH:
    635       return VGPU10_OPCODE_SWITCH;
    636    case TGSI_OPCODE_CASE:
    637       return VGPU10_OPCODE_CASE;
    638    case TGSI_OPCODE_DEFAULT:
    639       return VGPU10_OPCODE_DEFAULT;
    640    case TGSI_OPCODE_ENDSWITCH:
    641       return VGPU10_OPCODE_ENDSWITCH;
    642    case TGSI_OPCODE_FSLT:
    643       return VGPU10_OPCODE_LT;
    644    case TGSI_OPCODE_ROUND:
    645       return VGPU10_OPCODE_ROUND_NE;
    646    default:
    647       assert(!"Unexpected TGSI opcode in translate_opcode()");
    648       return VGPU10_OPCODE_NOP;
    649    }
    650 }
    651 
    652 
    653 /**
    654  * Translate a TGSI register file type into a VGPU10 operand type.
    655  * \param array  is the TGSI_FILE_TEMPORARY register an array?
    656  */
    657 static VGPU10_OPERAND_TYPE
    658 translate_register_file(enum tgsi_file_type file, boolean array)
    659 {
    660    switch (file) {
    661    case TGSI_FILE_CONSTANT:
    662       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
    663    case TGSI_FILE_INPUT:
    664       return VGPU10_OPERAND_TYPE_INPUT;
    665    case TGSI_FILE_OUTPUT:
    666       return VGPU10_OPERAND_TYPE_OUTPUT;
    667    case TGSI_FILE_TEMPORARY:
    668       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
    669                    : VGPU10_OPERAND_TYPE_TEMP;
    670    case TGSI_FILE_IMMEDIATE:
    671       /* all immediates are 32-bit values at this time so
    672        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
    673        */
    674       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
    675    case TGSI_FILE_SAMPLER:
    676       return VGPU10_OPERAND_TYPE_SAMPLER;
    677    case TGSI_FILE_SYSTEM_VALUE:
    678       return VGPU10_OPERAND_TYPE_INPUT;
    679 
    680    /* XXX TODO more cases to finish */
    681 
    682    default:
    683       assert(!"Bad tgsi register file!");
    684       return VGPU10_OPERAND_TYPE_NULL;
    685    }
    686 }
    687 
    688 
    689 /**
    690  * Emit a null dst register
    691  */
    692 static void
    693 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
    694 {
    695    VGPU10OperandToken0 operand;
    696 
    697    operand.value = 0;
    698    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
    699    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
    700 
    701    emit_dword(emit, operand.value);
    702 }
    703 
    704 
    705 /**
    706  * If the given register is a temporary, return the array ID.
    707  * Else return zero.
    708  */
    709 static unsigned
    710 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
    711                   enum tgsi_file_type file, unsigned index)
    712 {
    713    if (file == TGSI_FILE_TEMPORARY) {
    714       return emit->temp_map[index].arrayId;
    715    }
    716    else {
    717       return 0;
    718    }
    719 }
    720 
    721 
    722 /**
    723  * If the given register is a temporary, convert the index from a TGSI
    724  * TEMPORARY index to a VGPU10 temp index.
    725  */
    726 static unsigned
    727 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
    728                  enum tgsi_file_type file, unsigned index)
    729 {
    730    if (file == TGSI_FILE_TEMPORARY) {
    731       return emit->temp_map[index].index;
    732    }
    733    else {
    734       return index;
    735    }
    736 }
    737 
    738 
    739 /**
    740  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
    741  * Note: the operandType field must already be initialized.
    742  */
    743 static VGPU10OperandToken0
    744 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
    745                         VGPU10OperandToken0 operand0,
    746                         enum tgsi_file_type file,
    747                         boolean indirect, boolean index2D,
    748                         unsigned tempArrayID)
    749 {
    750    unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    751 
    752    /*
    753     * Compute index dimensions
    754     */
    755    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
    756        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
    757       /* there's no swizzle for in-line immediates */
    758       indexDim = VGPU10_OPERAND_INDEX_0D;
    759       assert(operand0.selectionMode == 0);
    760    }
    761    else {
    762       if (index2D ||
    763           tempArrayID > 0 ||
    764           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
    765          indexDim = VGPU10_OPERAND_INDEX_2D;
    766       }
    767       else {
    768          indexDim = VGPU10_OPERAND_INDEX_1D;
    769       }
    770    }
    771 
    772    /*
    773     * Compute index representations (immediate, relative, etc).
    774     */
    775    if (tempArrayID > 0) {
    776       assert(file == TGSI_FILE_TEMPORARY);
    777       /* First index is the array ID, second index is the array element */
    778       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    779       if (indirect) {
    780          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    781       }
    782       else {
    783          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    784       }
    785    }
    786    else if (indirect) {
    787       if (file == TGSI_FILE_CONSTANT) {
    788          /* index[0] indicates which constant buffer while index[1] indicates
    789           * the position in the constant buffer.
    790           */
    791          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    792          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    793       }
    794       else {
    795          /* All other register files are 1-dimensional */
    796          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    797       }
    798    }
    799    else {
    800       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    801       index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    802    }
    803 
    804    operand0.indexDimension = indexDim;
    805    operand0.index0Representation = index0Rep;
    806    operand0.index1Representation = index1Rep;
    807 
    808    return operand0;
    809 }
    810 
    811 
    812 /**
    813  * Emit the operand for expressing an address register for indirect indexing.
    814  * Note that the address register is really just a temp register.
    815  * \param addr_reg_index  which address register to use
    816  */
    817 static void
    818 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
    819                        unsigned addr_reg_index)
    820 {
    821    unsigned tmp_reg_index;
    822    VGPU10OperandToken0 operand0;
    823 
    824    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
    825 
    826    tmp_reg_index = emit->address_reg_index[addr_reg_index];
    827 
    828    /* operand0 is a simple temporary register, selecting one component */
    829    operand0.value = 0;
    830    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
    831    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
    832    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
    833    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    834    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
    835    operand0.swizzleX = 0;
    836    operand0.swizzleY = 1;
    837    operand0.swizzleZ = 2;
    838    operand0.swizzleW = 3;
    839 
    840    emit_dword(emit, operand0.value);
    841    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
    842 }
    843 
    844 
    845 /**
    846  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
    847  * \param emit  the emitter context
    848  * \param reg  the TGSI dst register to translate
    849  */
    850 static void
    851 emit_dst_register(struct svga_shader_emitter_v10 *emit,
    852                   const struct tgsi_full_dst_register *reg)
    853 {
    854    enum tgsi_file_type file = reg->Register.File;
    855    unsigned index = reg->Register.Index;
    856    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
    857    const unsigned sem_index = emit->info.output_semantic_index[index];
    858    unsigned writemask = reg->Register.WriteMask;
    859    const unsigned indirect = reg->Register.Indirect;
    860    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
    861    const unsigned index2d = reg->Register.Dimension;
    862    VGPU10OperandToken0 operand0;
    863 
    864    if (file == TGSI_FILE_OUTPUT) {
    865       if (emit->unit == PIPE_SHADER_VERTEX ||
    866           emit->unit == PIPE_SHADER_GEOMETRY) {
    867          if (index == emit->vposition.out_index &&
    868              emit->vposition.tmp_index != INVALID_INDEX) {
    869             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
    870              * vertex position result in a temporary so that we can modify
    871              * it in the post_helper() code.
    872              */
    873             file = TGSI_FILE_TEMPORARY;
    874             index = emit->vposition.tmp_index;
    875          }
    876          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
    877                   emit->clip_dist_tmp_index != INVALID_INDEX) {
    878             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
    879              * We store the clip distance in a temporary first, then
    880              * we'll copy it to the shadow copy and to CLIPDIST with the
    881              * enabled planes mask in emit_clip_distance_instructions().
    882              */
    883             file = TGSI_FILE_TEMPORARY;
    884             index = emit->clip_dist_tmp_index + sem_index;
    885          }
    886          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
    887                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
    888             /* replace the CLIPVERTEX output register with a temporary */
    889             assert(emit->clip_mode == CLIP_VERTEX);
    890             assert(sem_index == 0);
    891             file = TGSI_FILE_TEMPORARY;
    892             index = emit->clip_vertex_tmp_index;
    893          }
    894       }
    895       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
    896          if (sem_name == TGSI_SEMANTIC_POSITION) {
    897             /* Fragment depth output register */
    898             operand0.value = 0;
    899             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
    900             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
    901             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
    902             emit_dword(emit, operand0.value);
    903             return;
    904          }
    905          else if (index == emit->fs.color_out_index[0] &&
    906              emit->fs.color_tmp_index != INVALID_INDEX) {
    907             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
    908              * fragment color result in a temporary so that we can read it
    909              * it in the post_helper() code.
    910              */
    911             file = TGSI_FILE_TEMPORARY;
    912             index = emit->fs.color_tmp_index;
    913          }
    914          else {
    915             /* Typically, for fragment shaders, the output register index
    916              * matches the color semantic index.  But not when we write to
    917              * the fragment depth register.  In that case, OUT[0] will be
    918              * fragdepth and OUT[1] will be the 0th color output.  We need
    919              * to use the semantic index for color outputs.
    920              */
    921             assert(sem_name == TGSI_SEMANTIC_COLOR);
    922             index = emit->info.output_semantic_index[index];
    923 
    924             emit->num_output_writes++;
    925          }
    926       }
    927    }
    928 
    929    /* init operand tokens to all zero */
    930    operand0.value = 0;
    931 
    932    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
    933 
    934    /* the operand has a writemask */
    935    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
    936 
    937    /* Which of the four dest components to write to. Note that we can use a
    938     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    939     */
    940    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
    941    operand0.mask = writemask;
    942 
    943    /* translate TGSI register file type to VGPU10 operand type */
    944    operand0.operandType = translate_register_file(file, tempArrayId > 0);
    945 
    946    check_register_index(emit, operand0.operandType, index);
    947 
    948    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
    949                                       index2d, tempArrayId);
    950 
    951    /* Emit tokens */
    952    emit_dword(emit, operand0.value);
    953    if (tempArrayId > 0) {
    954       emit_dword(emit, tempArrayId);
    955    }
    956 
    957    emit_dword(emit, remap_temp_index(emit, file, index));
    958 
    959    if (indirect) {
    960       emit_indirect_register(emit, reg->Indirect.Index);
    961    }
    962 }
    963 
    964 
    965 /**
    966  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
    967  */
    968 static void
    969 emit_src_register(struct svga_shader_emitter_v10 *emit,
    970                   const struct tgsi_full_src_register *reg)
    971 {
    972    enum tgsi_file_type file = reg->Register.File;
    973    unsigned index = reg->Register.Index;
    974    const unsigned indirect = reg->Register.Indirect;
    975    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
    976    const unsigned index2d = reg->Register.Dimension;
    977    const unsigned swizzleX = reg->Register.SwizzleX;
    978    const unsigned swizzleY = reg->Register.SwizzleY;
    979    const unsigned swizzleZ = reg->Register.SwizzleZ;
    980    const unsigned swizzleW = reg->Register.SwizzleW;
    981    const unsigned absolute = reg->Register.Absolute;
    982    const unsigned negate = reg->Register.Negate;
    983    bool is_prim_id = FALSE;
    984 
    985    VGPU10OperandToken0 operand0;
    986    VGPU10OperandToken1 operand1;
    987 
    988    if (emit->unit == PIPE_SHADER_FRAGMENT &&
    989       file == TGSI_FILE_INPUT) {
    990       if (index == emit->fs.face_input_index) {
    991          /* Replace INPUT[FACE] with TEMP[FACE] */
    992          file = TGSI_FILE_TEMPORARY;
    993          index = emit->fs.face_tmp_index;
    994       }
    995       else if (index == emit->fs.fragcoord_input_index) {
    996          /* Replace INPUT[POSITION] with TEMP[POSITION] */
    997          file = TGSI_FILE_TEMPORARY;
    998          index = emit->fs.fragcoord_tmp_index;
    999       }
   1000       else {
   1001          /* We remap fragment shader inputs to that FS input indexes
   1002           * match up with VS/GS output indexes.
   1003           */
   1004          index = emit->linkage.input_map[index];
   1005       }
   1006    }
   1007    else if (emit->unit == PIPE_SHADER_GEOMETRY &&
   1008             file == TGSI_FILE_INPUT) {
   1009       is_prim_id = (index == emit->gs.prim_id_index);
   1010       index = emit->linkage.input_map[index];
   1011    }
   1012    else if (emit->unit == PIPE_SHADER_VERTEX) {
   1013       if (file == TGSI_FILE_INPUT) {
   1014          /* if input is adjusted... */
   1015          if ((emit->key.vs.adjust_attrib_w_1 |
   1016               emit->key.vs.adjust_attrib_itof |
   1017               emit->key.vs.adjust_attrib_utof |
   1018               emit->key.vs.attrib_is_bgra |
   1019               emit->key.vs.attrib_puint_to_snorm |
   1020               emit->key.vs.attrib_puint_to_uscaled |
   1021               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
   1022             file = TGSI_FILE_TEMPORARY;
   1023             index = emit->vs.adjusted_input[index];
   1024          }
   1025       }
   1026       else if (file == TGSI_FILE_SYSTEM_VALUE) {
   1027          assert(index < ARRAY_SIZE(emit->system_value_indexes));
   1028          index = emit->system_value_indexes[index];
   1029       }
   1030    }
   1031 
   1032    operand0.value = operand1.value = 0;
   1033 
   1034    if (is_prim_id) {
   1035       /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
   1036        * our virtual GPU accepts this as-is.
   1037        */
   1038       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   1039       operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   1040    }
   1041    else {
   1042       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1043       operand0.operandType = translate_register_file(file, tempArrayId > 0);
   1044    }
   1045 
   1046    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
   1047                                       index2d, tempArrayId);
   1048 
   1049    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
   1050        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
   1051       /* there's no swizzle for in-line immediates */
   1052       if (swizzleX == swizzleY &&
   1053           swizzleX == swizzleZ &&
   1054           swizzleX == swizzleW) {
   1055          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   1056       }
   1057       else {
   1058          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   1059       }
   1060 
   1061       operand0.swizzleX = swizzleX;
   1062       operand0.swizzleY = swizzleY;
   1063       operand0.swizzleZ = swizzleZ;
   1064       operand0.swizzleW = swizzleW;
   1065 
   1066       if (absolute || negate) {
   1067          operand0.extended = 1;
   1068          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
   1069          if (absolute && !negate)
   1070             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
   1071          if (!absolute && negate)
   1072             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
   1073          if (absolute && negate)
   1074             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
   1075       }
   1076    }
   1077 
   1078    /* Emit the operand tokens */
   1079    emit_dword(emit, operand0.value);
   1080    if (operand0.extended)
   1081       emit_dword(emit, operand1.value);
   1082 
   1083    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
   1084       /* Emit the four float/int in-line immediate values */
   1085       unsigned *c;
   1086       assert(index < ARRAY_SIZE(emit->immediates));
   1087       assert(file == TGSI_FILE_IMMEDIATE);
   1088       assert(swizzleX < 4);
   1089       assert(swizzleY < 4);
   1090       assert(swizzleZ < 4);
   1091       assert(swizzleW < 4);
   1092       c = (unsigned *) emit->immediates[index];
   1093       emit_dword(emit, c[swizzleX]);
   1094       emit_dword(emit, c[swizzleY]);
   1095       emit_dword(emit, c[swizzleZ]);
   1096       emit_dword(emit, c[swizzleW]);
   1097    }
   1098    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
   1099       /* Emit the register index(es) */
   1100       if (index2d ||
   1101           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
   1102          emit_dword(emit, reg->Dimension.Index);
   1103       }
   1104 
   1105       if (tempArrayId > 0) {
   1106          emit_dword(emit, tempArrayId);
   1107       }
   1108 
   1109       emit_dword(emit, remap_temp_index(emit, file, index));
   1110 
   1111       if (indirect) {
   1112          emit_indirect_register(emit, reg->Indirect.Index);
   1113       }
   1114    }
   1115 }
   1116 
   1117 
   1118 /**
   1119  * Emit a resource operand (for use with a SAMPLE instruction).
   1120  */
   1121 static void
   1122 emit_resource_register(struct svga_shader_emitter_v10 *emit,
   1123                        unsigned resource_number)
   1124 {
   1125    VGPU10OperandToken0 operand0;
   1126 
   1127    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
   1128 
   1129    /* init */
   1130    operand0.value = 0;
   1131 
   1132    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   1133    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1134    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1135    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   1136    operand0.swizzleX = VGPU10_COMPONENT_X;
   1137    operand0.swizzleY = VGPU10_COMPONENT_Y;
   1138    operand0.swizzleZ = VGPU10_COMPONENT_Z;
   1139    operand0.swizzleW = VGPU10_COMPONENT_W;
   1140 
   1141    emit_dword(emit, operand0.value);
   1142    emit_dword(emit, resource_number);
   1143 }
   1144 
   1145 
   1146 /**
   1147  * Emit a sampler operand (for use with a SAMPLE instruction).
   1148  */
   1149 static void
   1150 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
   1151                       unsigned sampler_number)
   1152 {
   1153    VGPU10OperandToken0 operand0;
   1154 
   1155    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
   1156 
   1157    /* init */
   1158    operand0.value = 0;
   1159 
   1160    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   1161    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1162 
   1163    emit_dword(emit, operand0.value);
   1164    emit_dword(emit, sampler_number);
   1165 }
   1166 
   1167 
   1168 /**
   1169  * Emit an operand which reads the IS_FRONT_FACING register.
   1170  */
   1171 static void
   1172 emit_face_register(struct svga_shader_emitter_v10 *emit)
   1173 {
   1174    VGPU10OperandToken0 operand0;
   1175    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
   1176 
   1177    /* init */
   1178    operand0.value = 0;
   1179 
   1180    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   1181    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1182    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   1183    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1184 
   1185    operand0.swizzleX = VGPU10_COMPONENT_X;
   1186    operand0.swizzleY = VGPU10_COMPONENT_X;
   1187    operand0.swizzleZ = VGPU10_COMPONENT_X;
   1188    operand0.swizzleW = VGPU10_COMPONENT_X;
   1189 
   1190    emit_dword(emit, operand0.value);
   1191    emit_dword(emit, index);
   1192 }
   1193 
   1194 
   1195 /**
   1196  * Emit the token for a VGPU10 opcode.
   1197  * \param saturate   clamp result to [0,1]?
   1198  */
   1199 static void
   1200 emit_opcode(struct svga_shader_emitter_v10 *emit,
   1201             unsigned vgpu10_opcode, boolean saturate)
   1202 {
   1203    VGPU10OpcodeToken0 token0;
   1204 
   1205    token0.value = 0;  /* init all fields to zero */
   1206    token0.opcodeType = vgpu10_opcode;
   1207    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1208    token0.saturate = saturate;
   1209 
   1210    emit_dword(emit, token0.value);
   1211 }
   1212 
   1213 
   1214 /**
   1215  * Emit the token for a VGPU10 resinfo instruction.
   1216  * \param modifier   return type modifier, _uint or _rcpFloat.
   1217  *                   TODO: We may want to remove this parameter if it will
   1218  *                   only ever be used as _uint.
   1219  */
   1220 static void
   1221 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
   1222                     VGPU10_RESINFO_RETURN_TYPE modifier)
   1223 {
   1224    VGPU10OpcodeToken0 token0;
   1225 
   1226    token0.value = 0;  /* init all fields to zero */
   1227    token0.opcodeType = VGPU10_OPCODE_RESINFO;
   1228    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1229    token0.resinfoReturnType = modifier;
   1230 
   1231    emit_dword(emit, token0.value);
   1232 }
   1233 
   1234 
   1235 /**
   1236  * Emit opcode tokens for a texture sample instruction.  Texture instructions
   1237  * can be rather complicated (texel offsets, etc) so we have this specialized
   1238  * function.
   1239  */
   1240 static void
   1241 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
   1242                    unsigned vgpu10_opcode, boolean saturate,
   1243                    const int offsets[3])
   1244 {
   1245    VGPU10OpcodeToken0 token0;
   1246    VGPU10OpcodeToken1 token1;
   1247 
   1248    token0.value = 0;  /* init all fields to zero */
   1249    token0.opcodeType = vgpu10_opcode;
   1250    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1251    token0.saturate = saturate;
   1252 
   1253    if (offsets[0] || offsets[1] || offsets[2]) {
   1254       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1255       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1256       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1257       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1258       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1259       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1260 
   1261       token0.extended = 1;
   1262       token1.value = 0;
   1263       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
   1264       token1.offsetU = offsets[0];
   1265       token1.offsetV = offsets[1];
   1266       token1.offsetW = offsets[2];
   1267    }
   1268 
   1269    emit_dword(emit, token0.value);
   1270    if (token0.extended) {
   1271       emit_dword(emit, token1.value);
   1272    }
   1273 }
   1274 
   1275 
   1276 /**
   1277  * Emit a DISCARD opcode token.
   1278  * If nonzero is set, we'll discard the fragment if the X component is not 0.
   1279  * Otherwise, we'll discard the fragment if the X component is 0.
   1280  */
   1281 static void
   1282 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
   1283 {
   1284    VGPU10OpcodeToken0 opcode0;
   1285 
   1286    opcode0.value = 0;
   1287    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   1288    if (nonzero)
   1289       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
   1290 
   1291    emit_dword(emit, opcode0.value);
   1292 }
   1293 
   1294 
   1295 /**
   1296  * We need to call this before we begin emitting a VGPU10 instruction.
   1297  */
   1298 static void
   1299 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
   1300 {
   1301    assert(emit->inst_start_token == 0);
   1302    /* Save location of the instruction's VGPU10OpcodeToken0 token.
   1303     * Note, we can't save a pointer because it would become invalid if
   1304     * we have to realloc the output buffer.
   1305     */
   1306    emit->inst_start_token = emit_get_num_tokens(emit);
   1307 }
   1308 
   1309 
   1310 /**
   1311  * We need to call this after we emit the last token of a VGPU10 instruction.
   1312  * This function patches in the opcode token's instructionLength field.
   1313  */
   1314 static void
   1315 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
   1316 {
   1317    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   1318    unsigned inst_length;
   1319 
   1320    assert(emit->inst_start_token > 0);
   1321 
   1322    if (emit->discard_instruction) {
   1323       /* Back up the emit->ptr to where this instruction started so
   1324        * that we discard the current instruction.
   1325        */
   1326       emit->ptr = (char *) (tokens + emit->inst_start_token);
   1327    }
   1328    else {
   1329       /* Compute instruction length and patch that into the start of
   1330        * the instruction.
   1331        */
   1332       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
   1333 
   1334       assert(inst_length > 0);
   1335 
   1336       tokens[emit->inst_start_token].instructionLength = inst_length;
   1337    }
   1338 
   1339    emit->inst_start_token = 0; /* reset to zero for error checking */
   1340    emit->discard_instruction = FALSE;
   1341 }
   1342 
   1343 
   1344 /**
   1345  * Return index for a free temporary register.
   1346  */
   1347 static unsigned
   1348 get_temp_index(struct svga_shader_emitter_v10 *emit)
   1349 {
   1350    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   1351    return emit->num_shader_temps + emit->internal_temp_count++;
   1352 }
   1353 
   1354 
   1355 /**
   1356  * Release the temporaries which were generated by get_temp_index().
   1357  */
   1358 static void
   1359 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
   1360 {
   1361    emit->internal_temp_count = 0;
   1362 }
   1363 
   1364 
   1365 /**
   1366  * Create a tgsi_full_src_register.
   1367  */
   1368 static struct tgsi_full_src_register
   1369 make_src_reg(enum tgsi_file_type file, unsigned index)
   1370 {
   1371    struct tgsi_full_src_register reg;
   1372 
   1373    memset(&reg, 0, sizeof(reg));
   1374    reg.Register.File = file;
   1375    reg.Register.Index = index;
   1376    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   1377    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1378    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1379    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   1380    return reg;
   1381 }
   1382 
   1383 
   1384 /**
   1385  * Create a tgsi_full_src_register for a temporary.
   1386  */
   1387 static struct tgsi_full_src_register
   1388 make_src_temp_reg(unsigned index)
   1389 {
   1390    return make_src_reg(TGSI_FILE_TEMPORARY, index);
   1391 }
   1392 
   1393 
   1394 /**
   1395  * Create a tgsi_full_src_register for a constant.
   1396  */
   1397 static struct tgsi_full_src_register
   1398 make_src_const_reg(unsigned index)
   1399 {
   1400    return make_src_reg(TGSI_FILE_CONSTANT, index);
   1401 }
   1402 
   1403 
   1404 /**
   1405  * Create a tgsi_full_src_register for an immediate constant.
   1406  */
   1407 static struct tgsi_full_src_register
   1408 make_src_immediate_reg(unsigned index)
   1409 {
   1410    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
   1411 }
   1412 
   1413 
   1414 /**
   1415  * Create a tgsi_full_dst_register.
   1416  */
   1417 static struct tgsi_full_dst_register
   1418 make_dst_reg(enum tgsi_file_type file, unsigned index)
   1419 {
   1420    struct tgsi_full_dst_register reg;
   1421 
   1422    memset(&reg, 0, sizeof(reg));
   1423    reg.Register.File = file;
   1424    reg.Register.Index = index;
   1425    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1426    return reg;
   1427 }
   1428 
   1429 
   1430 /**
   1431  * Create a tgsi_full_dst_register for a temporary.
   1432  */
   1433 static struct tgsi_full_dst_register
   1434 make_dst_temp_reg(unsigned index)
   1435 {
   1436    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
   1437 }
   1438 
   1439 
   1440 /**
   1441  * Create a tgsi_full_dst_register for an output.
   1442  */
   1443 static struct tgsi_full_dst_register
   1444 make_dst_output_reg(unsigned index)
   1445 {
   1446    return make_dst_reg(TGSI_FILE_OUTPUT, index);
   1447 }
   1448 
   1449 
   1450 /**
   1451  * Create negated tgsi_full_src_register.
   1452  */
   1453 static struct tgsi_full_src_register
   1454 negate_src(const struct tgsi_full_src_register *reg)
   1455 {
   1456    struct tgsi_full_src_register neg = *reg;
   1457    neg.Register.Negate = !reg->Register.Negate;
   1458    return neg;
   1459 }
   1460 
   1461 /**
   1462  * Create absolute value of a tgsi_full_src_register.
   1463  */
   1464 static struct tgsi_full_src_register
   1465 absolute_src(const struct tgsi_full_src_register *reg)
   1466 {
   1467    struct tgsi_full_src_register absolute = *reg;
   1468    absolute.Register.Absolute = 1;
   1469    return absolute;
   1470 }
   1471 
   1472 
   1473 /** Return the named swizzle term from the src register */
   1474 static inline unsigned
   1475 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
   1476 {
   1477    switch (term) {
   1478    case TGSI_SWIZZLE_X:
   1479       return reg->Register.SwizzleX;
   1480    case TGSI_SWIZZLE_Y:
   1481       return reg->Register.SwizzleY;
   1482    case TGSI_SWIZZLE_Z:
   1483       return reg->Register.SwizzleZ;
   1484    case TGSI_SWIZZLE_W:
   1485       return reg->Register.SwizzleW;
   1486    default:
   1487       assert(!"Bad swizzle");
   1488       return TGSI_SWIZZLE_X;
   1489    }
   1490 }
   1491 
   1492 
   1493 /**
   1494  * Create swizzled tgsi_full_src_register.
   1495  */
   1496 static struct tgsi_full_src_register
   1497 swizzle_src(const struct tgsi_full_src_register *reg,
   1498             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
   1499             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
   1500 {
   1501    struct tgsi_full_src_register swizzled = *reg;
   1502    /* Note: we swizzle the current swizzle */
   1503    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
   1504    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
   1505    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
   1506    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
   1507    return swizzled;
   1508 }
   1509 
   1510 
   1511 /**
   1512  * Create swizzled tgsi_full_src_register where all the swizzle
   1513  * terms are the same.
   1514  */
   1515 static struct tgsi_full_src_register
   1516 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
   1517 {
   1518    struct tgsi_full_src_register swizzled = *reg;
   1519    /* Note: we swizzle the current swizzle */
   1520    swizzled.Register.SwizzleX =
   1521    swizzled.Register.SwizzleY =
   1522    swizzled.Register.SwizzleZ =
   1523    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
   1524    return swizzled;
   1525 }
   1526 
   1527 
   1528 /**
   1529  * Create new tgsi_full_dst_register with writemask.
   1530  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
   1531  */
   1532 static struct tgsi_full_dst_register
   1533 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
   1534 {
   1535    struct tgsi_full_dst_register masked = *reg;
   1536    masked.Register.WriteMask = mask;
   1537    return masked;
   1538 }
   1539 
   1540 
   1541 /**
   1542  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
   1543  */
   1544 static boolean
   1545 same_swizzle_terms(const struct tgsi_full_src_register *reg)
   1546 {
   1547    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
   1548            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
   1549            reg->Register.SwizzleZ == reg->Register.SwizzleW);
   1550 }
   1551 
   1552 
   1553 /**
   1554  * Search the vector for the value 'x' and return its position.
   1555  */
   1556 static int
   1557 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
   1558                  union tgsi_immediate_data x)
   1559 {
   1560    unsigned i;
   1561    for (i = 0; i < 4; i++) {
   1562       if (vec[i].Int == x.Int)
   1563          return i;
   1564    }
   1565    return -1;
   1566 }
   1567 
   1568 
   1569 /**
   1570  * Helper used by make_immediate_reg(), make_immediate_reg_4().
   1571  */
   1572 static int
   1573 find_immediate(struct svga_shader_emitter_v10 *emit,
   1574                union tgsi_immediate_data x, unsigned startIndex)
   1575 {
   1576    const unsigned endIndex = emit->num_immediates;
   1577    unsigned i;
   1578 
   1579    assert(emit->immediates_emitted);
   1580 
   1581    /* Search immediates for x, y, z, w */
   1582    for (i = startIndex; i < endIndex; i++) {
   1583       if (x.Int == emit->immediates[i][0].Int ||
   1584           x.Int == emit->immediates[i][1].Int ||
   1585           x.Int == emit->immediates[i][2].Int ||
   1586           x.Int == emit->immediates[i][3].Int) {
   1587          return i;
   1588       }
   1589    }
   1590    /* Should never try to use an immediate value that wasn't pre-declared */
   1591    assert(!"find_immediate() failed!");
   1592    return -1;
   1593 }
   1594 
   1595 
   1596 /**
   1597  * Return a tgsi_full_src_register for an immediate/literal
   1598  * union tgsi_immediate_data[4] value.
   1599  * Note: the values must have been previously declared/allocated in
   1600  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
   1601  * vec4 immediate.
   1602  */
   1603 static struct tgsi_full_src_register
   1604 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
   1605                      const union tgsi_immediate_data imm[4])
   1606 {
   1607    struct tgsi_full_src_register reg;
   1608    unsigned i;
   1609 
   1610    for (i = 0; i < emit->num_common_immediates; i++) {
   1611       /* search for first component value */
   1612       int immpos = find_immediate(emit, imm[0], i);
   1613       int x, y, z, w;
   1614 
   1615       assert(immpos >= 0);
   1616 
   1617       /* find remaining components within the immediate vector */
   1618       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
   1619       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
   1620       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
   1621       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
   1622 
   1623       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
   1624          /* found them all */
   1625          memset(&reg, 0, sizeof(reg));
   1626          reg.Register.File = TGSI_FILE_IMMEDIATE;
   1627          reg.Register.Index = immpos;
   1628          reg.Register.SwizzleX = x;
   1629          reg.Register.SwizzleY = y;
   1630          reg.Register.SwizzleZ = z;
   1631          reg.Register.SwizzleW = w;
   1632          return reg;
   1633       }
   1634       /* else, keep searching */
   1635    }
   1636 
   1637    assert(!"Failed to find immediate register!");
   1638 
   1639    /* Just return IMM[0].xxxx */
   1640    memset(&reg, 0, sizeof(reg));
   1641    reg.Register.File = TGSI_FILE_IMMEDIATE;
   1642    return reg;
   1643 }
   1644 
   1645 
   1646 /**
   1647  * Return a tgsi_full_src_register for an immediate/literal
   1648  * union tgsi_immediate_data value of the form {value, value, value, value}.
   1649  * \sa make_immediate_reg_4() regarding allowed values.
   1650  */
   1651 static struct tgsi_full_src_register
   1652 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
   1653                    union tgsi_immediate_data value)
   1654 {
   1655    struct tgsi_full_src_register reg;
   1656    int immpos = find_immediate(emit, value, 0);
   1657 
   1658    assert(immpos >= 0);
   1659 
   1660    memset(&reg, 0, sizeof(reg));
   1661    reg.Register.File = TGSI_FILE_IMMEDIATE;
   1662    reg.Register.Index = immpos;
   1663    reg.Register.SwizzleX =
   1664    reg.Register.SwizzleY =
   1665    reg.Register.SwizzleZ =
   1666    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
   1667 
   1668    return reg;
   1669 }
   1670 
   1671 
   1672 /**
   1673  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
   1674  * \sa make_immediate_reg_4() regarding allowed values.
   1675  */
   1676 static struct tgsi_full_src_register
   1677 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
   1678                           float x, float y, float z, float w)
   1679 {
   1680    union tgsi_immediate_data imm[4];
   1681    imm[0].Float = x;
   1682    imm[1].Float = y;
   1683    imm[2].Float = z;
   1684    imm[3].Float = w;
   1685    return make_immediate_reg_4(emit, imm);
   1686 }
   1687 
   1688 
   1689 /**
   1690  * Return a tgsi_full_src_register for an immediate/literal float value
   1691  * of the form {value, value, value, value}.
   1692  * \sa make_immediate_reg_4() regarding allowed values.
   1693  */
   1694 static struct tgsi_full_src_register
   1695 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
   1696 {
   1697    union tgsi_immediate_data imm;
   1698    imm.Float = value;
   1699    return make_immediate_reg(emit, imm);
   1700 }
   1701 
   1702 
   1703 /**
   1704  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
   1705  */
   1706 static struct tgsi_full_src_register
   1707 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
   1708                         int x, int y, int z, int w)
   1709 {
   1710    union tgsi_immediate_data imm[4];
   1711    imm[0].Int = x;
   1712    imm[1].Int = y;
   1713    imm[2].Int = z;
   1714    imm[3].Int = w;
   1715    return make_immediate_reg_4(emit, imm);
   1716 }
   1717 
   1718 
   1719 /**
   1720  * Return a tgsi_full_src_register for an immediate/literal int value
   1721  * of the form {value, value, value, value}.
   1722  * \sa make_immediate_reg_4() regarding allowed values.
   1723  */
   1724 static struct tgsi_full_src_register
   1725 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
   1726 {
   1727    union tgsi_immediate_data imm;
   1728    imm.Int = value;
   1729    return make_immediate_reg(emit, imm);
   1730 }
   1731 
   1732 
   1733 /**
   1734  * Allocate space for a union tgsi_immediate_data[4] immediate.
   1735  * \return  the index/position of the immediate.
   1736  */
   1737 static unsigned
   1738 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
   1739                   const union tgsi_immediate_data imm[4])
   1740 {
   1741    unsigned n = emit->num_immediates++;
   1742    assert(!emit->immediates_emitted);
   1743    assert(n < ARRAY_SIZE(emit->immediates));
   1744    emit->immediates[n][0] = imm[0];
   1745    emit->immediates[n][1] = imm[1];
   1746    emit->immediates[n][2] = imm[2];
   1747    emit->immediates[n][3] = imm[3];
   1748    return n;
   1749 }
   1750 
   1751 
   1752 /**
   1753  * Allocate space for a float[4] immediate.
   1754  * \return  the index/position of the immediate.
   1755  */
   1756 static unsigned
   1757 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
   1758                        float x, float y, float z, float w)
   1759 {
   1760    union tgsi_immediate_data imm[4];
   1761    imm[0].Float = x;
   1762    imm[1].Float = y;
   1763    imm[2].Float = z;
   1764    imm[3].Float = w;
   1765    return alloc_immediate_4(emit, imm);
   1766 }
   1767 
   1768 
   1769 /**
   1770  * Allocate space for an int[4] immediate.
   1771  * \return  the index/position of the immediate.
   1772  */
   1773 static unsigned
   1774 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
   1775                        int x, int y, int z, int w)
   1776 {
   1777    union tgsi_immediate_data imm[4];
   1778    imm[0].Int = x;
   1779    imm[1].Int = y;
   1780    imm[2].Int = z;
   1781    imm[3].Int = w;
   1782    return alloc_immediate_4(emit, imm);
   1783 }
   1784 
   1785 
   1786 /**
   1787  * Allocate a shader input to store a system value.
   1788  */
   1789 static unsigned
   1790 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
   1791 {
   1792    const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
   1793    assert(index < ARRAY_SIZE(emit->system_value_indexes));
   1794    emit->system_value_indexes[index] = n;
   1795    return n;
   1796 }
   1797 
   1798 
   1799 /**
   1800  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
   1801  */
   1802 static boolean
   1803 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
   1804                       const struct tgsi_full_immediate *imm)
   1805 {
   1806    /* We don't actually emit any code here.  We just save the
   1807     * immediate values and emit them later.
   1808     */
   1809    alloc_immediate_4(emit, imm->u);
   1810    return TRUE;
   1811 }
   1812 
   1813 
   1814 /**
   1815  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
   1816  * containing all the immediate values previously allocated
   1817  * with alloc_immediate_4().
   1818  */
   1819 static boolean
   1820 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
   1821 {
   1822    VGPU10OpcodeToken0 token;
   1823 
   1824    assert(!emit->immediates_emitted);
   1825 
   1826    token.value = 0;
   1827    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
   1828    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
   1829 
   1830    /* Note: no begin/end_emit_instruction() calls */
   1831    emit_dword(emit, token.value);
   1832    emit_dword(emit, 2 + 4 * emit->num_immediates);
   1833    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
   1834 
   1835    emit->immediates_emitted = TRUE;
   1836 
   1837    return TRUE;
   1838 }
   1839 
   1840 
   1841 /**
   1842  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
   1843  * interpolation mode.
   1844  * \return a VGPU10_INTERPOLATION_x value
   1845  */
   1846 static unsigned
   1847 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
   1848                         enum tgsi_interpolate_mode interp,
   1849                         enum tgsi_interpolate_loc interpolate_loc)
   1850 {
   1851    if (interp == TGSI_INTERPOLATE_COLOR) {
   1852       interp = emit->key.fs.flatshade ?
   1853          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
   1854    }
   1855 
   1856    switch (interp) {
   1857    case TGSI_INTERPOLATE_CONSTANT:
   1858       return VGPU10_INTERPOLATION_CONSTANT;
   1859    case TGSI_INTERPOLATE_LINEAR:
   1860       return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
   1861              VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
   1862              VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
   1863    case TGSI_INTERPOLATE_PERSPECTIVE:
   1864       return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
   1865              VGPU10_INTERPOLATION_LINEAR_CENTROID :
   1866              VGPU10_INTERPOLATION_LINEAR;
   1867    default:
   1868       assert(!"Unexpected interpolation mode");
   1869       return VGPU10_INTERPOLATION_CONSTANT;
   1870    }
   1871 }
   1872 
   1873 
   1874 /**
   1875  * Translate a TGSI property to VGPU10.
   1876  * Don't emit any instructions yet, only need to gather the primitive property
   1877  * information.  The output primitive topology might be changed later. The
   1878  * final property instructions will be emitted as part of the pre-helper code.
   1879  */
   1880 static boolean
   1881 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
   1882                      const struct tgsi_full_property *prop)
   1883 {
   1884    static const VGPU10_PRIMITIVE primType[] = {
   1885       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
   1886       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
   1887       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
   1888       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
   1889       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
   1890       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
   1891       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
   1892       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
   1893       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
   1894       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
   1895       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
   1896       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
   1897       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
   1898       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   1899    };
   1900 
   1901    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
   1902       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
   1903       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
   1904       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
   1905       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
   1906       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
   1907       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
   1908       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
   1909       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
   1910       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
   1911       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
   1912       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
   1913       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
   1914       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
   1915       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   1916    };
   1917 
   1918    static const unsigned inputArraySize[] = {
   1919       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
   1920       1,       /* VGPU10_PRIMITIVE_POINT */
   1921       2,       /* VGPU10_PRIMITIVE_LINE */
   1922       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
   1923       0,
   1924       0,
   1925       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
   1926       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   1927    };
   1928 
   1929    switch (prop->Property.PropertyName) {
   1930    case TGSI_PROPERTY_GS_INPUT_PRIM:
   1931       assert(prop->u[0].Data < ARRAY_SIZE(primType));
   1932       emit->gs.prim_type = primType[prop->u[0].Data];
   1933       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
   1934       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
   1935       break;
   1936 
   1937    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
   1938       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
   1939       emit->gs.prim_topology = primTopology[prop->u[0].Data];
   1940       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
   1941       break;
   1942 
   1943    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
   1944       emit->gs.max_out_vertices = prop->u[0].Data;
   1945       break;
   1946 
   1947    default:
   1948       break;
   1949    }
   1950 
   1951    return TRUE;
   1952 }
   1953 
   1954 
   1955 static void
   1956 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
   1957                           VGPU10OpcodeToken0 opcode0, unsigned nData,
   1958                           unsigned data)
   1959 {
   1960    begin_emit_instruction(emit);
   1961    emit_dword(emit, opcode0.value);
   1962    if (nData)
   1963       emit_dword(emit, data);
   1964    end_emit_instruction(emit);
   1965 }
   1966 
   1967 
   1968 /**
   1969  * Emit property instructions
   1970  */
   1971 static void
   1972 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
   1973 {
   1974    VGPU10OpcodeToken0 opcode0;
   1975 
   1976    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   1977 
   1978    /* emit input primitive type declaration */
   1979    opcode0.value = 0;
   1980    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   1981    opcode0.primitive = emit->gs.prim_type;
   1982    emit_property_instruction(emit, opcode0, 0, 0);
   1983 
   1984    /* emit output primitive topology declaration */
   1985    opcode0.value = 0;
   1986    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   1987    opcode0.primitiveTopology = emit->gs.prim_topology;
   1988    emit_property_instruction(emit, opcode0, 0, 0);
   1989 
   1990    /* emit max output vertices */
   1991    opcode0.value = 0;
   1992    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   1993    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
   1994 }
   1995 
   1996 
   1997 /**
   1998  * Emit a vgpu10 declaration "instruction".
   1999  * \param index  the register index
   2000  * \param size   array size of the operand. In most cases, it is 1,
   2001  *               but for inputs to geometry shader, the array size varies
   2002  *               depending on the primitive type.
   2003  */
   2004 static void
   2005 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
   2006                       VGPU10OpcodeToken0 opcode0,
   2007                       VGPU10OperandToken0 operand0,
   2008                       VGPU10NameToken name_token,
   2009                       unsigned index, unsigned size)
   2010 {
   2011    assert(opcode0.opcodeType);
   2012    assert(operand0.mask);
   2013 
   2014    begin_emit_instruction(emit);
   2015    emit_dword(emit, opcode0.value);
   2016 
   2017    emit_dword(emit, operand0.value);
   2018 
   2019    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
   2020       /* Next token is the index of the register to declare */
   2021       emit_dword(emit, index);
   2022    }
   2023    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
   2024       /* Next token is the size of the register */
   2025       emit_dword(emit, size);
   2026 
   2027       /* Followed by the index of the register */
   2028       emit_dword(emit, index);
   2029    }
   2030 
   2031    if (name_token.value) {
   2032       emit_dword(emit, name_token.value);
   2033    }
   2034 
   2035    end_emit_instruction(emit);
   2036 }
   2037 
   2038 
   2039 /**
   2040  * Emit the declaration for a shader input.
   2041  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
   2042  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
   2043  * \param dim         index dimension
   2044  * \param index       the input register index
   2045  * \param size        array size of the operand. In most cases, it is 1,
   2046  *                    but for inputs to geometry shader, the array size varies
   2047  *                    depending on the primitive type.
   2048  * \param name        one of VGPU10_NAME_x
   2049  * \parma numComp     number of components
   2050  * \param selMode     component selection mode
   2051  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
   2052  * \param interpMode  interpolation mode
   2053  */
   2054 static void
   2055 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
   2056                        unsigned opcodeType, unsigned operandType,
   2057                        unsigned dim, unsigned index, unsigned size,
   2058                        unsigned name, unsigned numComp,
   2059                        unsigned selMode, unsigned usageMask,
   2060                        unsigned interpMode)
   2061 {
   2062    VGPU10OpcodeToken0 opcode0;
   2063    VGPU10OperandToken0 operand0;
   2064    VGPU10NameToken name_token;
   2065 
   2066    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2067    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
   2068           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
   2069           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
   2070           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   2071    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
   2072           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   2073    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   2074    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   2075    assert(dim <= VGPU10_OPERAND_INDEX_3D);
   2076    assert(name == VGPU10_NAME_UNDEFINED ||
   2077           name == VGPU10_NAME_POSITION ||
   2078           name == VGPU10_NAME_INSTANCE_ID ||
   2079           name == VGPU10_NAME_VERTEX_ID ||
   2080           name == VGPU10_NAME_PRIMITIVE_ID ||
   2081           name == VGPU10_NAME_IS_FRONT_FACE);
   2082    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
   2083           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
   2084           interpMode == VGPU10_INTERPOLATION_LINEAR ||
   2085           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
   2086           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
   2087           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
   2088 
   2089    check_register_index(emit, opcodeType, index);
   2090 
   2091    opcode0.value = operand0.value = name_token.value = 0;
   2092 
   2093    opcode0.opcodeType = opcodeType;
   2094    opcode0.interpolationMode = interpMode;
   2095 
   2096    operand0.operandType = operandType;
   2097    operand0.numComponents = numComp;
   2098    operand0.selectionMode = selMode;
   2099    operand0.mask = usageMask;
   2100    operand0.indexDimension = dim;
   2101    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2102    if (dim == VGPU10_OPERAND_INDEX_2D)
   2103       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2104 
   2105    name_token.name = name;
   2106 
   2107    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
   2108 }
   2109 
   2110 
   2111 /**
   2112  * Emit the declaration for a shader output.
   2113  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
   2114  * \param index  the output register index
   2115  * \param name  one of VGPU10_NAME_x
   2116  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
   2117  */
   2118 static void
   2119 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
   2120                         unsigned type, unsigned index,
   2121                         unsigned name, unsigned usageMask)
   2122 {
   2123    VGPU10OpcodeToken0 opcode0;
   2124    VGPU10OperandToken0 operand0;
   2125    VGPU10NameToken name_token;
   2126 
   2127    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2128    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
   2129           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
   2130           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   2131    assert(name == VGPU10_NAME_UNDEFINED ||
   2132           name == VGPU10_NAME_POSITION ||
   2133           name == VGPU10_NAME_PRIMITIVE_ID ||
   2134           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
   2135           name == VGPU10_NAME_CLIP_DISTANCE);
   2136 
   2137    check_register_index(emit, type, index);
   2138 
   2139    opcode0.value = operand0.value = name_token.value = 0;
   2140 
   2141    opcode0.opcodeType = type;
   2142    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
   2143    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   2144    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   2145    operand0.mask = usageMask;
   2146    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   2147    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2148 
   2149    name_token.name = name;
   2150 
   2151    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
   2152 }
   2153 
   2154 
   2155 /**
   2156  * Emit the declaration for the fragment depth output.
   2157  */
   2158 static void
   2159 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
   2160 {
   2161    VGPU10OpcodeToken0 opcode0;
   2162    VGPU10OperandToken0 operand0;
   2163    VGPU10NameToken name_token;
   2164 
   2165    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   2166 
   2167    opcode0.value = operand0.value = name_token.value = 0;
   2168 
   2169    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
   2170    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
   2171    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
   2172    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
   2173    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   2174 
   2175    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
   2176 }
   2177 
   2178 
   2179 /**
   2180  * Emit the declaration for a system value input/output.
   2181  */
   2182 static void
   2183 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
   2184                               enum tgsi_semantic semantic_name, unsigned index)
   2185 {
   2186    switch (semantic_name) {
   2187    case TGSI_SEMANTIC_INSTANCEID:
   2188       index = alloc_system_value_index(emit, index);
   2189       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
   2190                              VGPU10_OPERAND_TYPE_INPUT,
   2191                              VGPU10_OPERAND_INDEX_1D,
   2192                              index, 1,
   2193                              VGPU10_NAME_INSTANCE_ID,
   2194                              VGPU10_OPERAND_4_COMPONENT,
   2195                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2196                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
   2197                              VGPU10_INTERPOLATION_UNDEFINED);
   2198       break;
   2199    case TGSI_SEMANTIC_VERTEXID:
   2200       index = alloc_system_value_index(emit, index);
   2201       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
   2202                              VGPU10_OPERAND_TYPE_INPUT,
   2203                              VGPU10_OPERAND_INDEX_1D,
   2204                              index, 1,
   2205                              VGPU10_NAME_VERTEX_ID,
   2206                              VGPU10_OPERAND_4_COMPONENT,
   2207                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2208                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
   2209                              VGPU10_INTERPOLATION_UNDEFINED);
   2210       break;
   2211    default:
   2212       ; /* XXX */
   2213    }
   2214 }
   2215 
   2216 /**
   2217  * Translate a TGSI declaration to VGPU10.
   2218  */
   2219 static boolean
   2220 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
   2221                         const struct tgsi_full_declaration *decl)
   2222 {
   2223    switch (decl->Declaration.File) {
   2224    case TGSI_FILE_INPUT:
   2225       /* do nothing - see emit_input_declarations() */
   2226       return TRUE;
   2227 
   2228    case TGSI_FILE_OUTPUT:
   2229       assert(decl->Range.First == decl->Range.Last);
   2230       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
   2231       return TRUE;
   2232 
   2233    case TGSI_FILE_TEMPORARY:
   2234       /* Don't declare the temps here.  Just keep track of how many
   2235        * and emit the declaration later.
   2236        */
   2237       if (decl->Declaration.Array) {
   2238          /* Indexed temporary array.  Save the start index of the array
   2239           * and the size of the array.
   2240           */
   2241          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
   2242          unsigned i;
   2243 
   2244          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
   2245 
   2246          /* Save this array so we can emit the declaration for it later */
   2247          emit->temp_arrays[arrayID].start = decl->Range.First;
   2248          emit->temp_arrays[arrayID].size =
   2249             decl->Range.Last - decl->Range.First + 1;
   2250 
   2251          emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
   2252          assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
   2253          emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
   2254 
   2255          /* Fill in the temp_map entries for this array */
   2256          for (i = decl->Range.First; i <= decl->Range.Last; i++) {
   2257             emit->temp_map[i].arrayId = arrayID;
   2258             emit->temp_map[i].index = i - decl->Range.First;
   2259          }
   2260       }
   2261 
   2262       /* for all temps, indexed or not, keep track of highest index */
   2263       emit->num_shader_temps = MAX2(emit->num_shader_temps,
   2264                                     decl->Range.Last + 1);
   2265       return TRUE;
   2266 
   2267    case TGSI_FILE_CONSTANT:
   2268       /* Don't declare constants here.  Just keep track and emit later. */
   2269       {
   2270          unsigned constbuf = 0, num_consts;
   2271          if (decl->Declaration.Dimension) {
   2272             constbuf = decl->Dim.Index2D;
   2273          }
   2274          /* We throw an assertion here when, in fact, the shader should never
   2275           * have linked due to constbuf index out of bounds, so we shouldn't
   2276           * have reached here.
   2277           */
   2278          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
   2279 
   2280          num_consts = MAX2(emit->num_shader_consts[constbuf],
   2281                            decl->Range.Last + 1);
   2282 
   2283          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
   2284             debug_printf("Warning: constant buffer is declared to size [%u]"
   2285                          " but [%u] is the limit.\n",
   2286                          num_consts,
   2287                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
   2288          }
   2289          /* The linker doesn't enforce the max UBO size so we clamp here */
   2290          emit->num_shader_consts[constbuf] =
   2291             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
   2292       }
   2293       return TRUE;
   2294 
   2295    case TGSI_FILE_IMMEDIATE:
   2296       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
   2297       return FALSE;
   2298 
   2299    case TGSI_FILE_SYSTEM_VALUE:
   2300       emit_system_value_declaration(emit, decl->Semantic.Name,
   2301                                     decl->Range.First);
   2302       return TRUE;
   2303 
   2304    case TGSI_FILE_SAMPLER:
   2305       /* Don't declare samplers here.  Just keep track and emit later. */
   2306       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
   2307       return TRUE;
   2308 
   2309 #if 0
   2310    case TGSI_FILE_RESOURCE:
   2311       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
   2312       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
   2313       assert(!"TGSI_FILE_RESOURCE not handled yet");
   2314       return FALSE;
   2315 #endif
   2316 
   2317    case TGSI_FILE_ADDRESS:
   2318       emit->num_address_regs = MAX2(emit->num_address_regs,
   2319                                     decl->Range.Last + 1);
   2320       return TRUE;
   2321 
   2322    case TGSI_FILE_SAMPLER_VIEW:
   2323       {
   2324          unsigned unit = decl->Range.First;
   2325          assert(decl->Range.First == decl->Range.Last);
   2326          emit->sampler_target[unit] = decl->SamplerView.Resource;
   2327          /* Note: we can ignore YZW return types for now */
   2328          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
   2329          emit->sampler_view[unit] = TRUE;
   2330       }
   2331       return TRUE;
   2332 
   2333    default:
   2334       assert(!"Unexpected type of declaration");
   2335       return FALSE;
   2336    }
   2337 }
   2338 
   2339 
   2340 
   2341 /**
   2342  * Emit all input declarations.
   2343  */
   2344 static boolean
   2345 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
   2346 {
   2347    unsigned i;
   2348 
   2349    if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2350 
   2351       for (i = 0; i < emit->linkage.num_inputs; i++) {
   2352          enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
   2353          unsigned usage_mask = emit->info.input_usage_mask[i];
   2354          unsigned index = emit->linkage.input_map[i];
   2355          unsigned type, interpolationMode, name;
   2356 
   2357          if (usage_mask == 0)
   2358             continue;  /* register is not actually used */
   2359 
   2360          if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2361             /* fragment position input */
   2362             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2363             interpolationMode = VGPU10_INTERPOLATION_LINEAR;
   2364             name = VGPU10_NAME_POSITION;
   2365             if (usage_mask & TGSI_WRITEMASK_W) {
   2366                /* we need to replace use of 'w' with '1/w' */
   2367                emit->fs.fragcoord_input_index = i;
   2368             }
   2369          }
   2370          else if (semantic_name == TGSI_SEMANTIC_FACE) {
   2371             /* fragment front-facing input */
   2372             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2373             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
   2374             name = VGPU10_NAME_IS_FRONT_FACE;
   2375             emit->fs.face_input_index = i;
   2376          }
   2377          else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
   2378             /* primitive ID */
   2379             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2380             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
   2381             name = VGPU10_NAME_PRIMITIVE_ID;
   2382          }
   2383          else {
   2384             /* general fragment input */
   2385             type = VGPU10_OPCODE_DCL_INPUT_PS;
   2386             interpolationMode =
   2387                translate_interpolation(emit,
   2388                                        emit->info.input_interpolate[i],
   2389                                        emit->info.input_interpolate_loc[i]);
   2390 
   2391             /* keeps track if flat interpolation mode is being used */
   2392             emit->uses_flat_interp = emit->uses_flat_interp ||
   2393                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
   2394 
   2395             name = VGPU10_NAME_UNDEFINED;
   2396          }
   2397 
   2398          emit_input_declaration(emit, type,
   2399                                 VGPU10_OPERAND_TYPE_INPUT,
   2400                                 VGPU10_OPERAND_INDEX_1D, index, 1,
   2401                                 name,
   2402                                 VGPU10_OPERAND_4_COMPONENT,
   2403                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2404                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2405                                 interpolationMode);
   2406       }
   2407    }
   2408    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
   2409 
   2410       for (i = 0; i < emit->info.num_inputs; i++) {
   2411          enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
   2412          unsigned usage_mask = emit->info.input_usage_mask[i];
   2413          unsigned index = emit->linkage.input_map[i];
   2414          unsigned opcodeType, operandType;
   2415          unsigned numComp, selMode;
   2416          unsigned name;
   2417          unsigned dim;
   2418 
   2419          if (usage_mask == 0)
   2420             continue;  /* register is not actually used */
   2421 
   2422          opcodeType = VGPU10_OPCODE_DCL_INPUT;
   2423          operandType = VGPU10_OPERAND_TYPE_INPUT;
   2424          numComp = VGPU10_OPERAND_4_COMPONENT;
   2425          selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   2426          name = VGPU10_NAME_UNDEFINED;
   2427 
   2428          /* all geometry shader inputs are two dimensional except
   2429           * gl_PrimitiveID
   2430           */
   2431          dim = VGPU10_OPERAND_INDEX_2D;
   2432 
   2433          if (semantic_name == TGSI_SEMANTIC_PRIMID) {
   2434             /* Primitive ID */
   2435             operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   2436             dim = VGPU10_OPERAND_INDEX_0D;
   2437             numComp = VGPU10_OPERAND_0_COMPONENT;
   2438             selMode = 0;
   2439 
   2440             /* also save the register index so we can check for
   2441              * primitive id when emit src register. We need to modify the
   2442              * operand type, index dimension when emit primitive id src reg.
   2443              */
   2444             emit->gs.prim_id_index = i;
   2445          }
   2446          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2447             /* vertex position input */
   2448             opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
   2449             name = VGPU10_NAME_POSITION;
   2450          }
   2451 
   2452          emit_input_declaration(emit, opcodeType, operandType,
   2453                                 dim, index,
   2454                                 emit->gs.input_size,
   2455                                 name,
   2456                                 numComp, selMode,
   2457                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2458                                 VGPU10_INTERPOLATION_UNDEFINED);
   2459       }
   2460    }
   2461    else {
   2462       assert(emit->unit == PIPE_SHADER_VERTEX);
   2463 
   2464       for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
   2465          unsigned usage_mask = emit->info.input_usage_mask[i];
   2466          unsigned index = i;
   2467 
   2468          if (usage_mask == 0)
   2469             continue;  /* register is not actually used */
   2470 
   2471          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
   2472                                 VGPU10_OPERAND_TYPE_INPUT,
   2473                                 VGPU10_OPERAND_INDEX_1D, index, 1,
   2474                                 VGPU10_NAME_UNDEFINED,
   2475                                 VGPU10_OPERAND_4_COMPONENT,
   2476                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2477                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2478                                 VGPU10_INTERPOLATION_UNDEFINED);
   2479       }
   2480    }
   2481 
   2482    return TRUE;
   2483 }
   2484 
   2485 
   2486 /**
   2487  * Emit all output declarations.
   2488  */
   2489 static boolean
   2490 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
   2491 {
   2492    unsigned i;
   2493 
   2494    for (i = 0; i < emit->info.num_outputs; i++) {
   2495       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
   2496       const enum tgsi_semantic semantic_name =
   2497          emit->info.output_semantic_name[i];
   2498       const unsigned semantic_index = emit->info.output_semantic_index[i];
   2499       unsigned index = i;
   2500 
   2501       if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2502          if (semantic_name == TGSI_SEMANTIC_COLOR) {
   2503             assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
   2504 
   2505             emit->fs.color_out_index[semantic_index] = index;
   2506 
   2507             emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
   2508                                               index + 1);
   2509 
   2510             /* The semantic index is the shader's color output/buffer index */
   2511             emit_output_declaration(emit,
   2512                                     VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
   2513                                     VGPU10_NAME_UNDEFINED,
   2514                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2515 
   2516             if (semantic_index == 0) {
   2517                if (emit->key.fs.write_color0_to_n_cbufs > 1) {
   2518                   /* Emit declarations for the additional color outputs
   2519                    * for broadcasting.
   2520                    */
   2521                   unsigned j;
   2522                   for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
   2523                      /* Allocate a new output index */
   2524                      unsigned idx = emit->info.num_outputs + j - 1;
   2525                      emit->fs.color_out_index[j] = idx;
   2526                      emit_output_declaration(emit,
   2527                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
   2528                                         VGPU10_NAME_UNDEFINED,
   2529                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2530                      emit->info.output_semantic_index[idx] = j;
   2531                   }
   2532 
   2533                   emit->fs.num_color_outputs =
   2534                      emit->key.fs.write_color0_to_n_cbufs;
   2535                }
   2536             }
   2537             else {
   2538                assert(!emit->key.fs.write_color0_to_n_cbufs);
   2539             }
   2540          }
   2541          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2542             /* Fragment depth output */
   2543             emit_fragdepth_output_declaration(emit);
   2544          }
   2545          else {
   2546             assert(!"Bad output semantic name");
   2547          }
   2548       }
   2549       else {
   2550          /* VS or GS */
   2551          unsigned name, type;
   2552          unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   2553 
   2554          switch (semantic_name) {
   2555          case TGSI_SEMANTIC_POSITION:
   2556             assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2557             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
   2558             name = VGPU10_NAME_POSITION;
   2559             /* Save the index of the vertex position output register */
   2560             emit->vposition.out_index = index;
   2561             break;
   2562          case TGSI_SEMANTIC_CLIPDIST:
   2563             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
   2564             name = VGPU10_NAME_CLIP_DISTANCE;
   2565             /* save the starting index of the clip distance output register */
   2566             if (semantic_index == 0)
   2567                emit->clip_dist_out_index = index;
   2568             writemask = emit->output_usage_mask[index];
   2569             writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
   2570             if (writemask == 0x0) {
   2571                continue; /* discard this do-nothing declaration */
   2572             }
   2573             break;
   2574          case TGSI_SEMANTIC_PRIMID:
   2575             assert(emit->unit == PIPE_SHADER_GEOMETRY);
   2576             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
   2577             name = VGPU10_NAME_PRIMITIVE_ID;
   2578             break;
   2579          case TGSI_SEMANTIC_LAYER:
   2580             assert(emit->unit == PIPE_SHADER_GEOMETRY);
   2581             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
   2582             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
   2583             break;
   2584          case TGSI_SEMANTIC_CLIPVERTEX:
   2585             type = VGPU10_OPCODE_DCL_OUTPUT;
   2586             name = VGPU10_NAME_UNDEFINED;
   2587             emit->clip_vertex_out_index = index;
   2588             break;
   2589          default:
   2590             /* generic output */
   2591             type = VGPU10_OPCODE_DCL_OUTPUT;
   2592             name = VGPU10_NAME_UNDEFINED;
   2593          }
   2594 
   2595          emit_output_declaration(emit, type, index, name, writemask);
   2596       }
   2597    }
   2598 
   2599    if (emit->vposition.so_index != INVALID_INDEX &&
   2600        emit->vposition.out_index != INVALID_INDEX) {
   2601 
   2602       assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2603 
   2604       /* Emit the declaration for the non-adjusted vertex position
   2605        * for stream output purpose
   2606        */
   2607       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2608                               emit->vposition.so_index,
   2609                               VGPU10_NAME_UNDEFINED,
   2610                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2611    }
   2612 
   2613    if (emit->clip_dist_so_index != INVALID_INDEX &&
   2614        emit->clip_dist_out_index != INVALID_INDEX) {
   2615 
   2616       assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2617 
   2618       /* Emit the declaration for the clip distance shadow copy which
   2619        * will be used for stream output purpose and for clip distance
   2620        * varying variable
   2621        */
   2622       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2623                               emit->clip_dist_so_index,
   2624                               VGPU10_NAME_UNDEFINED,
   2625                               emit->output_usage_mask[emit->clip_dist_out_index]);
   2626 
   2627       if (emit->info.num_written_clipdistance > 4) {
   2628          /* for the second clip distance register, each handles 4 planes */
   2629          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2630                                  emit->clip_dist_so_index + 1,
   2631                                  VGPU10_NAME_UNDEFINED,
   2632                                  emit->output_usage_mask[emit->clip_dist_out_index+1]);
   2633       }
   2634    }
   2635 
   2636    return TRUE;
   2637 }
   2638 
   2639 
   2640 /**
   2641  * Emit the declaration for the temporary registers.
   2642  */
   2643 static boolean
   2644 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
   2645 {
   2646    unsigned total_temps, reg, i;
   2647 
   2648    total_temps = emit->num_shader_temps;
   2649 
   2650    /* If there is indirect access to non-indexable temps in the shader,
   2651     * convert those temps to indexable temps. This works around a bug
   2652     * in the GLSL->TGSI translator exposed in piglit test
   2653     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
   2654     * Internal temps added by the driver remain as non-indexable temps.
   2655     */
   2656    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
   2657        emit->num_temp_arrays == 0) {
   2658       unsigned arrayID;
   2659 
   2660       arrayID = 1;
   2661       emit->num_temp_arrays = arrayID + 1;
   2662       emit->temp_arrays[arrayID].start = 0;
   2663       emit->temp_arrays[arrayID].size = total_temps;
   2664 
   2665       /* Fill in the temp_map entries for this temp array */
   2666       for (i = 0; i < total_temps; i++) {
   2667          emit->temp_map[i].arrayId = arrayID;
   2668          emit->temp_map[i].index = i;
   2669       }
   2670    }
   2671 
   2672    /* Allocate extra temps for specially-implemented instructions,
   2673     * such as LIT.
   2674     */
   2675    total_temps += MAX_INTERNAL_TEMPS;
   2676 
   2677    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
   2678       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
   2679           emit->key.clip_plane_enable ||
   2680           emit->vposition.so_index != INVALID_INDEX) {
   2681          emit->vposition.tmp_index = total_temps;
   2682          total_temps += 1;
   2683       }
   2684 
   2685       if (emit->unit == PIPE_SHADER_VERTEX) {
   2686          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
   2687                                  emit->key.vs.adjust_attrib_itof |
   2688                                  emit->key.vs.adjust_attrib_utof |
   2689                                  emit->key.vs.attrib_is_bgra |
   2690                                  emit->key.vs.attrib_puint_to_snorm |
   2691                                  emit->key.vs.attrib_puint_to_uscaled |
   2692                                  emit->key.vs.attrib_puint_to_sscaled);
   2693          while (attrib_mask) {
   2694             unsigned index = u_bit_scan(&attrib_mask);
   2695             emit->vs.adjusted_input[index] = total_temps++;
   2696          }
   2697       }
   2698 
   2699       if (emit->clip_mode == CLIP_DISTANCE) {
   2700          /* We need to write the clip distance to a temporary register
   2701           * first. Then it will be copied to the shadow copy for
   2702           * the clip distance varying variable and stream output purpose.
   2703           * It will also be copied to the actual CLIPDIST register
   2704           * according to the enabled clip planes
   2705           */
   2706          emit->clip_dist_tmp_index = total_temps++;
   2707          if (emit->info.num_written_clipdistance > 4)
   2708             total_temps++; /* second clip register */
   2709       }
   2710       else if (emit->clip_mode == CLIP_VERTEX) {
   2711          /* We need to convert the TGSI CLIPVERTEX output to one or more
   2712           * clip distances.  Allocate a temp reg for the clipvertex here.
   2713           */
   2714          assert(emit->info.writes_clipvertex > 0);
   2715          emit->clip_vertex_tmp_index = total_temps;
   2716          total_temps++;
   2717       }
   2718    }
   2719    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2720       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
   2721           emit->key.fs.write_color0_to_n_cbufs > 1) {
   2722          /* Allocate a temp to hold the output color */
   2723          emit->fs.color_tmp_index = total_temps;
   2724          total_temps += 1;
   2725       }
   2726 
   2727       if (emit->fs.face_input_index != INVALID_INDEX) {
   2728          /* Allocate a temp for the +/-1 face register */
   2729          emit->fs.face_tmp_index = total_temps;
   2730          total_temps += 1;
   2731       }
   2732 
   2733       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
   2734          /* Allocate a temp for modified fragment position register */
   2735          emit->fs.fragcoord_tmp_index = total_temps;
   2736          total_temps += 1;
   2737       }
   2738    }
   2739 
   2740    for (i = 0; i < emit->num_address_regs; i++) {
   2741       emit->address_reg_index[i] = total_temps++;
   2742    }
   2743 
   2744    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
   2745     * temp indexes.  Basically, we compact all the non-array temp register
   2746     * indexes into a consecutive series.
   2747     *
   2748     * Before, we may have some TGSI declarations like:
   2749     *   DCL TEMP[0..1], LOCAL
   2750     *   DCL TEMP[2..4], ARRAY(1), LOCAL
   2751     *   DCL TEMP[5..7], ARRAY(2), LOCAL
   2752     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
   2753     *
   2754     * After, we'll have a map like this:
   2755     *   temp_map[0] = { array 0, index 0 }
   2756     *   temp_map[1] = { array 0, index 1 }
   2757     *   temp_map[2] = { array 1, index 0 }
   2758     *   temp_map[3] = { array 1, index 1 }
   2759     *   temp_map[4] = { array 1, index 2 }
   2760     *   temp_map[5] = { array 2, index 0 }
   2761     *   temp_map[6] = { array 2, index 1 }
   2762     *   temp_map[7] = { array 2, index 2 }
   2763     *   temp_map[8] = { array 0, index 2 }
   2764     *   temp_map[9] = { array 0, index 3 }
   2765     *
   2766     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
   2767     * temps numbered 0..3
   2768     *
   2769     * Any time we emit a temporary register index, we'll have to use the
   2770     * temp_map[] table to convert the TGSI index to the VGPU10 index.
   2771     *
   2772     * Finally, we recompute the total_temps value here.
   2773     */
   2774    reg = 0;
   2775    for (i = 0; i < total_temps; i++) {
   2776       if (emit->temp_map[i].arrayId == 0) {
   2777          emit->temp_map[i].index = reg++;
   2778       }
   2779    }
   2780 
   2781    if (0) {
   2782       debug_printf("total_temps %u\n", total_temps);
   2783       for (i = 0; i < total_temps; i++) {
   2784          debug_printf("temp %u ->  array %u  index %u\n",
   2785                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
   2786       }
   2787    }
   2788 
   2789    total_temps = reg;
   2790 
   2791    /* Emit declaration of ordinary temp registers */
   2792    if (total_temps > 0) {
   2793       VGPU10OpcodeToken0 opcode0;
   2794 
   2795       opcode0.value = 0;
   2796       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
   2797 
   2798       begin_emit_instruction(emit);
   2799       emit_dword(emit, opcode0.value);
   2800       emit_dword(emit, total_temps);
   2801       end_emit_instruction(emit);
   2802    }
   2803 
   2804    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
   2805     * it's unused.
   2806     */
   2807    for (i = 1; i < emit->num_temp_arrays; i++) {
   2808       unsigned num_temps = emit->temp_arrays[i].size;
   2809 
   2810       if (num_temps > 0) {
   2811          VGPU10OpcodeToken0 opcode0;
   2812 
   2813          opcode0.value = 0;
   2814          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
   2815 
   2816          begin_emit_instruction(emit);
   2817          emit_dword(emit, opcode0.value);
   2818          emit_dword(emit, i); /* which array */
   2819          emit_dword(emit, num_temps);
   2820          emit_dword(emit, 4); /* num components */
   2821          end_emit_instruction(emit);
   2822 
   2823          total_temps += num_temps;
   2824       }
   2825    }
   2826 
   2827    /* Check that the grand total of all regular and indexed temps is
   2828     * under the limit.
   2829     */
   2830    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
   2831 
   2832    return TRUE;
   2833 }
   2834 
   2835 
   2836 static boolean
   2837 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
   2838 {
   2839    VGPU10OpcodeToken0 opcode0;
   2840    VGPU10OperandToken0 operand0;
   2841    unsigned total_consts, i;
   2842 
   2843    opcode0.value = 0;
   2844    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   2845    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   2846    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
   2847 
   2848    operand0.value = 0;
   2849    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   2850    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   2851    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2852    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2853    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   2854    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   2855    operand0.swizzleX = 0;
   2856    operand0.swizzleY = 1;
   2857    operand0.swizzleZ = 2;
   2858    operand0.swizzleW = 3;
   2859 
   2860    /**
   2861     * Emit declaration for constant buffer [0].  We also allocate
   2862     * room for the extra constants here.
   2863     */
   2864    total_consts = emit->num_shader_consts[0];
   2865 
   2866    /* Now, allocate constant slots for the "extra" constants.
   2867     * Note: it's critical that these extra constant locations
   2868     * exactly match what's emitted by the "extra" constants code
   2869     * in svga_state_constants.c
   2870     */
   2871 
   2872    /* Vertex position scale/translation */
   2873    if (emit->vposition.need_prescale) {
   2874       emit->vposition.prescale_scale_index = total_consts++;
   2875       emit->vposition.prescale_trans_index = total_consts++;
   2876    }
   2877 
   2878    if (emit->unit == PIPE_SHADER_VERTEX) {
   2879       if (emit->key.vs.undo_viewport) {
   2880          emit->vs.viewport_index = total_consts++;
   2881       }
   2882    }
   2883 
   2884    /* user-defined clip planes */
   2885    if (emit->key.clip_plane_enable) {
   2886       unsigned n = util_bitcount(emit->key.clip_plane_enable);
   2887       assert(emit->unit == PIPE_SHADER_VERTEX ||
   2888              emit->unit == PIPE_SHADER_GEOMETRY);
   2889       for (i = 0; i < n; i++) {
   2890          emit->clip_plane_const[i] = total_consts++;
   2891       }
   2892    }
   2893 
   2894    for (i = 0; i < emit->num_samplers; i++) {
   2895 
   2896       if (emit->sampler_view[i]) {
   2897 
   2898          /* Texcoord scale factors for RECT textures */
   2899          if (emit->key.tex[i].unnormalized) {
   2900             emit->texcoord_scale_index[i] = total_consts++;
   2901          }
   2902 
   2903          /* Texture buffer sizes */
   2904          if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
   2905             emit->texture_buffer_size_index[i] = total_consts++;
   2906          }
   2907       }
   2908    }
   2909 
   2910    if (total_consts > 0) {
   2911       begin_emit_instruction(emit);
   2912       emit_dword(emit, opcode0.value);
   2913       emit_dword(emit, operand0.value);
   2914       emit_dword(emit, 0);  /* which const buffer slot */
   2915       emit_dword(emit, total_consts);
   2916       end_emit_instruction(emit);
   2917    }
   2918 
   2919    /* Declare remaining constant buffers (UBOs) */
   2920    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
   2921       if (emit->num_shader_consts[i] > 0) {
   2922          begin_emit_instruction(emit);
   2923          emit_dword(emit, opcode0.value);
   2924          emit_dword(emit, operand0.value);
   2925          emit_dword(emit, i);  /* which const buffer slot */
   2926          emit_dword(emit, emit->num_shader_consts[i]);
   2927          end_emit_instruction(emit);
   2928       }
   2929    }
   2930 
   2931    return TRUE;
   2932 }
   2933 
   2934 
   2935 /**
   2936  * Emit declarations for samplers.
   2937  */
   2938 static boolean
   2939 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
   2940 {
   2941    unsigned i;
   2942 
   2943    for (i = 0; i < emit->num_samplers; i++) {
   2944       VGPU10OpcodeToken0 opcode0;
   2945       VGPU10OperandToken0 operand0;
   2946 
   2947       opcode0.value = 0;
   2948       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
   2949       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
   2950 
   2951       operand0.value = 0;
   2952       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   2953       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   2954       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   2955       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2956 
   2957       begin_emit_instruction(emit);
   2958       emit_dword(emit, opcode0.value);
   2959       emit_dword(emit, operand0.value);
   2960       emit_dword(emit, i);
   2961       end_emit_instruction(emit);
   2962    }
   2963 
   2964    return TRUE;
   2965 }
   2966 
   2967 
   2968 /**
   2969  * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
   2970  */
   2971 static unsigned
   2972 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
   2973                                    boolean is_array)
   2974 {
   2975    switch (target) {
   2976    case TGSI_TEXTURE_BUFFER:
   2977       return VGPU10_RESOURCE_DIMENSION_BUFFER;
   2978    case TGSI_TEXTURE_1D:
   2979       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2980    case TGSI_TEXTURE_2D:
   2981    case TGSI_TEXTURE_RECT:
   2982       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2983    case TGSI_TEXTURE_3D:
   2984       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   2985    case TGSI_TEXTURE_CUBE:
   2986       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   2987    case TGSI_TEXTURE_SHADOW1D:
   2988       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2989    case TGSI_TEXTURE_SHADOW2D:
   2990    case TGSI_TEXTURE_SHADOWRECT:
   2991       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2992    case TGSI_TEXTURE_1D_ARRAY:
   2993    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   2994       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
   2995          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2996    case TGSI_TEXTURE_2D_ARRAY:
   2997    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   2998       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
   2999          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   3000    case TGSI_TEXTURE_SHADOWCUBE:
   3001       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   3002    case TGSI_TEXTURE_2D_MSAA:
   3003       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   3004    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   3005       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
   3006          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   3007    case TGSI_TEXTURE_CUBE_ARRAY:
   3008       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
   3009    default:
   3010       assert(!"Unexpected resource type");
   3011       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   3012    }
   3013 }
   3014 
   3015 
   3016 /**
   3017  * Given a tgsi_return_type, return true iff it is an integer type.
   3018  */
   3019 static boolean
   3020 is_integer_type(enum tgsi_return_type type)
   3021 {
   3022    switch (type) {
   3023       case TGSI_RETURN_TYPE_SINT:
   3024       case TGSI_RETURN_TYPE_UINT:
   3025          return TRUE;
   3026       case TGSI_RETURN_TYPE_FLOAT:
   3027       case TGSI_RETURN_TYPE_UNORM:
   3028       case TGSI_RETURN_TYPE_SNORM:
   3029          return FALSE;
   3030       case TGSI_RETURN_TYPE_COUNT:
   3031       default:
   3032          assert(!"is_integer_type: Unknown tgsi_return_type");
   3033          return FALSE;
   3034    }
   3035 }
   3036 
   3037 
   3038 /**
   3039  * Emit declarations for resources.
   3040  * XXX When we're sure that all TGSI shaders will be generated with
   3041  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
   3042  * rework this code.
   3043  */
   3044 static boolean
   3045 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
   3046 {
   3047    unsigned i;
   3048 
   3049    /* Emit resource decl for each sampler */
   3050    for (i = 0; i < emit->num_samplers; i++) {
   3051       VGPU10OpcodeToken0 opcode0;
   3052       VGPU10OperandToken0 operand0;
   3053       VGPU10ResourceReturnTypeToken return_type;
   3054       VGPU10_RESOURCE_RETURN_TYPE rt;
   3055 
   3056       opcode0.value = 0;
   3057       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
   3058       opcode0.resourceDimension =
   3059          tgsi_texture_to_resource_dimension(emit->sampler_target[i],
   3060                                             emit->key.tex[i].is_array);
   3061       operand0.value = 0;
   3062       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   3063       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   3064       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   3065       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   3066 
   3067 #if 1
   3068       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
   3069       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
   3070       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
   3071       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
   3072       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
   3073       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
   3074       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
   3075       rt = emit->sampler_return_type[i] + 1;
   3076 #else
   3077       switch (emit->sampler_return_type[i]) {
   3078          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
   3079          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
   3080          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
   3081          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
   3082          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
   3083          case TGSI_RETURN_TYPE_COUNT:
   3084          default:
   3085             rt = VGPU10_RETURN_TYPE_FLOAT;
   3086             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
   3087       }
   3088 #endif
   3089 
   3090       return_type.value = 0;
   3091       return_type.component0 = rt;
   3092       return_type.component1 = rt;
   3093       return_type.component2 = rt;
   3094       return_type.component3 = rt;
   3095 
   3096       begin_emit_instruction(emit);
   3097       emit_dword(emit, opcode0.value);
   3098       emit_dword(emit, operand0.value);
   3099       emit_dword(emit, i);
   3100       emit_dword(emit, return_type.value);
   3101       end_emit_instruction(emit);
   3102    }
   3103 
   3104    return TRUE;
   3105 }
   3106 
   3107 static void
   3108 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
   3109                      unsigned opcode,
   3110                      const struct tgsi_full_dst_register *dst,
   3111                      const struct tgsi_full_src_register *src,
   3112                      boolean saturate)
   3113 {
   3114    begin_emit_instruction(emit);
   3115    emit_opcode(emit, opcode, saturate);
   3116    emit_dst_register(emit, dst);
   3117    emit_src_register(emit, src);
   3118    end_emit_instruction(emit);
   3119 }
   3120 
   3121 static void
   3122 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
   3123                      unsigned opcode,
   3124                      const struct tgsi_full_dst_register *dst,
   3125                      const struct tgsi_full_src_register *src1,
   3126                      const struct tgsi_full_src_register *src2,
   3127                      boolean saturate)
   3128 {
   3129    begin_emit_instruction(emit);
   3130    emit_opcode(emit, opcode, saturate);
   3131    emit_dst_register(emit, dst);
   3132    emit_src_register(emit, src1);
   3133    emit_src_register(emit, src2);
   3134    end_emit_instruction(emit);
   3135 }
   3136 
   3137 static void
   3138 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
   3139                      unsigned opcode,
   3140                      const struct tgsi_full_dst_register *dst,
   3141                      const struct tgsi_full_src_register *src1,
   3142                      const struct tgsi_full_src_register *src2,
   3143                      const struct tgsi_full_src_register *src3,
   3144                      boolean saturate)
   3145 {
   3146    begin_emit_instruction(emit);
   3147    emit_opcode(emit, opcode, saturate);
   3148    emit_dst_register(emit, dst);
   3149    emit_src_register(emit, src1);
   3150    emit_src_register(emit, src2);
   3151    emit_src_register(emit, src3);
   3152    end_emit_instruction(emit);
   3153 }
   3154 
   3155 /**
   3156  * Emit the actual clip distance instructions to be used for clipping
   3157  * by copying the clip distance from the temporary registers to the
   3158  * CLIPDIST registers written with the enabled planes mask.
   3159  * Also copy the clip distance from the temporary to the clip distance
   3160  * shadow copy register which will be referenced by the input shader
   3161  */
   3162 static void
   3163 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
   3164 {
   3165    struct tgsi_full_src_register tmp_clip_dist_src;
   3166    struct tgsi_full_dst_register clip_dist_dst;
   3167 
   3168    unsigned i;
   3169    unsigned clip_plane_enable = emit->key.clip_plane_enable;
   3170    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   3171    int num_written_clipdist = emit->info.num_written_clipdistance;
   3172 
   3173    assert(emit->clip_dist_out_index != INVALID_INDEX);
   3174    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
   3175 
   3176    /**
   3177     * Temporary reset the temporary clip dist register index so
   3178     * that the copy to the real clip dist register will not
   3179     * attempt to copy to the temporary register again
   3180     */
   3181    emit->clip_dist_tmp_index = INVALID_INDEX;
   3182 
   3183    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
   3184 
   3185       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
   3186 
   3187       /**
   3188        * copy to the shadow copy for use by varying variable and
   3189        * stream output. All clip distances
   3190        * will be written regardless of the enabled clipping planes.
   3191        */
   3192       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
   3193                                    emit->clip_dist_so_index + i);
   3194 
   3195       /* MOV clip_dist_so, tmp_clip_dist */
   3196       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
   3197                            &tmp_clip_dist_src, FALSE);
   3198 
   3199       /**
   3200        * copy those clip distances to enabled clipping planes
   3201        * to CLIPDIST registers for clipping
   3202        */
   3203       if (clip_plane_enable & 0xf) {
   3204          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
   3205                                       emit->clip_dist_out_index + i);
   3206          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
   3207 
   3208          /* MOV CLIPDIST, tmp_clip_dist */
   3209          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
   3210                               &tmp_clip_dist_src, FALSE);
   3211       }
   3212       /* four clip planes per clip register */
   3213       clip_plane_enable >>= 4;
   3214    }
   3215    /**
   3216     * set the temporary clip dist register index back to the
   3217     * temporary index for the next vertex
   3218     */
   3219    emit->clip_dist_tmp_index = clip_dist_tmp_index;
   3220 }
   3221 
   3222 /* Declare clip distance output registers for user-defined clip planes
   3223  * or the TGSI_CLIPVERTEX output.
   3224  */
   3225 static void
   3226 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
   3227 {
   3228    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   3229    unsigned index = emit->num_outputs;
   3230    unsigned plane_mask;
   3231 
   3232    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3233           emit->unit == PIPE_SHADER_GEOMETRY);
   3234    assert(num_clip_planes <= 8);
   3235 
   3236    if (emit->clip_mode != CLIP_LEGACY &&
   3237        emit->clip_mode != CLIP_VERTEX) {
   3238       return;
   3239    }
   3240 
   3241    if (num_clip_planes == 0)
   3242       return;
   3243 
   3244    /* Declare one or two clip output registers.  The number of components
   3245     * in the mask reflects the number of clip planes.  For example, if 5
   3246     * clip planes are needed, we'll declare outputs similar to:
   3247     * dcl_output_siv o2.xyzw, clip_distance
   3248     * dcl_output_siv o3.x, clip_distance
   3249     */
   3250    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
   3251 
   3252    plane_mask = (1 << num_clip_planes) - 1;
   3253    if (plane_mask & 0xf) {
   3254       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   3255       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
   3256                               VGPU10_NAME_CLIP_DISTANCE, cmask);
   3257       emit->num_outputs++;
   3258    }
   3259    if (plane_mask & 0xf0) {
   3260       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   3261       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
   3262                               VGPU10_NAME_CLIP_DISTANCE, cmask);
   3263       emit->num_outputs++;
   3264    }
   3265 }
   3266 
   3267 
   3268 /**
   3269  * Emit the instructions for writing to the clip distance registers
   3270  * to handle legacy/automatic clip planes.
   3271  * For each clip plane, the distance is the dot product of the vertex
   3272  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
   3273  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
   3274  * output registers already declared.
   3275  */
   3276 static void
   3277 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
   3278                              unsigned vpos_tmp_index)
   3279 {
   3280    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   3281 
   3282    assert(emit->clip_mode == CLIP_LEGACY);
   3283    assert(num_clip_planes <= 8);
   3284 
   3285    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3286           emit->unit == PIPE_SHADER_GEOMETRY);
   3287 
   3288    for (i = 0; i < num_clip_planes; i++) {
   3289       struct tgsi_full_dst_register dst;
   3290       struct tgsi_full_src_register plane_src, vpos_src;
   3291       unsigned reg_index = emit->clip_dist_out_index + i / 4;
   3292       unsigned comp = i % 4;
   3293       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
   3294 
   3295       /* create dst, src regs */
   3296       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
   3297       dst = writemask_dst(&dst, writemask);
   3298 
   3299       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
   3300       vpos_src = make_src_temp_reg(vpos_tmp_index);
   3301 
   3302       /* DP4 clip_dist, plane, vpos */
   3303       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
   3304                            &plane_src, &vpos_src, FALSE);
   3305    }
   3306 }
   3307 
   3308 
   3309 /**
   3310  * Emit the instructions for computing the clip distance results from
   3311  * the clip vertex temporary.
   3312  * For each clip plane, the distance is the dot product of the clip vertex
   3313  * position (found in a temp reg) and the clip plane coefficients.
   3314  */
   3315 static void
   3316 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
   3317 {
   3318    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   3319    unsigned i;
   3320    struct tgsi_full_dst_register dst;
   3321    struct tgsi_full_src_register clipvert_src;
   3322    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
   3323 
   3324    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3325           emit->unit == PIPE_SHADER_GEOMETRY);
   3326 
   3327    assert(emit->clip_mode == CLIP_VERTEX);
   3328 
   3329    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
   3330 
   3331    for (i = 0; i < num_clip; i++) {
   3332       struct tgsi_full_src_register plane_src;
   3333       unsigned reg_index = emit->clip_dist_out_index + i / 4;
   3334       unsigned comp = i % 4;
   3335       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
   3336 
   3337       /* create dst, src regs */
   3338       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
   3339       dst = writemask_dst(&dst, writemask);
   3340 
   3341       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
   3342 
   3343       /* DP4 clip_dist, plane, vpos */
   3344       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
   3345                            &plane_src, &clipvert_src, FALSE);
   3346    }
   3347 
   3348    /* copy temporary clip vertex register to the clip vertex register */
   3349 
   3350    assert(emit->clip_vertex_out_index != INVALID_INDEX);
   3351 
   3352    /**
   3353     * temporary reset the temporary clip vertex register index so
   3354     * that copy to the clip vertex register will not attempt
   3355     * to copy to the temporary register again
   3356     */
   3357    emit->clip_vertex_tmp_index = INVALID_INDEX;
   3358 
   3359    /* MOV clip_vertex, clip_vertex_tmp */
   3360    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   3361    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   3362                         &dst, &clipvert_src, FALSE);
   3363 
   3364    /**
   3365     * set the temporary clip vertex register index back to the
   3366     * temporary index for the next vertex
   3367     */
   3368    emit->clip_vertex_tmp_index = clip_vertex_tmp;
   3369 }
   3370 
   3371 /**
   3372  * Emit code to convert RGBA to BGRA
   3373  */
   3374 static void
   3375 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
   3376                      const struct tgsi_full_dst_register *dst,
   3377                      const struct tgsi_full_src_register *src)
   3378 {
   3379    struct tgsi_full_src_register bgra_src =
   3380       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
   3381 
   3382    begin_emit_instruction(emit);
   3383    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   3384    emit_dst_register(emit, dst);
   3385    emit_src_register(emit, &bgra_src);
   3386    end_emit_instruction(emit);
   3387 }
   3388 
   3389 
   3390 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
   3391 static void
   3392 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
   3393                     const struct tgsi_full_dst_register *dst,
   3394                     const struct tgsi_full_src_register *src)
   3395 {
   3396    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
   3397    struct tgsi_full_src_register two =
   3398       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
   3399    struct tgsi_full_src_register neg_two =
   3400       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   3401 
   3402    unsigned val_tmp = get_temp_index(emit);
   3403    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
   3404    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
   3405 
   3406    unsigned bias_tmp = get_temp_index(emit);
   3407    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
   3408    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
   3409 
   3410    /* val = src * 2.0 */
   3411    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
   3412                         src, &two, FALSE);
   3413 
   3414    /* bias = src > 0.5 */
   3415    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
   3416                         src, &half, FALSE);
   3417 
   3418    /* bias = bias & -2.0 */
   3419    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
   3420                         &bias_src, &neg_two, FALSE);
   3421 
   3422    /* dst = val + bias */
   3423    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
   3424                         &val_src, &bias_src, FALSE);
   3425 
   3426    free_temp_indexes(emit);
   3427 }
   3428 
   3429 
   3430 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
   3431 static void
   3432 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
   3433                       const struct tgsi_full_dst_register *dst,
   3434                       const struct tgsi_full_src_register *src)
   3435 {
   3436    struct tgsi_full_src_register scale =
   3437       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
   3438 
   3439    /* dst = src * scale */
   3440    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
   3441 }
   3442 
   3443 
   3444 /** Convert from R32_UINT to 10_10_10_2_sscaled */
   3445 static void
   3446 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
   3447                       const struct tgsi_full_dst_register *dst,
   3448                       const struct tgsi_full_src_register *src)
   3449 {
   3450    struct tgsi_full_src_register lshift =
   3451       make_immediate_reg_int4(emit, 22, 12, 2, 0);
   3452    struct tgsi_full_src_register rshift =
   3453       make_immediate_reg_int4(emit, 22, 22, 22, 30);
   3454 
   3455    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
   3456 
   3457    unsigned tmp = get_temp_index(emit);
   3458    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3459    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3460 
   3461    /*
   3462     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
   3463     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
   3464     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
   3465     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
   3466     * dst = i_to_f(r,g,b,a);     # convert to float
   3467     */
   3468    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
   3469                         &src_xxxx, &lshift, FALSE);
   3470    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
   3471                         &tmp_src, &rshift, FALSE);
   3472    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
   3473 
   3474    free_temp_indexes(emit);
   3475 }
   3476 
   3477 
   3478 /**
   3479  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
   3480  */
   3481 static boolean
   3482 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
   3483               const struct tgsi_full_instruction *inst)
   3484 {
   3485    unsigned index = inst->Dst[0].Register.Index;
   3486    struct tgsi_full_dst_register dst;
   3487    unsigned opcode;
   3488 
   3489    assert(index < MAX_VGPU10_ADDR_REGS);
   3490    dst = make_dst_temp_reg(emit->address_reg_index[index]);
   3491 
   3492    /* ARL dst, s0
   3493     * Translates into:
   3494     * FTOI address_tmp, s0
   3495     *
   3496     * UARL dst, s0
   3497     * Translates into:
   3498     * MOV address_tmp, s0
   3499     */
   3500    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
   3501       opcode = VGPU10_OPCODE_FTOI;
   3502    else
   3503       opcode = VGPU10_OPCODE_MOV;
   3504 
   3505    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
   3506 
   3507    return TRUE;
   3508 }
   3509 
   3510 
   3511 /**
   3512  * Emit code for TGSI_OPCODE_CAL instruction.
   3513  */
   3514 static boolean
   3515 emit_cal(struct svga_shader_emitter_v10 *emit,
   3516          const struct tgsi_full_instruction *inst)
   3517 {
   3518    unsigned label = inst->Label.Label;
   3519    VGPU10OperandToken0 operand;
   3520    operand.value = 0;
   3521    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
   3522 
   3523    begin_emit_instruction(emit);
   3524    emit_dword(emit, operand.value);
   3525    emit_dword(emit, label);
   3526    end_emit_instruction(emit);
   3527 
   3528    return TRUE;
   3529 }
   3530 
   3531 
   3532 /**
   3533  * Emit code for TGSI_OPCODE_IABS instruction.
   3534  */
   3535 static boolean
   3536 emit_iabs(struct svga_shader_emitter_v10 *emit,
   3537           const struct tgsi_full_instruction *inst)
   3538 {
   3539    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
   3540     * dst.y = (src0.y < 0) ? -src0.y : src0.y
   3541     * dst.z = (src0.z < 0) ? -src0.z : src0.z
   3542     * dst.w = (src0.w < 0) ? -src0.w : src0.w
   3543     *
   3544     * Translates into
   3545     *   IMAX dst, src, neg(src)
   3546     */
   3547    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
   3548    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
   3549                         &inst->Src[0], &neg_src, FALSE);
   3550 
   3551    return TRUE;
   3552 }
   3553 
   3554 
   3555 /**
   3556  * Emit code for TGSI_OPCODE_CMP instruction.
   3557  */
   3558 static boolean
   3559 emit_cmp(struct svga_shader_emitter_v10 *emit,
   3560          const struct tgsi_full_instruction *inst)
   3561 {
   3562    /* dst.x = (src0.x < 0) ? src1.x : src2.x
   3563     * dst.y = (src0.y < 0) ? src1.y : src2.y
   3564     * dst.z = (src0.z < 0) ? src1.z : src2.z
   3565     * dst.w = (src0.w < 0) ? src1.w : src2.w
   3566     *
   3567     * Translates into
   3568     *   LT tmp, src0, 0.0
   3569     *   MOVC dst, tmp, src1, src2
   3570     */
   3571    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3572    unsigned tmp = get_temp_index(emit);
   3573    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3574    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3575 
   3576    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
   3577                         &inst->Src[0], &zero, FALSE);
   3578    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
   3579                         &tmp_src, &inst->Src[1], &inst->Src[2],
   3580                         inst->Instruction.Saturate);
   3581 
   3582    free_temp_indexes(emit);
   3583 
   3584    return TRUE;
   3585 }
   3586 
   3587 
   3588 /**
   3589  * Emit code for TGSI_OPCODE_DST instruction.
   3590  */
   3591 static boolean
   3592 emit_dst(struct svga_shader_emitter_v10 *emit,
   3593          const struct tgsi_full_instruction *inst)
   3594 {
   3595    /*
   3596     * dst.x = 1
   3597     * dst.y = src0.y * src1.y
   3598     * dst.z = src0.z
   3599     * dst.w = src1.w
   3600     */
   3601 
   3602    struct tgsi_full_src_register s0_yyyy =
   3603       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   3604    struct tgsi_full_src_register s0_zzzz =
   3605       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   3606    struct tgsi_full_src_register s1_yyyy =
   3607       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   3608    struct tgsi_full_src_register s1_wwww =
   3609       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
   3610 
   3611    /*
   3612     * If dst and either src0 and src1 are the same we need
   3613     * to create a temporary for it and insert a extra move.
   3614     */
   3615    unsigned tmp_move = get_temp_index(emit);
   3616    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   3617    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   3618 
   3619    /* MOV dst.x, 1.0 */
   3620    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   3621       struct tgsi_full_dst_register dst_x =
   3622          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   3623       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3624 
   3625       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   3626    }
   3627 
   3628    /* MUL dst.y, s0.y, s1.y */
   3629    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   3630       struct tgsi_full_dst_register dst_y =
   3631          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   3632 
   3633       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
   3634                            &s1_yyyy, inst->Instruction.Saturate);
   3635    }
   3636 
   3637    /* MOV dst.z, s0.z */
   3638    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   3639       struct tgsi_full_dst_register dst_z =
   3640          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   3641 
   3642       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
   3643                            inst->Instruction.Saturate);
   3644   }
   3645 
   3646    /* MOV dst.w, s1.w */
   3647    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   3648       struct tgsi_full_dst_register dst_w =
   3649          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   3650 
   3651       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
   3652                            inst->Instruction.Saturate);
   3653    }
   3654 
   3655    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   3656                         FALSE);
   3657    free_temp_indexes(emit);
   3658 
   3659    return TRUE;
   3660 }
   3661 
   3662 
   3663 
   3664 /**
   3665  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
   3666  */
   3667 static boolean
   3668 emit_endprim(struct svga_shader_emitter_v10 *emit,
   3669              const struct tgsi_full_instruction *inst)
   3670 {
   3671    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   3672 
   3673    /* We can't use emit_simple() because the TGSI instruction has one
   3674     * operand (vertex stream number) which we must ignore for VGPU10.
   3675     */
   3676    begin_emit_instruction(emit);
   3677    emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   3678    end_emit_instruction(emit);
   3679    return TRUE;
   3680 }
   3681 
   3682 
   3683 /**
   3684  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
   3685  */
   3686 static boolean
   3687 emit_ex2(struct svga_shader_emitter_v10 *emit,
   3688          const struct tgsi_full_instruction *inst)
   3689 {
   3690    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
   3691     * while VGPU10 computes four values.
   3692     *
   3693     * dst = EX2(src):
   3694     *   dst.xyzw = 2.0 ^ src.x
   3695     */
   3696 
   3697    struct tgsi_full_src_register src_xxxx =
   3698       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   3699                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   3700 
   3701    /* EXP tmp, s0.xxxx */
   3702    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
   3703                         inst->Instruction.Saturate);
   3704 
   3705    return TRUE;
   3706 }
   3707 
   3708 
   3709 /**
   3710  * Emit code for TGSI_OPCODE_EXP instruction.
   3711  */
   3712 static boolean
   3713 emit_exp(struct svga_shader_emitter_v10 *emit,
   3714          const struct tgsi_full_instruction *inst)
   3715 {
   3716    /*
   3717     * dst.x = 2 ^ floor(s0.x)
   3718     * dst.y = s0.x - floor(s0.x)
   3719     * dst.z = 2 ^ s0.x
   3720     * dst.w = 1.0
   3721     */
   3722 
   3723    struct tgsi_full_src_register src_xxxx =
   3724       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   3725    unsigned tmp = get_temp_index(emit);
   3726    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3727    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3728 
   3729    /*
   3730     * If dst and src are the same we need to create
   3731     * a temporary for it and insert a extra move.
   3732     */
   3733    unsigned tmp_move = get_temp_index(emit);
   3734    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   3735    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   3736 
   3737    /* only use X component of temp reg */
   3738    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   3739    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   3740 
   3741    /* ROUND_NI tmp.x, s0.x */
   3742    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
   3743                         &src_xxxx, FALSE); /* round to -infinity */
   3744 
   3745    /* EXP dst.x, tmp.x */
   3746    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   3747       struct tgsi_full_dst_register dst_x =
   3748          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   3749 
   3750       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
   3751                            inst->Instruction.Saturate);
   3752    }
   3753 
   3754    /* ADD dst.y, s0.x, -tmp */
   3755    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   3756       struct tgsi_full_dst_register dst_y =
   3757          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   3758       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
   3759 
   3760       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
   3761                            &neg_tmp_src, inst->Instruction.Saturate);
   3762    }
   3763 
   3764    /* EXP dst.z, s0.x */
   3765    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   3766       struct tgsi_full_dst_register dst_z =
   3767          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   3768 
   3769       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
   3770                            inst->Instruction.Saturate);
   3771    }
   3772 
   3773    /* MOV dst.w, 1.0 */
   3774    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   3775       struct tgsi_full_dst_register dst_w =
   3776          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   3777       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3778 
   3779       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
   3780                            FALSE);
   3781    }
   3782 
   3783    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   3784                         FALSE);
   3785 
   3786    free_temp_indexes(emit);
   3787 
   3788    return TRUE;
   3789 }
   3790 
   3791 
   3792 /**
   3793  * Emit code for TGSI_OPCODE_IF instruction.
   3794  */
   3795 static boolean
   3796 emit_if(struct svga_shader_emitter_v10 *emit,
   3797         const struct tgsi_full_instruction *inst)
   3798 {
   3799    VGPU10OpcodeToken0 opcode0;
   3800 
   3801    /* The src register should be a scalar */
   3802    assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
   3803           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
   3804           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
   3805 
   3806    /* The only special thing here is that we need to set the
   3807     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
   3808     * src.x is non-zero.
   3809     */
   3810    opcode0.value = 0;
   3811    opcode0.opcodeType = VGPU10_OPCODE_IF;
   3812    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
   3813 
   3814    begin_emit_instruction(emit);
   3815    emit_dword(emit, opcode0.value);
   3816    emit_src_register(emit, &inst->Src[0]);
   3817    end_emit_instruction(emit);
   3818 
   3819    return TRUE;
   3820 }
   3821 
   3822 
   3823 /**
   3824  * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
   3825  * the register components are negative).
   3826  */
   3827 static boolean
   3828 emit_kill_if(struct svga_shader_emitter_v10 *emit,
   3829              const struct tgsi_full_instruction *inst)
   3830 {
   3831    unsigned tmp = get_temp_index(emit);
   3832    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3833    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3834 
   3835    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3836 
   3837    struct tgsi_full_dst_register tmp_dst_x =
   3838       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   3839    struct tgsi_full_src_register tmp_src_xxxx =
   3840       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   3841 
   3842    /* tmp = src[0] < 0.0 */
   3843    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
   3844                         &zero, FALSE);
   3845 
   3846    if (!same_swizzle_terms(&inst->Src[0])) {
   3847       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
   3848        * logically OR the swizzle terms.  Most uses of KILL_IF only
   3849        * test one channel so it's good to avoid these extra steps.
   3850        */
   3851       struct tgsi_full_src_register tmp_src_yyyy =
   3852          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
   3853       struct tgsi_full_src_register tmp_src_zzzz =
   3854          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
   3855       struct tgsi_full_src_register tmp_src_wwww =
   3856          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
   3857 
   3858       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3859                            &tmp_src_yyyy, FALSE);
   3860       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3861                            &tmp_src_zzzz, FALSE);
   3862       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3863                            &tmp_src_wwww, FALSE);
   3864    }
   3865 
   3866    begin_emit_instruction(emit);
   3867    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
   3868    emit_src_register(emit, &tmp_src_xxxx);
   3869    end_emit_instruction(emit);
   3870 
   3871    free_temp_indexes(emit);
   3872 
   3873    return TRUE;
   3874 }
   3875 
   3876 
   3877 /**
   3878  * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
   3879  */
   3880 static boolean
   3881 emit_kill(struct svga_shader_emitter_v10 *emit,
   3882           const struct tgsi_full_instruction *inst)
   3883 {
   3884    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3885 
   3886    /* DISCARD if 0.0 is zero */
   3887    begin_emit_instruction(emit);
   3888    emit_discard_opcode(emit, FALSE);
   3889    emit_src_register(emit, &zero);
   3890    end_emit_instruction(emit);
   3891 
   3892    return TRUE;
   3893 }
   3894 
   3895 
   3896 /**
   3897  * Emit code for TGSI_OPCODE_LG2 instruction.
   3898  */
   3899 static boolean
   3900 emit_lg2(struct svga_shader_emitter_v10 *emit,
   3901          const struct tgsi_full_instruction *inst)
   3902 {
   3903    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
   3904     * while VGPU10 computes four values.
   3905     *
   3906     * dst = LG2(src):
   3907     *   dst.xyzw = log2(src.x)
   3908     */
   3909 
   3910    struct tgsi_full_src_register src_xxxx =
   3911       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   3912                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   3913 
   3914    /* LOG tmp, s0.xxxx */
   3915    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
   3916                         inst->Instruction.Saturate);
   3917 
   3918    return TRUE;
   3919 }
   3920 
   3921 
   3922 /**
   3923  * Emit code for TGSI_OPCODE_LIT instruction.
   3924  */
   3925 static boolean
   3926 emit_lit(struct svga_shader_emitter_v10 *emit,
   3927          const struct tgsi_full_instruction *inst)
   3928 {
   3929    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3930 
   3931    /*
   3932     * If dst and src are the same we need to create
   3933     * a temporary for it and insert a extra move.
   3934     */
   3935    unsigned tmp_move = get_temp_index(emit);
   3936    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   3937    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   3938 
   3939    /*
   3940     * dst.x = 1
   3941     * dst.y = max(src.x, 0)
   3942     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
   3943     * dst.w = 1
   3944     */
   3945 
   3946    /* MOV dst.x, 1.0 */
   3947    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   3948       struct tgsi_full_dst_register dst_x =
   3949          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   3950       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   3951    }
   3952 
   3953    /* MOV dst.w, 1.0 */
   3954    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   3955       struct tgsi_full_dst_register dst_w =
   3956          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   3957       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   3958    }
   3959 
   3960    /* MAX dst.y, src.x, 0.0 */
   3961    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   3962       struct tgsi_full_dst_register dst_y =
   3963          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   3964       struct tgsi_full_src_register zero =
   3965          make_immediate_reg_float(emit, 0.0f);
   3966       struct tgsi_full_src_register src_xxxx =
   3967          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   3968                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   3969 
   3970       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
   3971                            &zero, inst->Instruction.Saturate);
   3972    }
   3973 
   3974    /*
   3975     * tmp1 = clamp(src.w, -128, 128);
   3976     *   MAX tmp1, src.w, -128
   3977     *   MIN tmp1, tmp1, 128
   3978     *
   3979     * tmp2 = max(tmp2, 0);
   3980     *   MAX tmp2, src.y, 0
   3981     *
   3982     * tmp1 = pow(tmp2, tmp1);
   3983     *   LOG tmp2, tmp2
   3984     *   MUL tmp1, tmp2, tmp1
   3985     *   EXP tmp1, tmp1
   3986     *
   3987     * tmp1 = (src.w == 0) ? 1 : tmp1;
   3988     *   EQ tmp2, 0, src.w
   3989     *   MOVC tmp1, tmp2, 1.0, tmp1
   3990     *
   3991     * dst.z = (0 < src.x) ? tmp1 : 0;
   3992     *   LT tmp2, 0, src.x
   3993     *   MOVC dst.z, tmp2, tmp1, 0.0
   3994     */
   3995    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   3996       struct tgsi_full_dst_register dst_z =
   3997          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   3998 
   3999       unsigned tmp1 = get_temp_index(emit);
   4000       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4001       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4002       unsigned tmp2 = get_temp_index(emit);
   4003       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4004       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4005 
   4006       struct tgsi_full_src_register src_xxxx =
   4007          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   4008       struct tgsi_full_src_register src_yyyy =
   4009          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   4010       struct tgsi_full_src_register src_wwww =
   4011          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   4012 
   4013       struct tgsi_full_src_register zero =
   4014          make_immediate_reg_float(emit, 0.0f);
   4015       struct tgsi_full_src_register lowerbound =
   4016          make_immediate_reg_float(emit, -128.0f);
   4017       struct tgsi_full_src_register upperbound =
   4018          make_immediate_reg_float(emit, 128.0f);
   4019 
   4020       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
   4021                            &lowerbound, FALSE);
   4022       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
   4023                            &upperbound, FALSE);
   4024       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
   4025                            &zero, FALSE);
   4026 
   4027       /* POW tmp1, tmp2, tmp1 */
   4028       /* LOG tmp2, tmp2 */
   4029       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
   4030                            FALSE);
   4031 
   4032       /* MUL tmp1, tmp2, tmp1 */
   4033       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
   4034                            &tmp1_src, FALSE);
   4035 
   4036       /* EXP tmp1, tmp1 */
   4037       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
   4038                            FALSE);
   4039 
   4040       /* EQ tmp2, 0, src.w */
   4041       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
   4042                            &src_wwww, FALSE);
   4043       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
   4044       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
   4045                            &tmp2_src, &one, &tmp1_src, FALSE);
   4046 
   4047       /* LT tmp2, 0, src.x */
   4048       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
   4049                            &src_xxxx, FALSE);
   4050       /* MOVC dst.z, tmp2, tmp1, 0.0 */
   4051       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
   4052                            &tmp2_src, &tmp1_src, &zero, FALSE);
   4053    }
   4054 
   4055    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   4056                         FALSE);
   4057    free_temp_indexes(emit);
   4058 
   4059    return TRUE;
   4060 }
   4061 
   4062 
   4063 /**
   4064  * Emit code for TGSI_OPCODE_LOG instruction.
   4065  */
   4066 static boolean
   4067 emit_log(struct svga_shader_emitter_v10 *emit,
   4068          const struct tgsi_full_instruction *inst)
   4069 {
   4070    /*
   4071     * dst.x = floor(lg2(abs(s0.x)))
   4072     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
   4073     * dst.z = lg2(abs(s0.x))
   4074     * dst.w = 1.0
   4075     */
   4076 
   4077    struct tgsi_full_src_register src_xxxx =
   4078       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   4079    unsigned tmp = get_temp_index(emit);
   4080    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4081    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4082    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
   4083 
   4084    /* only use X component of temp reg */
   4085    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4086    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4087 
   4088    /* LOG tmp.x, abs(s0.x) */
   4089    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
   4090       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
   4091                           &abs_src_xxxx, FALSE);
   4092    }
   4093 
   4094    /* MOV dst.z, tmp.x */
   4095    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   4096       struct tgsi_full_dst_register dst_z =
   4097          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
   4098 
   4099       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
   4100                            &tmp_src, inst->Instruction.Saturate);
   4101    }
   4102 
   4103    /* FLR tmp.x, tmp.x */
   4104    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
   4105       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
   4106                            &tmp_src, FALSE);
   4107    }
   4108 
   4109    /* MOV dst.x, tmp.x */
   4110    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   4111       struct tgsi_full_dst_register dst_x =
   4112          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
   4113 
   4114       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
   4115                            inst->Instruction.Saturate);
   4116    }
   4117 
   4118    /* EXP tmp.x, tmp.x */
   4119    /* DIV dst.y, abs(s0.x), tmp.x */
   4120    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   4121       struct tgsi_full_dst_register dst_y =
   4122          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
   4123 
   4124       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
   4125                            FALSE);
   4126       emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
   4127                            &tmp_src, inst->Instruction.Saturate);
   4128    }
   4129 
   4130    /* MOV dst.w, 1.0 */
   4131    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   4132       struct tgsi_full_dst_register dst_w =
   4133          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
   4134       struct tgsi_full_src_register one =
   4135          make_immediate_reg_float(emit, 1.0f);
   4136 
   4137       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   4138    }
   4139 
   4140    free_temp_indexes(emit);
   4141 
   4142    return TRUE;
   4143 }
   4144 
   4145 
   4146 /**
   4147  * Emit code for TGSI_OPCODE_LRP instruction.
   4148  */
   4149 static boolean
   4150 emit_lrp(struct svga_shader_emitter_v10 *emit,
   4151          const struct tgsi_full_instruction *inst)
   4152 {
   4153    /* dst = LRP(s0, s1, s2):
   4154     *   dst = s0 * (s1 - s2) + s2
   4155     * Translates into:
   4156     *   SUB tmp, s1, s2;        tmp = s1 - s2
   4157     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
   4158     */
   4159    unsigned tmp = get_temp_index(emit);
   4160    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
   4161    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
   4162    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
   4163 
   4164    /* ADD tmp, s1, -s2 */
   4165    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
   4166                         &inst->Src[1], &neg_src2, FALSE);
   4167 
   4168    /* MAD dst, s1, tmp, s3 */
   4169    emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
   4170                         &inst->Src[0], &src_tmp, &inst->Src[2],
   4171                         inst->Instruction.Saturate);
   4172 
   4173    free_temp_indexes(emit);
   4174 
   4175    return TRUE;
   4176 }
   4177 
   4178 
   4179 /**
   4180  * Emit code for TGSI_OPCODE_POW instruction.
   4181  */
   4182 static boolean
   4183 emit_pow(struct svga_shader_emitter_v10 *emit,
   4184          const struct tgsi_full_instruction *inst)
   4185 {
   4186    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
   4187     * src1.x while VGPU10 computes four values.
   4188     *
   4189     * dst = POW(src0, src1):
   4190     *   dst.xyzw = src0.x ^ src1.x
   4191     */
   4192    unsigned tmp = get_temp_index(emit);
   4193    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4194    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4195    struct tgsi_full_src_register src0_xxxx =
   4196       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   4197                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   4198    struct tgsi_full_src_register src1_xxxx =
   4199       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   4200                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   4201 
   4202    /* LOG tmp, s0.xxxx */
   4203    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
   4204                         FALSE);
   4205 
   4206    /* MUL tmp, tmp, s1.xxxx */
   4207    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
   4208                         &src1_xxxx, FALSE);
   4209 
   4210    /* EXP tmp, s0.xxxx */
   4211    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
   4212                         &tmp_src, inst->Instruction.Saturate);
   4213 
   4214    /* free tmp */
   4215    free_temp_indexes(emit);
   4216 
   4217    return TRUE;
   4218 }
   4219 
   4220 
   4221 /**
   4222  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
   4223  */
   4224 static boolean
   4225 emit_rcp(struct svga_shader_emitter_v10 *emit,
   4226          const struct tgsi_full_instruction *inst)
   4227 {
   4228    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4229 
   4230    unsigned tmp = get_temp_index(emit);
   4231    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4232    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4233 
   4234    struct tgsi_full_dst_register tmp_dst_x =
   4235       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4236    struct tgsi_full_src_register tmp_src_xxxx =
   4237       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4238 
   4239    /* DIV tmp.x, 1.0, s0 */
   4240    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
   4241                         &inst->Src[0], FALSE);
   4242 
   4243    /* MOV dst, tmp.xxxx */
   4244    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4245                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4246 
   4247    free_temp_indexes(emit);
   4248 
   4249    return TRUE;
   4250 }
   4251 
   4252 
   4253 /**
   4254  * Emit code for TGSI_OPCODE_RSQ instruction.
   4255  */
   4256 static boolean
   4257 emit_rsq(struct svga_shader_emitter_v10 *emit,
   4258          const struct tgsi_full_instruction *inst)
   4259 {
   4260    /* dst = RSQ(src):
   4261     *   dst.xyzw = 1 / sqrt(src.x)
   4262     * Translates into:
   4263     *   RSQ tmp, src.x
   4264     *   MOV dst, tmp.xxxx
   4265     */
   4266 
   4267    unsigned tmp = get_temp_index(emit);
   4268    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4269    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4270 
   4271    struct tgsi_full_dst_register tmp_dst_x =
   4272       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4273    struct tgsi_full_src_register tmp_src_xxxx =
   4274       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4275 
   4276    /* RSQ tmp, src.x */
   4277    emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
   4278                         &inst->Src[0], FALSE);
   4279 
   4280    /* MOV dst, tmp.xxxx */
   4281    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4282                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4283 
   4284    /* free tmp */
   4285    free_temp_indexes(emit);
   4286 
   4287    return TRUE;
   4288 }
   4289 
   4290 
   4291 /**
   4292  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
   4293  */
   4294 static boolean
   4295 emit_seq(struct svga_shader_emitter_v10 *emit,
   4296          const struct tgsi_full_instruction *inst)
   4297 {
   4298    /* dst = SEQ(s0, s1):
   4299     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
   4300     * Translates into:
   4301     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
   4302     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4303     */
   4304    unsigned tmp = get_temp_index(emit);
   4305    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4306    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4307    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4308    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4309 
   4310    /* EQ tmp, s0, s1 */
   4311    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
   4312                         &inst->Src[1], FALSE);
   4313 
   4314    /* MOVC dst, tmp, one, zero */
   4315    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4316                         &one, &zero, FALSE);
   4317 
   4318    free_temp_indexes(emit);
   4319 
   4320    return TRUE;
   4321 }
   4322 
   4323 
   4324 /**
   4325  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
   4326  */
   4327 static boolean
   4328 emit_sge(struct svga_shader_emitter_v10 *emit,
   4329          const struct tgsi_full_instruction *inst)
   4330 {
   4331    /* dst = SGE(s0, s1):
   4332     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
   4333     * Translates into:
   4334     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
   4335     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4336     */
   4337    unsigned tmp = get_temp_index(emit);
   4338    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4339    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4340    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4341    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4342 
   4343    /* GE tmp, s0, s1 */
   4344    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
   4345                         &inst->Src[1], FALSE);
   4346 
   4347    /* MOVC dst, tmp, one, zero */
   4348    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4349                         &one, &zero, FALSE);
   4350 
   4351    free_temp_indexes(emit);
   4352 
   4353    return TRUE;
   4354 }
   4355 
   4356 
   4357 /**
   4358  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
   4359  */
   4360 static boolean
   4361 emit_sgt(struct svga_shader_emitter_v10 *emit,
   4362          const struct tgsi_full_instruction *inst)
   4363 {
   4364    /* dst = SGT(s0, s1):
   4365     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
   4366     * Translates into:
   4367     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
   4368     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4369     */
   4370    unsigned tmp = get_temp_index(emit);
   4371    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4372    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4373    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4374    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4375 
   4376    /* LT tmp, s1, s0 */
   4377    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
   4378                         &inst->Src[0], FALSE);
   4379 
   4380    /* MOVC dst, tmp, one, zero */
   4381    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4382                         &one, &zero, FALSE);
   4383 
   4384    free_temp_indexes(emit);
   4385 
   4386    return TRUE;
   4387 }
   4388 
   4389 
   4390 /**
   4391  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
   4392  */
   4393 static boolean
   4394 emit_sincos(struct svga_shader_emitter_v10 *emit,
   4395          const struct tgsi_full_instruction *inst)
   4396 {
   4397    unsigned tmp = get_temp_index(emit);
   4398    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4399    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4400 
   4401    struct tgsi_full_src_register tmp_src_xxxx =
   4402       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4403    struct tgsi_full_dst_register tmp_dst_x =
   4404       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4405 
   4406    begin_emit_instruction(emit);
   4407    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
   4408 
   4409    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
   4410    {
   4411       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
   4412       emit_null_dst_register(emit);  /* second destination register */
   4413    }
   4414    else {
   4415       emit_null_dst_register(emit);
   4416       emit_dst_register(emit, &tmp_dst_x);
   4417    }
   4418 
   4419    emit_src_register(emit, &inst->Src[0]);
   4420    end_emit_instruction(emit);
   4421 
   4422    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4423                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4424 
   4425    free_temp_indexes(emit);
   4426 
   4427    return TRUE;
   4428 }
   4429 
   4430 
   4431 /**
   4432  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
   4433  */
   4434 static boolean
   4435 emit_sle(struct svga_shader_emitter_v10 *emit,
   4436          const struct tgsi_full_instruction *inst)
   4437 {
   4438    /* dst = SLE(s0, s1):
   4439     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
   4440     * Translates into:
   4441     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
   4442     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4443     */
   4444    unsigned tmp = get_temp_index(emit);
   4445    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4446    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4447    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4448    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4449 
   4450    /* GE tmp, s1, s0 */
   4451    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
   4452                         &inst->Src[0], FALSE);
   4453 
   4454    /* MOVC dst, tmp, one, zero */
   4455    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4456                         &one, &zero, FALSE);
   4457 
   4458    free_temp_indexes(emit);
   4459 
   4460    return TRUE;
   4461 }
   4462 
   4463 
   4464 /**
   4465  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
   4466  */
   4467 static boolean
   4468 emit_slt(struct svga_shader_emitter_v10 *emit,
   4469          const struct tgsi_full_instruction *inst)
   4470 {
   4471    /* dst = SLT(s0, s1):
   4472     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
   4473     * Translates into:
   4474     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
   4475     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4476     */
   4477    unsigned tmp = get_temp_index(emit);
   4478    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4479    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4480    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4481    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4482 
   4483    /* LT tmp, s0, s1 */
   4484    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
   4485                         &inst->Src[1], FALSE);
   4486 
   4487    /* MOVC dst, tmp, one, zero */
   4488    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4489                         &one, &zero, FALSE);
   4490 
   4491    free_temp_indexes(emit);
   4492 
   4493    return TRUE;
   4494 }
   4495 
   4496 
   4497 /**
   4498  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
   4499  */
   4500 static boolean
   4501 emit_sne(struct svga_shader_emitter_v10 *emit,
   4502          const struct tgsi_full_instruction *inst)
   4503 {
   4504    /* dst = SNE(s0, s1):
   4505     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
   4506     * Translates into:
   4507     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
   4508     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4509     */
   4510    unsigned tmp = get_temp_index(emit);
   4511    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4512    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4513    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4514    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4515 
   4516    /* NE tmp, s0, s1 */
   4517    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
   4518                         &inst->Src[1], FALSE);
   4519 
   4520    /* MOVC dst, tmp, one, zero */
   4521    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4522                         &one, &zero, FALSE);
   4523 
   4524    free_temp_indexes(emit);
   4525 
   4526    return TRUE;
   4527 }
   4528 
   4529 
   4530 /**
   4531  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
   4532  */
   4533 static boolean
   4534 emit_ssg(struct svga_shader_emitter_v10 *emit,
   4535          const struct tgsi_full_instruction *inst)
   4536 {
   4537    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
   4538     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
   4539     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
   4540     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
   4541     * Translates into:
   4542     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
   4543     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
   4544     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
   4545     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
   4546     */
   4547    struct tgsi_full_src_register zero =
   4548       make_immediate_reg_float(emit, 0.0f);
   4549    struct tgsi_full_src_register one =
   4550       make_immediate_reg_float(emit, 1.0f);
   4551    struct tgsi_full_src_register neg_one =
   4552       make_immediate_reg_float(emit, -1.0f);
   4553 
   4554    unsigned tmp1 = get_temp_index(emit);
   4555    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4556    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4557 
   4558    unsigned tmp2 = get_temp_index(emit);
   4559    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4560    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4561 
   4562    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
   4563                         &zero, FALSE);
   4564    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
   4565                         &neg_one, &zero, FALSE);
   4566    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
   4567                         &inst->Src[0], FALSE);
   4568    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
   4569                         &one, &tmp2_src, FALSE);
   4570 
   4571    free_temp_indexes(emit);
   4572 
   4573    return TRUE;
   4574 }
   4575 
   4576 
   4577 /**
   4578  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
   4579  */
   4580 static boolean
   4581 emit_issg(struct svga_shader_emitter_v10 *emit,
   4582           const struct tgsi_full_instruction *inst)
   4583 {
   4584    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
   4585     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
   4586     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
   4587     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
   4588     * Translates into:
   4589     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
   4590     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
   4591     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
   4592     */
   4593    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4594 
   4595    unsigned tmp1 = get_temp_index(emit);
   4596    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4597    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4598 
   4599    unsigned tmp2 = get_temp_index(emit);
   4600    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4601    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4602 
   4603    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
   4604 
   4605    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
   4606                         &inst->Src[0], &zero, FALSE);
   4607    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
   4608                         &zero, &inst->Src[0], FALSE);
   4609    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
   4610                         &tmp1_src, &neg_tmp2, FALSE);
   4611 
   4612    free_temp_indexes(emit);
   4613 
   4614    return TRUE;
   4615 }
   4616 
   4617 
   4618 /**
   4619  * Emit a comparison instruction.  The dest register will get
   4620  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
   4621  */
   4622 static void
   4623 emit_comparison(struct svga_shader_emitter_v10 *emit,
   4624                 SVGA3dCmpFunc func,
   4625                 const struct tgsi_full_dst_register *dst,
   4626                 const struct tgsi_full_src_register *src0,
   4627                 const struct tgsi_full_src_register *src1)
   4628 {
   4629    struct tgsi_full_src_register immediate;
   4630    VGPU10OpcodeToken0 opcode0;
   4631    boolean swapSrc = FALSE;
   4632 
   4633    /* Sanity checks for svga vs. gallium enums */
   4634    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
   4635    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
   4636 
   4637    opcode0.value = 0;
   4638 
   4639    switch (func) {
   4640    case SVGA3D_CMP_NEVER:
   4641       immediate = make_immediate_reg_int(emit, 0);
   4642       /* MOV dst, {0} */
   4643       begin_emit_instruction(emit);
   4644       emit_dword(emit, VGPU10_OPCODE_MOV);
   4645       emit_dst_register(emit, dst);
   4646       emit_src_register(emit, &immediate);
   4647       end_emit_instruction(emit);
   4648       return;
   4649    case SVGA3D_CMP_ALWAYS:
   4650       immediate = make_immediate_reg_int(emit, -1);
   4651       /* MOV dst, {-1} */
   4652       begin_emit_instruction(emit);
   4653       emit_dword(emit, VGPU10_OPCODE_MOV);
   4654       emit_dst_register(emit, dst);
   4655       emit_src_register(emit, &immediate);
   4656       end_emit_instruction(emit);
   4657       return;
   4658    case SVGA3D_CMP_LESS:
   4659       opcode0.opcodeType = VGPU10_OPCODE_LT;
   4660       break;
   4661    case SVGA3D_CMP_EQUAL:
   4662       opcode0.opcodeType = VGPU10_OPCODE_EQ;
   4663       break;
   4664    case SVGA3D_CMP_LESSEQUAL:
   4665       opcode0.opcodeType = VGPU10_OPCODE_GE;
   4666       swapSrc = TRUE;
   4667       break;
   4668    case SVGA3D_CMP_GREATER:
   4669       opcode0.opcodeType = VGPU10_OPCODE_LT;
   4670       swapSrc = TRUE;
   4671       break;
   4672    case SVGA3D_CMP_NOTEQUAL:
   4673       opcode0.opcodeType = VGPU10_OPCODE_NE;
   4674       break;
   4675    case SVGA3D_CMP_GREATEREQUAL:
   4676       opcode0.opcodeType = VGPU10_OPCODE_GE;
   4677       break;
   4678    default:
   4679       assert(!"Unexpected comparison mode");
   4680       opcode0.opcodeType = VGPU10_OPCODE_EQ;
   4681    }
   4682 
   4683    begin_emit_instruction(emit);
   4684    emit_dword(emit, opcode0.value);
   4685    emit_dst_register(emit, dst);
   4686    if (swapSrc) {
   4687       emit_src_register(emit, src1);
   4688       emit_src_register(emit, src0);
   4689    }
   4690    else {
   4691       emit_src_register(emit, src0);
   4692       emit_src_register(emit, src1);
   4693    }
   4694    end_emit_instruction(emit);
   4695 }
   4696 
   4697 
   4698 /**
   4699  * Get texel/address offsets for a texture instruction.
   4700  */
   4701 static void
   4702 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
   4703                   const struct tgsi_full_instruction *inst, int offsets[3])
   4704 {
   4705    if (inst->Texture.NumOffsets == 1) {
   4706       /* According to OpenGL Shader Language spec the offsets are only
   4707        * fetched from a previously-declared immediate/literal.
   4708        */
   4709       const struct tgsi_texture_offset *off = inst->TexOffsets;
   4710       const unsigned index = off[0].Index;
   4711       const unsigned swizzleX = off[0].SwizzleX;
   4712       const unsigned swizzleY = off[0].SwizzleY;
   4713       const unsigned swizzleZ = off[0].SwizzleZ;
   4714       const union tgsi_immediate_data *imm = emit->immediates[index];
   4715 
   4716       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
   4717 
   4718       offsets[0] = imm[swizzleX].Int;
   4719       offsets[1] = imm[swizzleY].Int;
   4720       offsets[2] = imm[swizzleZ].Int;
   4721    }
   4722    else {
   4723       offsets[0] = offsets[1] = offsets[2] = 0;
   4724    }
   4725 }
   4726 
   4727 
   4728 /**
   4729  * Set up the coordinate register for texture sampling.
   4730  * When we're sampling from a RECT texture we have to scale the
   4731  * unnormalized coordinate to a normalized coordinate.
   4732  * We do that by multiplying the coordinate by an "extra" constant.
   4733  * An alternative would be to use the RESINFO instruction to query the
   4734  * texture's size.
   4735  */
   4736 static struct tgsi_full_src_register
   4737 setup_texcoord(struct svga_shader_emitter_v10 *emit,
   4738                unsigned unit,
   4739                const struct tgsi_full_src_register *coord)
   4740 {
   4741    if (emit->key.tex[unit].unnormalized) {
   4742       unsigned scale_index = emit->texcoord_scale_index[unit];
   4743       unsigned tmp = get_temp_index(emit);
   4744       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4745       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4746       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
   4747 
   4748       if (emit->key.tex[unit].texel_bias) {
   4749          /* to fix texture coordinate rounding issue, 0.0001 offset is
   4750           * been added. This fixes piglit test fbo-blit-scaled-linear. */
   4751          struct tgsi_full_src_register offset =
   4752             make_immediate_reg_float(emit, 0.0001f);
   4753 
   4754          /* ADD tmp, coord, offset */
   4755          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
   4756                               coord, &offset, FALSE);
   4757          /* MUL tmp, tmp, scale */
   4758          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
   4759                               &tmp_src, &scale_src, FALSE);
   4760       }
   4761       else {
   4762          /* MUL tmp, coord, const[] */
   4763          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
   4764                               coord, &scale_src, FALSE);
   4765       }
   4766       return tmp_src;
   4767    }
   4768    else {
   4769       /* use texcoord as-is */
   4770       return *coord;
   4771    }
   4772 }
   4773 
   4774 
   4775 /**
   4776  * For SAMPLE_C instructions, emit the extra src register which indicates
   4777  * the reference/comparision value.
   4778  */
   4779 static void
   4780 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
   4781                           enum tgsi_texture_type target,
   4782                           const struct tgsi_full_src_register *coord)
   4783 {
   4784    struct tgsi_full_src_register coord_src_ref;
   4785    int component;
   4786 
   4787    assert(tgsi_is_shadow_target(target));
   4788 
   4789    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
   4790    assert(component >= 0);
   4791 
   4792    coord_src_ref = scalar_src(coord, component);
   4793 
   4794    emit_src_register(emit, &coord_src_ref);
   4795 }
   4796 
   4797 
   /**
    * Info for implementing texture swizzles.
    * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
    * functions use this to encapsulate the extra steps needed to perform
    * a texture swizzle, or shadow/depth comparisons.
    * The shadow/depth comparison is only done here for the cases where
    * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
    */
   struct tex_swizzle_info
   {
      boolean swizzled;        /**< is the unit's swizzle something other than XYZW? */
      boolean shadow_compare;  /**< emit an explicit shadow/depth comparison? */
      unsigned unit;           /**< texture unit; end_tex_swizzle() reads it only
                                *   when swizzled or shadow_compare is set */
      enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
      struct tgsi_full_src_register tmp_src;  /**< temp holding the fetched texel (as
                                               *   a source); set up only when
                                               *   swizzled or shadow_compare */
      struct tgsi_full_dst_register tmp_dst;  /**< same temp, as a destination */
      const struct tgsi_full_dst_register *inst_dst;  /**< the instruction's Dst[0] */
      const struct tgsi_full_src_register *coord_src; /**< the instruction's Src[0] */
   };
   4817 
   4818 
   4819 /**
   4820  * Do setup for handling texture swizzles or shadow compares.
   4821  * \param unit  the texture unit
   4822  * \param inst  the TGSI texture instruction
   4823  * \param shadow_compare  do shadow/depth comparison?
   4824  * \param swz  returns the swizzle info
   4825  */
   4826 static void
   4827 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
   4828                   unsigned unit,
   4829                   const struct tgsi_full_instruction *inst,
   4830                   boolean shadow_compare,
   4831                   struct tex_swizzle_info *swz)
   4832 {
   4833    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
   4834                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
   4835                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
   4836                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
   4837 
   4838    swz->shadow_compare = shadow_compare;
   4839    swz->texture_target = inst->Texture.Texture;
   4840 
   4841    if (swz->swizzled || shadow_compare) {
   4842       /* Allocate temp register for the result of the SAMPLE instruction
   4843        * and the source of the MOV/compare/swizzle instructions.
   4844        */
   4845       unsigned tmp = get_temp_index(emit);
   4846       swz->tmp_src = make_src_temp_reg(tmp);
   4847       swz->tmp_dst = make_dst_temp_reg(tmp);
   4848 
   4849       swz->unit = unit;
   4850    }
   4851    swz->inst_dst = &inst->Dst[0];
   4852    swz->coord_src = &inst->Src[0];
   4853 
   4854    emit->fs.shadow_compare_units |= shadow_compare << unit;
   4855 }
   4856 
   4857 
   4858 /**
   4859  * Returns the register to put the SAMPLE instruction results into.
   4860  * This will either be the original instruction dst reg (if no swizzle
   4861  * and no shadow comparison) or a temporary reg if there is a swizzle.
   4862  */
   4863 static const struct tgsi_full_dst_register *
   4864 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
   4865 {
   4866    return (swz->swizzled || swz->shadow_compare)
   4867       ? &swz->tmp_dst : swz->inst_dst;
   4868 }
   4869 
   4870 
   4871 /**
   4872  * This emits the MOV instruction that actually implements a texture swizzle
   4873  * and/or shadow comparison.
   4874  */
   4875 static void
   4876 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
   4877                 const struct tex_swizzle_info *swz)
   4878 {
   4879    if (swz->shadow_compare) {
   4880       /* Emit extra instructions to compare the fetched texel value against
   4881        * a texture coordinate component.  The result of the comparison
   4882        * is 0.0 or 1.0.
   4883        */
   4884       struct tgsi_full_src_register coord_src;
   4885       struct tgsi_full_src_register texel_src =
   4886          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
   4887       struct tgsi_full_src_register one =
   4888          make_immediate_reg_float(emit, 1.0f);
   4889       /* convert gallium comparison func to SVGA comparison func */
   4890       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
   4891 
   4892       assert(emit->unit == PIPE_SHADER_FRAGMENT);
   4893 
   4894       int component =
   4895          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
   4896       assert(component >= 0);
   4897       coord_src = scalar_src(swz->coord_src, component);
   4898 
   4899       /* COMPARE tmp, coord, texel */
   4900       emit_comparison(emit, compare_func,
   4901                       &swz->tmp_dst, &coord_src, &texel_src);
   4902 
   4903       /* AND dest, tmp, {1.0} */
   4904       begin_emit_instruction(emit);
   4905       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
   4906       if (swz->swizzled) {
   4907          emit_dst_register(emit, &swz->tmp_dst);
   4908       }
   4909       else {
   4910          emit_dst_register(emit, swz->inst_dst);
   4911       }
   4912       emit_src_register(emit, &swz->tmp_src);
   4913       emit_src_register(emit, &one);
   4914       end_emit_instruction(emit);
   4915    }
   4916 
   4917    if (swz->swizzled) {
   4918       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
   4919       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
   4920       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
   4921       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
   4922       unsigned writemask_0 = 0, writemask_1 = 0;
   4923       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
   4924 
   4925       /* Swizzle w/out zero/one terms */
   4926       struct tgsi_full_src_register src_swizzled =
   4927          swizzle_src(&swz->tmp_src,
   4928                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
   4929                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
   4930                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
   4931                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
   4932 
   4933       /* MOV dst, color(tmp).<swizzle> */
   4934       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   4935                            swz->inst_dst, &src_swizzled, FALSE);
   4936 
   4937       /* handle swizzle zero terms */
   4938       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
   4939                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
   4940                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
   4941                      ((swz_a == PIPE_SWIZZLE_0) << 3));
   4942       writemask_0 &= swz->inst_dst->Register.WriteMask;
   4943 
   4944       if (writemask_0) {
   4945          struct tgsi_full_src_register zero = int_tex ?
   4946             make_immediate_reg_int(emit, 0) :
   4947             make_immediate_reg_float(emit, 0.0f);
   4948          struct tgsi_full_dst_register dst =
   4949             writemask_dst(swz->inst_dst, writemask_0);
   4950 
   4951          /* MOV dst.writemask_0, {0,0,0,0} */
   4952          emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   4953                               &dst, &zero, FALSE);
   4954       }
   4955 
   4956       /* handle swizzle one terms */
   4957       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
   4958                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
   4959                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
   4960                      ((swz_a == PIPE_SWIZZLE_1) << 3));
   4961       writemask_1 &= swz->inst_dst->Register.WriteMask;
   4962 
   4963       if (writemask_1) {
   4964          struct tgsi_full_src_register one = int_tex ?
   4965             make_immediate_reg_int(emit, 1) :
   4966             make_immediate_reg_float(emit, 1.0f);
   4967          struct tgsi_full_dst_register dst =
   4968             writemask_dst(swz->inst_dst, writemask_1);
   4969 
   4970          /* MOV dst.writemask_1, {1,1,1,1} */
   4971          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
   4972       }
   4973    }
   4974 }
   4975 
   4976 
   4977 /**
   4978  * Emit code for TGSI_OPCODE_SAMPLE instruction.
   4979  */
   4980 static boolean
   4981 emit_sample(struct svga_shader_emitter_v10 *emit,
   4982             const struct tgsi_full_instruction *inst)
   4983 {
   4984    const unsigned resource_unit = inst->Src[1].Register.Index;
   4985    const unsigned sampler_unit = inst->Src[2].Register.Index;
   4986    struct tgsi_full_src_register coord;
   4987    int offsets[3];
   4988    struct tex_swizzle_info swz_info;
   4989 
   4990    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
   4991 
   4992    get_texel_offsets(emit, inst, offsets);
   4993 
   4994    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
   4995 
   4996    /* SAMPLE dst, coord(s0), resource, sampler */
   4997    begin_emit_instruction(emit);
   4998 
   4999    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
   5000     * with LOD=0.  But our virtual GPU accepts this as-is.
   5001     */
   5002    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
   5003                       inst->Instruction.Saturate, offsets);
   5004    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5005    emit_src_register(emit, &coord);
   5006    emit_resource_register(emit, resource_unit);
   5007    emit_sampler_register(emit, sampler_unit);
   5008    end_emit_instruction(emit);
   5009 
   5010    end_tex_swizzle(emit, &swz_info);
   5011 
   5012    free_temp_indexes(emit);
   5013 
   5014    return TRUE;
   5015 }
   5016 
   5017 
   5018 /**
   5019  * Check if a texture instruction is valid.
   5020  * An example of an invalid texture instruction is doing shadow comparison
   5021  * with an integer-valued texture.
   5022  * If we detect an invalid texture instruction, we replace it with:
   5023  *   MOV dst, {1,1,1,1};
   5024  * \return TRUE if valid, FALSE if invalid.
   5025  */
   5026 static boolean
   5027 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
   5028                          const struct tgsi_full_instruction *inst)
   5029 {
   5030    const unsigned unit = inst->Src[1].Register.Index;
   5031    const enum tgsi_texture_type target = inst->Texture.Texture;
   5032    boolean valid = TRUE;
   5033 
   5034    if (tgsi_is_shadow_target(target) &&
   5035        is_integer_type(emit->sampler_return_type[unit])) {
   5036       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
   5037       valid = FALSE;
   5038    }
   5039    /* XXX might check for other conditions in the future here */
   5040 
   5041    if (!valid) {
   5042       /* emit a MOV dst, {1,1,1,1} instruction. */
   5043       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   5044       begin_emit_instruction(emit);
   5045       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   5046       emit_dst_register(emit, &inst->Dst[0]);
   5047       emit_src_register(emit, &one);
   5048       end_emit_instruction(emit);
   5049    }
   5050 
   5051    return valid;
   5052 }
   5053 
   5054 
   5055 /**
   5056  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
   5057  */
   5058 static boolean
   5059 emit_tex(struct svga_shader_emitter_v10 *emit,
   5060          const struct tgsi_full_instruction *inst)
   5061 {
   5062    const uint unit = inst->Src[1].Register.Index;
   5063    const enum tgsi_texture_type target = inst->Texture.Texture;
   5064    unsigned opcode;
   5065    struct tgsi_full_src_register coord;
   5066    int offsets[3];
   5067    struct tex_swizzle_info swz_info;
   5068 
   5069    /* check that the sampler returns a float */
   5070    if (!is_valid_tex_instruction(emit, inst))
   5071       return TRUE;
   5072 
   5073    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5074 
   5075    get_texel_offsets(emit, inst, offsets);
   5076 
   5077    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5078 
   5079    /* SAMPLE dst, coord(s0), resource, sampler */
   5080    begin_emit_instruction(emit);
   5081 
   5082    if (tgsi_is_shadow_target(target))
   5083       opcode = VGPU10_OPCODE_SAMPLE_C;
   5084    else
   5085       opcode = VGPU10_OPCODE_SAMPLE;
   5086 
   5087    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5088    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5089    emit_src_register(emit, &coord);
   5090    emit_resource_register(emit, unit);
   5091    emit_sampler_register(emit, unit);
   5092    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
   5093       emit_tex_compare_refcoord(emit, target, &coord);
   5094    }
   5095    end_emit_instruction(emit);
   5096 
   5097    end_tex_swizzle(emit, &swz_info);
   5098 
   5099    free_temp_indexes(emit);
   5100 
   5101    return TRUE;
   5102 }
   5103 
   5104 
   5105 /**
   5106  * Emit code for TGSI_OPCODE_TXP (projective texture)
   5107  */
   5108 static boolean
   5109 emit_txp(struct svga_shader_emitter_v10 *emit,
   5110          const struct tgsi_full_instruction *inst)
   5111 {
   5112    const uint unit = inst->Src[1].Register.Index;
   5113    const enum tgsi_texture_type target = inst->Texture.Texture;
   5114    unsigned opcode;
   5115    int offsets[3];
   5116    unsigned tmp = get_temp_index(emit);
   5117    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   5118    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   5119    struct tgsi_full_src_register src0_wwww =
   5120       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5121    struct tgsi_full_src_register coord;
   5122    struct tex_swizzle_info swz_info;
   5123 
   5124    /* check that the sampler returns a float */
   5125    if (!is_valid_tex_instruction(emit, inst))
   5126       return TRUE;
   5127 
   5128    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5129 
   5130    get_texel_offsets(emit, inst, offsets);
   5131 
   5132    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5133 
   5134    /* DIV tmp, coord, coord.wwww */
   5135    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
   5136                         &coord, &src0_wwww, FALSE);
   5137 
   5138    /* SAMPLE dst, coord(tmp), resource, sampler */
   5139    begin_emit_instruction(emit);
   5140 
   5141    if (tgsi_is_shadow_target(target))
   5142       /* NOTE: for non-fragment shaders, we should use
   5143        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
   5144        */
   5145       opcode = VGPU10_OPCODE_SAMPLE_C;
   5146    else
   5147       opcode = VGPU10_OPCODE_SAMPLE;
   5148 
   5149    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5150    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5151    emit_src_register(emit, &tmp_src);  /* projected coord */
   5152    emit_resource_register(emit, unit);
   5153    emit_sampler_register(emit, unit);
   5154    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
   5155       emit_tex_compare_refcoord(emit, target, &tmp_src);
   5156    }
   5157    end_emit_instruction(emit);
   5158 
   5159    end_tex_swizzle(emit, &swz_info);
   5160 
   5161    free_temp_indexes(emit);
   5162 
   5163    return TRUE;
   5164 }
   5165 
   5166 
   5167 /**
   5168  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
   5169  */
   5170 static boolean
   5171 emit_txd(struct svga_shader_emitter_v10 *emit,
   5172          const struct tgsi_full_instruction *inst)
   5173 {
   5174    const uint unit = inst->Src[3].Register.Index;
   5175    const enum tgsi_texture_type target = inst->Texture.Texture;
   5176    int offsets[3];
   5177    struct tgsi_full_src_register coord;
   5178    struct tex_swizzle_info swz_info;
   5179 
   5180    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
   5181                      &swz_info);
   5182 
   5183    get_texel_offsets(emit, inst, offsets);
   5184 
   5185    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5186 
   5187    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   5188    begin_emit_instruction(emit);
   5189    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
   5190                       inst->Instruction.Saturate, offsets);
   5191    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5192    emit_src_register(emit, &coord);
   5193    emit_resource_register(emit, unit);
   5194    emit_sampler_register(emit, unit);
   5195    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   5196    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   5197    end_emit_instruction(emit);
   5198 
   5199    end_tex_swizzle(emit, &swz_info);
   5200 
   5201    free_temp_indexes(emit);
   5202 
   5203    return TRUE;
   5204 }
   5205 
   5206 
   5207 /**
   5208  * Emit code for TGSI_OPCODE_TXF (texel fetch)
   5209  */
   5210 static boolean
   5211 emit_txf(struct svga_shader_emitter_v10 *emit,
   5212          const struct tgsi_full_instruction *inst)
   5213 {
   5214    const uint unit = inst->Src[1].Register.Index;
   5215    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
   5216    int offsets[3];
   5217    struct tex_swizzle_info swz_info;
   5218 
   5219    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5220 
   5221    get_texel_offsets(emit, inst, offsets);
   5222 
   5223    if (msaa) {
   5224       /* Fetch one sample from an MSAA texture */
   5225       struct tgsi_full_src_register sampleIndex =
   5226          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5227       /* LD_MS dst, coord(s0), resource, sampleIndex */
   5228       begin_emit_instruction(emit);
   5229       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
   5230                          inst->Instruction.Saturate, offsets);
   5231       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5232       emit_src_register(emit, &inst->Src[0]);
   5233       emit_resource_register(emit, unit);
   5234       emit_src_register(emit, &sampleIndex);
   5235       end_emit_instruction(emit);
   5236    }
   5237    else {
   5238       /* Fetch one texel specified by integer coordinate */
   5239       /* LD dst, coord(s0), resource */
   5240       begin_emit_instruction(emit);
   5241       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
   5242                          inst->Instruction.Saturate, offsets);
   5243       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5244       emit_src_register(emit, &inst->Src[0]);
   5245       emit_resource_register(emit, unit);
   5246       end_emit_instruction(emit);
   5247    }
   5248 
   5249    end_tex_swizzle(emit, &swz_info);
   5250 
   5251    free_temp_indexes(emit);
   5252 
   5253    return TRUE;
   5254 }
   5255 
   5256 
   5257 /**
   5258  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
   5259  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
   5260  */
   5261 static boolean
   5262 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
   5263              const struct tgsi_full_instruction *inst)
   5264 {
   5265    const enum tgsi_texture_type target = inst->Texture.Texture;
   5266    unsigned opcode, unit;
   5267    int offsets[3];
   5268    struct tgsi_full_src_register coord, lod_bias;
   5269    struct tex_swizzle_info swz_info;
   5270 
   5271    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
   5272           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
   5273           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
   5274 
   5275    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
   5276       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   5277       unit = inst->Src[2].Register.Index;
   5278    }
   5279    else {
   5280       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5281       unit = inst->Src[1].Register.Index;
   5282    }
   5283 
   5284    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
   5285                      &swz_info);
   5286 
   5287    get_texel_offsets(emit, inst, offsets);
   5288 
   5289    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5290 
   5291    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   5292    begin_emit_instruction(emit);
   5293    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
   5294       opcode = VGPU10_OPCODE_SAMPLE_L;
   5295    }
   5296    else {
   5297       opcode = VGPU10_OPCODE_SAMPLE_B;
   5298    }
   5299    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5300    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5301    emit_src_register(emit, &coord);
   5302    emit_resource_register(emit, unit);
   5303    emit_sampler_register(emit, unit);
   5304    emit_src_register(emit, &lod_bias);
   5305    end_emit_instruction(emit);
   5306 
   5307    end_tex_swizzle(emit, &swz_info);
   5308 
   5309    free_temp_indexes(emit);
   5310 
   5311    return TRUE;
   5312 }
   5313 
   5314 
   5315 /**
   5316  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
   5317  */
   5318 static boolean
   5319 emit_txq(struct svga_shader_emitter_v10 *emit,
   5320          const struct tgsi_full_instruction *inst)
   5321 {
   5322    const uint unit = inst->Src[1].Register.Index;
   5323 
   5324    if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
   5325       /* RESINFO does not support querying texture buffers, so we instead
   5326        * store texture buffer sizes in shader constants, then copy them to
   5327        * implement TXQ instead of emitting RESINFO.
   5328        * MOV dst, const[texture_buffer_size_index[unit]]
   5329        */
   5330       struct tgsi_full_src_register size_src =
   5331          make_src_const_reg(emit->texture_buffer_size_index[unit]);
   5332       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
   5333                            FALSE);
   5334    } else {
   5335       /* RESINFO dst, srcMipLevel, resource */
   5336       begin_emit_instruction(emit);
   5337       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
   5338       emit_dst_register(emit, &inst->Dst[0]);
   5339       emit_src_register(emit, &inst->Src[0]);
   5340       emit_resource_register(emit, unit);
   5341       end_emit_instruction(emit);
   5342    }
   5343 
   5344    free_temp_indexes(emit);
   5345 
   5346    return TRUE;
   5347 }
   5348 
   5349 
   5350 /**
   5351  * Emit a simple instruction (like ADD, MUL, MIN, etc).
   5352  */
   5353 static boolean
   5354 emit_simple(struct svga_shader_emitter_v10 *emit,
   5355             const struct tgsi_full_instruction *inst)
   5356 {
   5357    const unsigned opcode = inst->Instruction.Opcode;
   5358    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   5359    unsigned i;
   5360 
   5361    begin_emit_instruction(emit);
   5362    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
   5363                inst->Instruction.Saturate);
   5364    for (i = 0; i < op->num_dst; i++) {
   5365       emit_dst_register(emit, &inst->Dst[i]);
   5366    }
   5367    for (i = 0; i < op->num_src; i++) {
   5368       emit_src_register(emit, &inst->Src[i]);
   5369    }
   5370    end_emit_instruction(emit);
   5371 
   5372    return TRUE;
   5373 }
   5374 
   5375 
   5376 /**
   5377  * We only special case the MOV instruction to try to detect constant
   5378  * color writes in the fragment shader.
   5379  */
   5380 static boolean
   5381 emit_mov(struct svga_shader_emitter_v10 *emit,
   5382          const struct tgsi_full_instruction *inst)
   5383 {
   5384    const struct tgsi_full_src_register *src = &inst->Src[0];
   5385    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
   5386 
   5387    if (emit->unit == PIPE_SHADER_FRAGMENT &&
   5388        dst->Register.File == TGSI_FILE_OUTPUT &&
   5389        dst->Register.Index == 0 &&
   5390        src->Register.File == TGSI_FILE_CONSTANT &&
   5391        !src->Register.Indirect) {
   5392       emit->constant_color_output = TRUE;
   5393    }
   5394 
   5395    return emit_simple(emit, inst);
   5396 }
   5397 
   5398 
   5399 /**
   5400  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
   5401  * where TGSI only uses one dest register.
   5402  */
   5403 static boolean
   5404 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
   5405                  const struct tgsi_full_instruction *inst,
   5406                  unsigned dst_count,
   5407                  unsigned dst_index)
   5408 {
   5409    const unsigned opcode = inst->Instruction.Opcode;
   5410    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   5411    unsigned i;
   5412 
   5413    begin_emit_instruction(emit);
   5414    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
   5415                inst->Instruction.Saturate);
   5416 
   5417    for (i = 0; i < dst_count; i++) {
   5418       if (i == dst_index) {
   5419          emit_dst_register(emit, &inst->Dst[0]);
   5420       } else {
   5421          emit_null_dst_register(emit);
   5422       }
   5423    }
   5424 
   5425    for (i = 0; i < op->num_src; i++) {
   5426       emit_src_register(emit, &inst->Src[i]);
   5427    }
   5428    end_emit_instruction(emit);
   5429 
   5430    return TRUE;
   5431 }
   5432 
   5433 
   5434 /**
   5435  * Translate a single TGSI instruction to VGPU10.
   5436  */
   5437 static boolean
   5438 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
   5439                         unsigned inst_number,
   5440                         const struct tgsi_full_instruction *inst)
   5441 {
   5442    const unsigned opcode = inst->Instruction.Opcode;
   5443 
   5444    switch (opcode) {
   5445    case TGSI_OPCODE_ADD:
   5446    case TGSI_OPCODE_AND:
   5447    case TGSI_OPCODE_BGNLOOP:
   5448    case TGSI_OPCODE_BRK:
   5449    case TGSI_OPCODE_CEIL:
   5450    case TGSI_OPCODE_CONT:
   5451    case TGSI_OPCODE_DDX:
   5452    case TGSI_OPCODE_DDY:
   5453    case TGSI_OPCODE_DIV:
   5454    case TGSI_OPCODE_DP2:
   5455    case TGSI_OPCODE_DP3:
   5456    case TGSI_OPCODE_DP4:
   5457    case TGSI_OPCODE_ELSE:
   5458    case TGSI_OPCODE_ENDIF:
   5459    case TGSI_OPCODE_ENDLOOP:
   5460    case TGSI_OPCODE_ENDSUB:
   5461    case TGSI_OPCODE_F2I:
   5462    case TGSI_OPCODE_F2U:
   5463    case TGSI_OPCODE_FLR:
   5464    case TGSI_OPCODE_FRC:
   5465    case TGSI_OPCODE_FSEQ:
   5466    case TGSI_OPCODE_FSGE:
   5467    case TGSI_OPCODE_FSLT:
   5468    case TGSI_OPCODE_FSNE:
   5469    case TGSI_OPCODE_I2F:
   5470    case TGSI_OPCODE_IMAX:
   5471    case TGSI_OPCODE_IMIN:
   5472    case TGSI_OPCODE_INEG:
   5473    case TGSI_OPCODE_ISGE:
   5474    case TGSI_OPCODE_ISHR:
   5475    case TGSI_OPCODE_ISLT:
   5476    case TGSI_OPCODE_MAD:
   5477    case TGSI_OPCODE_MAX:
   5478    case TGSI_OPCODE_MIN:
   5479    case TGSI_OPCODE_MUL:
   5480    case TGSI_OPCODE_NOP:
   5481    case TGSI_OPCODE_NOT:
   5482    case TGSI_OPCODE_OR:
   5483    case TGSI_OPCODE_RET:
   5484    case TGSI_OPCODE_UADD:
   5485    case TGSI_OPCODE_USEQ:
   5486    case TGSI_OPCODE_USGE:
   5487    case TGSI_OPCODE_USLT:
   5488    case TGSI_OPCODE_UMIN:
   5489    case TGSI_OPCODE_UMAD:
   5490    case TGSI_OPCODE_UMAX:
   5491    case TGSI_OPCODE_ROUND:
   5492    case TGSI_OPCODE_SQRT:
   5493    case TGSI_OPCODE_SHL:
   5494    case TGSI_OPCODE_TRUNC:
   5495    case TGSI_OPCODE_U2F:
   5496    case TGSI_OPCODE_UCMP:
   5497    case TGSI_OPCODE_USHR:
   5498    case TGSI_OPCODE_USNE:
   5499    case TGSI_OPCODE_XOR:
   5500       /* simple instructions */
   5501       return emit_simple(emit, inst);
   5502 
   5503    case TGSI_OPCODE_MOV:
   5504       return emit_mov(emit, inst);
   5505    case TGSI_OPCODE_EMIT:
   5506       return emit_vertex(emit, inst);
   5507    case TGSI_OPCODE_ENDPRIM:
   5508       return emit_endprim(emit, inst);
   5509    case TGSI_OPCODE_IABS:
   5510       return emit_iabs(emit, inst);
   5511    case TGSI_OPCODE_ARL:
   5512       /* fall-through */
   5513    case TGSI_OPCODE_UARL:
   5514       return emit_arl_uarl(emit, inst);
   5515    case TGSI_OPCODE_BGNSUB:
   5516       /* no-op */
   5517       return TRUE;
   5518    case TGSI_OPCODE_CAL:
   5519       return emit_cal(emit, inst);
   5520    case TGSI_OPCODE_CMP:
   5521       return emit_cmp(emit, inst);
   5522    case TGSI_OPCODE_COS:
   5523       return emit_sincos(emit, inst);
   5524    case TGSI_OPCODE_DST:
   5525       return emit_dst(emit, inst);
   5526    case TGSI_OPCODE_EX2:
   5527       return emit_ex2(emit, inst);
   5528    case TGSI_OPCODE_EXP:
   5529       return emit_exp(emit, inst);
   5530    case TGSI_OPCODE_IF:
   5531       return emit_if(emit, inst);
   5532    case TGSI_OPCODE_KILL:
   5533       return emit_kill(emit, inst);
   5534    case TGSI_OPCODE_KILL_IF:
   5535       return emit_kill_if(emit, inst);
   5536    case TGSI_OPCODE_LG2:
   5537       return emit_lg2(emit, inst);
   5538    case TGSI_OPCODE_LIT:
   5539       return emit_lit(emit, inst);
   5540    case TGSI_OPCODE_LOG:
   5541       return emit_log(emit, inst);
   5542    case TGSI_OPCODE_LRP:
   5543       return emit_lrp(emit, inst);
   5544    case TGSI_OPCODE_POW:
   5545       return emit_pow(emit, inst);
   5546    case TGSI_OPCODE_RCP:
   5547       return emit_rcp(emit, inst);
   5548    case TGSI_OPCODE_RSQ:
   5549       return emit_rsq(emit, inst);
   5550    case TGSI_OPCODE_SAMPLE:
   5551       return emit_sample(emit, inst);
   5552    case TGSI_OPCODE_SEQ:
   5553       return emit_seq(emit, inst);
   5554    case TGSI_OPCODE_SGE:
   5555       return emit_sge(emit, inst);
   5556    case TGSI_OPCODE_SGT:
   5557       return emit_sgt(emit, inst);
   5558    case TGSI_OPCODE_SIN:
   5559       return emit_sincos(emit, inst);
   5560    case TGSI_OPCODE_SLE:
   5561       return emit_sle(emit, inst);
   5562    case TGSI_OPCODE_SLT:
   5563       return emit_slt(emit, inst);
   5564    case TGSI_OPCODE_SNE:
   5565       return emit_sne(emit, inst);
   5566    case TGSI_OPCODE_SSG:
   5567       return emit_ssg(emit, inst);
   5568    case TGSI_OPCODE_ISSG:
   5569       return emit_issg(emit, inst);
   5570    case TGSI_OPCODE_TEX:
   5571       return emit_tex(emit, inst);
   5572    case TGSI_OPCODE_TXP:
   5573       return emit_txp(emit, inst);
   5574    case TGSI_OPCODE_TXB:
   5575    case TGSI_OPCODE_TXB2:
   5576    case TGSI_OPCODE_TXL:
   5577       return emit_txl_txb(emit, inst);
   5578    case TGSI_OPCODE_TXD:
   5579       return emit_txd(emit, inst);
   5580    case TGSI_OPCODE_TXF:
   5581       return emit_txf(emit, inst);
   5582    case TGSI_OPCODE_TXQ:
   5583       return emit_txq(emit, inst);
   5584    case TGSI_OPCODE_UIF:
   5585       return emit_if(emit, inst);
   5586    case TGSI_OPCODE_UMUL_HI:
   5587    case TGSI_OPCODE_IMUL_HI:
   5588    case TGSI_OPCODE_UDIV:
   5589    case TGSI_OPCODE_IDIV:
   5590       /* These cases use only the FIRST of two destination registers */
   5591       return emit_simple_1dst(emit, inst, 2, 0);
   5592    case TGSI_OPCODE_UMUL:
   5593    case TGSI_OPCODE_UMOD:
   5594    case TGSI_OPCODE_MOD:
   5595       /* These cases use only the SECOND of two destination registers */
   5596       return emit_simple_1dst(emit, inst, 2, 1);
   5597    case TGSI_OPCODE_END:
   5598       if (!emit_post_helpers(emit))
   5599          return FALSE;
   5600       return emit_simple(emit, inst);
   5601 
   5602    default:
   5603       debug_printf("Unimplemented tgsi instruction %s\n",
   5604                    tgsi_get_opcode_name(opcode));
   5605       return FALSE;
   5606    }
   5607 
   5608    return TRUE;
   5609 }
   5610 
   5611 
   5612 /**
   5613  * Emit the extra instructions to adjust the vertex position.
   5614  * There are two possible adjustments:
   5615  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
   5616  *    "prescale" and "pretranslate" values.
   5617  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
   5618  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
   5619  */
   5620 static void
   5621 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
   5622                        unsigned vs_pos_tmp_index)
   5623 {
   5624    struct tgsi_full_src_register tmp_pos_src;
   5625    struct tgsi_full_dst_register pos_dst;
   5626 
   5627    /* Don't bother to emit any extra vertex instructions if vertex position is
   5628     * not written out
   5629     */
   5630    if (emit->vposition.out_index == INVALID_INDEX)
   5631       return;
   5632 
   5633    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
   5634    pos_dst = make_dst_output_reg(emit->vposition.out_index);
   5635 
   5636    /* If non-adjusted vertex position register index
   5637     * is valid, copy the vertex position from the temporary
   5638     * vertex position register before it is modified by the
   5639     * prescale computation.
   5640     */
   5641    if (emit->vposition.so_index != INVALID_INDEX) {
   5642       struct tgsi_full_dst_register pos_so_dst =
   5643          make_dst_output_reg(emit->vposition.so_index);
   5644 
   5645       /* MOV pos_so, tmp_pos */
   5646       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
   5647                            &tmp_pos_src, FALSE);
   5648    }
   5649 
   5650    if (emit->vposition.need_prescale) {
   5651       /* This code adjusts the vertex position to match the VGPU10 convention.
   5652        * If p is the position computed by the shader (usually by applying the
   5653        * modelview and projection matrices), the new position q is computed by:
   5654        *
   5655        * q.x = p.w * trans.x + p.x * scale.x
   5656        * q.y = p.w * trans.y + p.y * scale.y
   5657        * q.z = p.w * trans.z + p.z * scale.z;
   5658        * q.w = p.w * trans.w + p.w;
   5659        */
   5660       struct tgsi_full_src_register tmp_pos_src_w =
   5661          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
   5662       struct tgsi_full_dst_register tmp_pos_dst =
   5663          make_dst_temp_reg(vs_pos_tmp_index);
   5664       struct tgsi_full_dst_register tmp_pos_dst_xyz =
   5665          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
   5666 
   5667       struct tgsi_full_src_register prescale_scale =
   5668          make_src_const_reg(emit->vposition.prescale_scale_index);
   5669       struct tgsi_full_src_register prescale_trans =
   5670          make_src_const_reg(emit->vposition.prescale_trans_index);
   5671 
   5672       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
   5673       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
   5674                            &tmp_pos_src, &prescale_scale, FALSE);
   5675 
   5676       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
   5677       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
   5678                            &prescale_trans, &tmp_pos_src, FALSE);
   5679    }
   5680    else if (emit->key.vs.undo_viewport) {
   5681       /* This code computes the final vertex position from the temporary
   5682        * vertex position by undoing the viewport transformation and the
   5683        * divide-by-W operation (we convert window coords back to clip coords).
   5684        * This is needed when we use the 'draw' module for fallbacks.
   5685        * If p is the temp pos in window coords, then the NDC coord q is:
   5686        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
   5687        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
   5688        *   q.z = p.z * p.w
   5689        *   q.w = p.w
   5690        * CONST[vs_viewport_index] contains:
   5691        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
   5692        */
   5693       struct tgsi_full_dst_register tmp_pos_dst =
   5694          make_dst_temp_reg(vs_pos_tmp_index);
   5695       struct tgsi_full_dst_register tmp_pos_dst_xy =
   5696          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
   5697       struct tgsi_full_src_register tmp_pos_src_wwww =
   5698          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
   5699 
   5700       struct tgsi_full_dst_register pos_dst_xyz =
   5701          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
   5702       struct tgsi_full_dst_register pos_dst_w =
   5703          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
   5704 
   5705       struct tgsi_full_src_register vp_xyzw =
   5706          make_src_const_reg(emit->vs.viewport_index);
   5707       struct tgsi_full_src_register vp_zwww =
   5708          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
   5709                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
   5710 
   5711       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
   5712       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
   5713                            &tmp_pos_src, &vp_zwww, FALSE);
   5714 
   5715       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
   5716       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
   5717                            &tmp_pos_src, &vp_xyzw, FALSE);
   5718 
   5719       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
   5720       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
   5721                            &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
   5722 
   5723       /* MOV pos.w, tmp_pos.w */
   5724       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
   5725                            &tmp_pos_src, FALSE);
   5726    }
   5727    else if (vs_pos_tmp_index != INVALID_INDEX) {
   5728       /* This code is to handle the case where the temporary vertex
   5729        * position register is created when the vertex shader has stream
   5730        * output and prescale is disabled because rasterization is to be
   5731        * discarded.
   5732        */
   5733       struct tgsi_full_dst_register pos_dst =
   5734          make_dst_output_reg(emit->vposition.out_index);
   5735 
   5736       /* MOV pos, tmp_pos */
   5737       begin_emit_instruction(emit);
   5738       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   5739       emit_dst_register(emit, &pos_dst);
   5740       emit_src_register(emit, &tmp_pos_src);
   5741       end_emit_instruction(emit);
   5742    }
   5743 }
   5744 
   5745 static void
   5746 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
   5747 {
   5748    if (emit->clip_mode == CLIP_DISTANCE) {
   5749       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
   5750       emit_clip_distance_instructions(emit);
   5751 
   5752    } else if (emit->clip_mode == CLIP_VERTEX) {
   5753       /* Convert TGSI CLIPVERTEX to CLIPDIST */
   5754       emit_clip_vertex_instructions(emit);
   5755    }
   5756 
   5757    /**
   5758     * Emit vertex position and take care of legacy user planes only if
   5759     * there is a valid vertex position register index.
   5760     * This is to take care of the case
   5761     * where the shader doesn't output vertex position. Then in
   5762     * this case, don't bother to emit more vertex instructions.
   5763     */
   5764    if (emit->vposition.out_index == INVALID_INDEX)
   5765       return;
   5766 
   5767    /**
   5768     * Emit per-vertex clipping instructions for legacy user defined clip planes.
   5769     * NOTE: we must emit the clip distance instructions before the
   5770     * emit_vpos_instructions() call since the later function will change
   5771     * the TEMP[vs_pos_tmp_index] value.
   5772     */
   5773    if (emit->clip_mode == CLIP_LEGACY) {
   5774       /* Emit CLIPDIST for legacy user defined clip planes */
   5775       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
   5776    }
   5777 }
   5778 
   5779 
   5780 /**
   5781  * Emit extra per-vertex instructions.  This includes clip-coordinate
   5782  * space conversion and computing clip distances.  This is called for
   5783  * each GS emit-vertex instruction and at the end of VS translation.
   5784  */
   5785 static void
   5786 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
   5787 {
   5788    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
   5789 
   5790    /* Emit clipping instructions based on clipping mode */
   5791    emit_clipping_instructions(emit);
   5792 
   5793    /**
   5794     * Reset the temporary vertex position register index
   5795     * so that emit_dst_register() will use the real vertex position output
   5796     */
   5797    emit->vposition.tmp_index = INVALID_INDEX;
   5798 
   5799    /* Emit vertex position instructions */
   5800    emit_vpos_instructions(emit, vs_pos_tmp_index);
   5801 
   5802    /* Restore original vposition.tmp_index value for the next GS vertex.
   5803     * It doesn't matter for VS.
   5804     */
   5805    emit->vposition.tmp_index = vs_pos_tmp_index;
   5806 }
   5807 
   5808 /**
   5809  * Translate the TGSI_OPCODE_EMIT GS instruction.
   5810  */
   5811 static boolean
   5812 emit_vertex(struct svga_shader_emitter_v10 *emit,
   5813             const struct tgsi_full_instruction *inst)
   5814 {
   5815    unsigned ret = TRUE;
   5816 
   5817    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   5818 
   5819    emit_vertex_instructions(emit);
   5820 
   5821    /* We can't use emit_simple() because the TGSI instruction has one
   5822     * operand (vertex stream number) which we must ignore for VGPU10.
   5823     */
   5824    begin_emit_instruction(emit);
   5825    emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
   5826    end_emit_instruction(emit);
   5827 
   5828    return ret;
   5829 }
   5830 
   5831 
   5832 /**
   5833  * Emit the extra code to convert from VGPU10's boolean front-face
   5834  * register to TGSI's signed front-face register.
   5835  *
   5836  * TODO: Make temporary front-face register a scalar.
   5837  */
   5838 static void
   5839 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
   5840 {
   5841    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   5842 
   5843    if (emit->fs.face_input_index != INVALID_INDEX) {
   5844       /* convert vgpu10 boolean face register to gallium +/-1 value */
   5845       struct tgsi_full_dst_register tmp_dst =
   5846          make_dst_temp_reg(emit->fs.face_tmp_index);
   5847       struct tgsi_full_src_register one =
   5848          make_immediate_reg_float(emit, 1.0f);
   5849       struct tgsi_full_src_register neg_one =
   5850          make_immediate_reg_float(emit, -1.0f);
   5851 
   5852       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
   5853       begin_emit_instruction(emit);
   5854       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
   5855       emit_dst_register(emit, &tmp_dst);
   5856       emit_face_register(emit);
   5857       emit_src_register(emit, &one);
   5858       emit_src_register(emit, &neg_one);
   5859       end_emit_instruction(emit);
   5860    }
   5861 }
   5862 
   5863 
   5864 /**
   5865  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
   5866  */
   5867 static void
   5868 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
   5869 {
   5870    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   5871 
   5872    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
   5873       struct tgsi_full_dst_register tmp_dst =
   5874          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
   5875       struct tgsi_full_dst_register tmp_dst_xyz =
   5876          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
   5877       struct tgsi_full_dst_register tmp_dst_w =
   5878          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
   5879       struct tgsi_full_src_register one =
   5880          make_immediate_reg_float(emit, 1.0f);
   5881       struct tgsi_full_src_register fragcoord =
   5882          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
   5883 
   5884       /* save the input index */
   5885       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
   5886       /* set to invalid to prevent substitution in emit_src_register() */
   5887       emit->fs.fragcoord_input_index = INVALID_INDEX;
   5888 
   5889       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
   5890       begin_emit_instruction(emit);
   5891       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   5892       emit_dst_register(emit, &tmp_dst_xyz);
   5893       emit_src_register(emit, &fragcoord);
   5894       end_emit_instruction(emit);
   5895 
   5896       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
   5897       begin_emit_instruction(emit);
   5898       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
   5899       emit_dst_register(emit, &tmp_dst_w);
   5900       emit_src_register(emit, &one);
   5901       emit_src_register(emit, &fragcoord);
   5902       end_emit_instruction(emit);
   5903 
   5904       /* restore saved value */
   5905       emit->fs.fragcoord_input_index = fragcoord_input_index;
   5906    }
   5907 }
   5908 
   5909 
   5910 /**
   5911  * Emit extra instructions to adjust VS inputs/attributes.  This can
   5912  * mean casting a vertex attribute from int to float or setting the
   5913  * W component to 1, or both.
   5914  */
   5915 static void
   5916 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
   5917 {
   5918    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   5919    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   5920    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   5921    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   5922    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   5923    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   5924    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
   5925 
   5926    unsigned adjust_mask = (save_w_1_mask |
   5927                            save_itof_mask |
   5928                            save_utof_mask |
   5929                            save_is_bgra_mask |
   5930                            save_puint_to_snorm_mask |
   5931                            save_puint_to_uscaled_mask |
   5932                            save_puint_to_sscaled_mask);
   5933 
   5934    assert(emit->unit == PIPE_SHADER_VERTEX);
   5935 
   5936    if (adjust_mask) {
   5937       struct tgsi_full_src_register one =
   5938          make_immediate_reg_float(emit, 1.0f);
   5939 
   5940       struct tgsi_full_src_register one_int =
   5941          make_immediate_reg_int(emit, 1);
   5942 
   5943       /* We need to turn off these bitmasks while emitting the
   5944        * instructions below, then restore them afterward.
   5945        */
   5946       emit->key.vs.adjust_attrib_w_1 = 0;
   5947       emit->key.vs.adjust_attrib_itof = 0;
   5948       emit->key.vs.adjust_attrib_utof = 0;
   5949       emit->key.vs.attrib_is_bgra = 0;
   5950       emit->key.vs.attrib_puint_to_snorm = 0;
   5951       emit->key.vs.attrib_puint_to_uscaled = 0;
   5952       emit->key.vs.attrib_puint_to_sscaled = 0;
   5953 
   5954       while (adjust_mask) {
   5955          unsigned index = u_bit_scan(&adjust_mask);
   5956 
   5957          /* skip the instruction if this vertex attribute is not being used */
   5958          if (emit->info.input_usage_mask[index] == 0)
   5959             continue;
   5960 
   5961          unsigned tmp = emit->vs.adjusted_input[index];
   5962          struct tgsi_full_src_register input_src =
   5963             make_src_reg(TGSI_FILE_INPUT, index);
   5964 
   5965          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   5966          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   5967          struct tgsi_full_dst_register tmp_dst_w =
   5968             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
   5969 
   5970          /* ITOF/UTOF/MOV tmp, input[index] */
   5971          if (save_itof_mask & (1 << index)) {
   5972             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
   5973                                  &tmp_dst, &input_src, FALSE);
   5974          }
   5975          else if (save_utof_mask & (1 << index)) {
   5976             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
   5977                                  &tmp_dst, &input_src, FALSE);
   5978          }
   5979          else if (save_puint_to_snorm_mask & (1 << index)) {
   5980             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
   5981          }
   5982          else if (save_puint_to_uscaled_mask & (1 << index)) {
   5983             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
   5984          }
   5985          else if (save_puint_to_sscaled_mask & (1 << index)) {
   5986             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
   5987          }
   5988          else {
   5989             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
   5990             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   5991                                  &tmp_dst, &input_src, FALSE);
   5992          }
   5993 
   5994          if (save_is_bgra_mask & (1 << index)) {
   5995             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
   5996          }
   5997 
   5998          if (save_w_1_mask & (1 << index)) {
   5999             /* MOV tmp.w, 1.0 */
   6000             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
   6001                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   6002                                     &tmp_dst_w, &one_int, FALSE);
   6003             }
   6004             else {
   6005                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   6006                                     &tmp_dst_w, &one, FALSE);
   6007             }
   6008          }
   6009       }
   6010 
   6011       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
   6012       emit->key.vs.adjust_attrib_itof = save_itof_mask;
   6013       emit->key.vs.adjust_attrib_utof = save_utof_mask;
   6014       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
   6015       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
   6016       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
   6017       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   6018    }
   6019 }
   6020 
   6021 
   6022 /**
   6023  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
   6024  * to implement some instructions.  We pre-allocate those values here
   6025  * in the immediate constant buffer.
   6026  */
   6027 static void
   6028 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
   6029 {
   6030    unsigned n = 0;
   6031 
   6032    emit->common_immediate_pos[n++] =
   6033       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
   6034 
   6035    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
   6036       emit->common_immediate_pos[n++] =
   6037          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
   6038    }
   6039 
   6040    emit->common_immediate_pos[n++] =
   6041       alloc_immediate_int4(emit, 0, 1, 0, -1);
   6042 
   6043    if (emit->key.vs.attrib_puint_to_snorm) {
   6044       emit->common_immediate_pos[n++] =
   6045          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
   6046    }
   6047 
   6048    if (emit->key.vs.attrib_puint_to_uscaled) {
   6049       emit->common_immediate_pos[n++] =
   6050          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   6051    }
   6052 
   6053    if (emit->key.vs.attrib_puint_to_sscaled) {
   6054       emit->common_immediate_pos[n++] =
   6055          alloc_immediate_int4(emit, 22, 12, 2, 0);
   6056 
   6057       emit->common_immediate_pos[n++] =
   6058          alloc_immediate_int4(emit, 22, 30, 0, 0);
   6059    }
   6060 
   6061    unsigned i;
   6062 
   6063    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
   6064       if (emit->key.tex[i].texel_bias) {
   6065          /* Replace 0.0f if more immediate float value is needed */
   6066          emit->common_immediate_pos[n++] =
   6067             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
   6068          break;
   6069       }
   6070    }
   6071 
   6072    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
   6073    emit->num_common_immediates = n;
   6074 }
   6075 
   6076 
   6077 /**
   6078  * Emit any extra/helper declarations/code that we might need between
   6079  * the declaration section and code section.
   6080  */
   6081 static boolean
   6082 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
   6083 {
   6084    /* Properties */
   6085    if (emit->unit == PIPE_SHADER_GEOMETRY)
   6086       emit_property_instructions(emit);
   6087 
   6088    /* Declare inputs */
   6089    if (!emit_input_declarations(emit))
   6090       return FALSE;
   6091 
   6092    /* Declare outputs */
   6093    if (!emit_output_declarations(emit))
   6094       return FALSE;
   6095 
   6096    /* Declare temporary registers */
   6097    emit_temporaries_declaration(emit);
   6098 
   6099    /* Declare constant registers */
   6100    emit_constant_declaration(emit);
   6101 
   6102    /* Declare samplers and resources */
   6103    emit_sampler_declarations(emit);
   6104    emit_resource_declarations(emit);
   6105 
   6106    /* Declare clip distance output registers */
   6107    if (emit->unit == PIPE_SHADER_VERTEX ||
   6108        emit->unit == PIPE_SHADER_GEOMETRY) {
   6109       emit_clip_distance_declarations(emit);
   6110    }
   6111 
   6112    alloc_common_immediates(emit);
   6113 
   6114    if (emit->unit == PIPE_SHADER_FRAGMENT &&
   6115        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
   6116       float alpha = emit->key.fs.alpha_ref;
   6117       emit->fs.alpha_ref_index =
   6118          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
   6119    }
   6120 
   6121    /* Now, emit the constant block containing all the immediates
   6122     * declared by shader, as well as the extra ones seen above.
   6123     */
   6124    emit_vgpu10_immediates_block(emit);
   6125 
   6126    if (emit->unit == PIPE_SHADER_FRAGMENT) {
   6127       emit_frontface_instructions(emit);
   6128       emit_fragcoord_instructions(emit);
   6129    }
   6130    else if (emit->unit == PIPE_SHADER_VERTEX) {
   6131       emit_vertex_attrib_instructions(emit);
   6132    }
   6133 
   6134    return TRUE;
   6135 }
   6136 
   6137 
   6138 /**
   6139  * The device has no direct support for the pipe_blend_state::alpha_to_one
   6140  * option so we implement it here with shader code.
   6141  *
   6142  * Note that this is kind of pointless, actually.  Here we're clobbering
   6143  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
   6144  * up with 100% coverage.  That's almost certainly not what the user wants.
   6145  * The work-around is to add extra shader code to compute coverage from alpha
   6146  * and write it to the coverage output register (if the user's shader doesn't
   6147  * do so already).  We'll probably do that in the future.
   6148  */
   6149 static void
   6150 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
   6151                                unsigned fs_color_tmp_index)
   6152 {
   6153    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   6154    unsigned i;
   6155 
   6156    /* Note: it's not 100% clear from the spec if we're supposed to clobber
   6157     * the alpha for all render targets.  But that's what NVIDIA does and
   6158     * that's what Piglit tests.
   6159     */
   6160    for (i = 0; i < emit->fs.num_color_outputs; i++) {
   6161       struct tgsi_full_dst_register color_dst;
   6162 
   6163       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
   6164          /* write to the temp color register */
   6165          color_dst = make_dst_temp_reg(fs_color_tmp_index);
   6166       }
   6167       else {
   6168          /* write directly to the color[i] output */
   6169          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
   6170       }
   6171 
   6172       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
   6173 
   6174       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
   6175    }
   6176 }
   6177 
   6178 
   6179 /**
   6180  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
   6181  * against the alpha reference value and discards the fragment if the
   6182  * comparison fails.
   6183  */
   6184 static void
   6185 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
   6186                              unsigned fs_color_tmp_index)
   6187 {
   6188    /* compare output color's alpha to alpha ref and kill */
   6189    unsigned tmp = get_temp_index(emit);
   6190    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   6191    struct tgsi_full_src_register tmp_src_x =
   6192       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   6193    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   6194    struct tgsi_full_src_register color_src =
   6195       make_src_temp_reg(fs_color_tmp_index);
   6196    struct tgsi_full_src_register color_src_w =
   6197       scalar_src(&color_src, TGSI_SWIZZLE_W);
   6198    struct tgsi_full_src_register ref_src =
   6199       make_src_immediate_reg(emit->fs.alpha_ref_index);
   6200    struct tgsi_full_dst_register color_dst =
   6201       make_dst_output_reg(emit->fs.color_out_index[0]);
   6202 
   6203    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6204 
   6205    /* dst = src0 'alpha_func' src1 */
   6206    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
   6207                    &color_src_w, &ref_src);
   6208 
   6209    /* DISCARD if dst.x == 0 */
   6210    begin_emit_instruction(emit);
   6211    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
   6212    emit_src_register(emit, &tmp_src_x);
   6213    end_emit_instruction(emit);
   6214 
   6215    /* If we don't need to broadcast the color below, emit the final color here.
   6216     */
   6217    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
   6218       /* MOV output.color, tempcolor */
   6219       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
   6220                            &color_src, FALSE);     /* XXX saturate? */
   6221    }
   6222 
   6223    free_temp_indexes(emit);
   6224 }
   6225 
   6226 
   6227 /**
   6228  * Emit instructions for writing a single color output to multiple
   6229  * color buffers.
   6230  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
   6231  * when key.fs.white_fragments is true).
   6232  * property is set and the number of render targets is greater than one.
   6233  * \param fs_color_tmp_index  index of the temp register that holds the
   6234  *                            color to broadcast.
   6235  */
   6236 static void
   6237 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
   6238                                  unsigned fs_color_tmp_index)
   6239 {
   6240    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   6241    unsigned i;
   6242    struct tgsi_full_src_register color_src;
   6243 
   6244    if (emit->key.fs.white_fragments) {
   6245       /* set all color outputs to white */
   6246       color_src = make_immediate_reg_float(emit, 1.0f);
   6247    }
   6248    else {
   6249       /* set all color outputs to TEMP[fs_color_tmp_index] */
   6250       assert(fs_color_tmp_index != INVALID_INDEX);
   6251       color_src = make_src_temp_reg(fs_color_tmp_index);
   6252    }
   6253 
   6254    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6255 
   6256    for (i = 0; i < n; i++) {
   6257       unsigned output_reg = emit->fs.color_out_index[i];
   6258       struct tgsi_full_dst_register color_dst =
   6259          make_dst_output_reg(output_reg);
   6260 
   6261       /* Fill in this semantic here since we'll use it later in
   6262        * emit_dst_register().
   6263        */
   6264       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
   6265 
   6266       /* MOV output.color[i], tempcolor */
   6267       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
   6268                            &color_src, FALSE);     /* XXX saturate? */
   6269    }
   6270 }
   6271 
   6272 
   6273 /**
   6274  * Emit extra helper code after the original shader code, but before the
   6275  * last END/RET instruction.
   6276  * For vertex shaders this means emitting the extra code to apply the
   6277  * prescale scale/translation.
   6278  */
   6279 static boolean
   6280 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
   6281 {
   6282    if (emit->unit == PIPE_SHADER_VERTEX) {
   6283       emit_vertex_instructions(emit);
   6284    }
   6285    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
   6286       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
   6287 
   6288       assert(!(emit->key.fs.white_fragments &&
   6289                emit->key.fs.write_color0_to_n_cbufs == 0));
   6290 
   6291       /* We no longer want emit_dst_register() to substitute the
   6292        * temporary fragment color register for the real color output.
   6293        */
   6294       emit->fs.color_tmp_index = INVALID_INDEX;
   6295 
   6296       if (emit->key.fs.alpha_to_one) {
   6297          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
   6298       }
   6299       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
   6300          emit_alpha_test_instructions(emit, fs_color_tmp_index);
   6301       }
   6302       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
   6303           emit->key.fs.white_fragments) {
   6304          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
   6305       }
   6306    }
   6307 
   6308    return TRUE;
   6309 }
   6310 
   6311 
   6312 /**
   6313  * Translate the TGSI tokens into VGPU10 tokens.
   6314  */
   6315 static boolean
   6316 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
   6317                          const struct tgsi_token *tokens)
   6318 {
   6319    struct tgsi_parse_context parse;
   6320    boolean ret = TRUE;
   6321    boolean pre_helpers_emitted = FALSE;
   6322    unsigned inst_number = 0;
   6323 
   6324    tgsi_parse_init(&parse, tokens);
   6325 
   6326    while (!tgsi_parse_end_of_tokens(&parse)) {
   6327       tgsi_parse_token(&parse);
   6328 
   6329       switch (parse.FullToken.Token.Type) {
   6330       case TGSI_TOKEN_TYPE_IMMEDIATE:
   6331          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
   6332          if (!ret)
   6333             goto done;
   6334          break;
   6335 
   6336       case TGSI_TOKEN_TYPE_DECLARATION:
   6337          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
   6338          if (!ret)
   6339             goto done;
   6340          break;
   6341 
   6342       case TGSI_TOKEN_TYPE_INSTRUCTION:
   6343          if (!pre_helpers_emitted) {
   6344             ret = emit_pre_helpers(emit);
   6345             if (!ret)
   6346                goto done;
   6347             pre_helpers_emitted = TRUE;
   6348          }
   6349          ret = emit_vgpu10_instruction(emit, inst_number++,
   6350                                        &parse.FullToken.FullInstruction);
   6351          if (!ret)
   6352             goto done;
   6353          break;
   6354 
   6355       case TGSI_TOKEN_TYPE_PROPERTY:
   6356          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
   6357          if (!ret)
   6358             goto done;
   6359          break;
   6360 
   6361       default:
   6362          break;
   6363       }
   6364    }
   6365 
   6366 done:
   6367    tgsi_parse_free(&parse);
   6368    return ret;
   6369 }
   6370 
   6371 
   6372 /**
   6373  * Emit the first VGPU10 shader tokens.
   6374  */
   6375 static boolean
   6376 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
   6377 {
   6378    VGPU10ProgramToken ptoken;
   6379 
   6380    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
   6381    ptoken.majorVersion = 4;
   6382    ptoken.minorVersion = 0;
   6383    ptoken.programType = translate_shader_type(emit->unit);
   6384    if (!emit_dword(emit, ptoken.value))
   6385       return FALSE;
   6386 
   6387    /* Second token: total length of shader, in tokens.  We can't fill this
   6388     * in until we're all done.  Emit zero for now.
   6389     */
   6390    return emit_dword(emit, 0);
   6391 }
   6392 
   6393 
   6394 static boolean
   6395 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
   6396 {
   6397    VGPU10ProgramToken *tokens;
   6398 
   6399    /* Replace the second token with total shader length */
   6400    tokens = (VGPU10ProgramToken *) emit->buf;
   6401    tokens[1].value = emit_get_num_tokens(emit);
   6402 
   6403    return TRUE;
   6404 }
   6405 
   6406 
   6407 /**
   6408  * Modify the FS to read the BCOLORs and use the FACE register
   6409  * to choose between the front/back colors.
   6410  */
   6411 static const struct tgsi_token *
   6412 transform_fs_twoside(const struct tgsi_token *tokens)
   6413 {
   6414    if (0) {
   6415       debug_printf("Before tgsi_add_two_side ------------------\n");
   6416       tgsi_dump(tokens,0);
   6417    }
   6418    tokens = tgsi_add_two_side(tokens);
   6419    if (0) {
   6420       debug_printf("After tgsi_add_two_side ------------------\n");
   6421       tgsi_dump(tokens, 0);
   6422    }
   6423    return tokens;
   6424 }
   6425 
   6426 
   6427 /**
   6428  * Modify the FS to do polygon stipple.
   6429  */
   6430 static const struct tgsi_token *
   6431 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
   6432                       const struct tgsi_token *tokens)
   6433 {
   6434    const struct tgsi_token *new_tokens;
   6435    unsigned unit;
   6436 
   6437    if (0) {
   6438       debug_printf("Before pstipple ------------------\n");
   6439       tgsi_dump(tokens,0);
   6440    }
   6441 
   6442    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
   6443                                                      TGSI_FILE_INPUT);
   6444 
   6445    emit->fs.pstipple_sampler_unit = unit;
   6446 
   6447    /* Setup texture state for stipple */
   6448    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   6449    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   6450    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   6451    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   6452    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
   6453 
   6454    if (0) {
   6455       debug_printf("After pstipple ------------------\n");
   6456       tgsi_dump(new_tokens, 0);
   6457    }
   6458 
   6459    return new_tokens;
   6460 }
   6461 
   6462 /**
   6463  * Modify the FS to support anti-aliasing point.
   6464  */
   6465 static const struct tgsi_token *
   6466 transform_fs_aapoint(const struct tgsi_token *tokens,
   6467                      int aa_coord_index)
   6468 {
   6469    if (0) {
   6470       debug_printf("Before tgsi_add_aa_point ------------------\n");
   6471       tgsi_dump(tokens,0);
   6472    }
   6473    tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   6474    if (0) {
   6475       debug_printf("After tgsi_add_aa_point ------------------\n");
   6476       tgsi_dump(tokens, 0);
   6477    }
   6478    return tokens;
   6479 }
   6480 
   6481 /**
   6482  * This is the main entrypoint for the TGSI -> VPGU10 translator.
   6483  */
   6484 struct svga_shader_variant *
   6485 svga_tgsi_vgpu10_translate(struct svga_context *svga,
   6486                            const struct svga_shader *shader,
   6487                            const struct svga_compile_key *key,
   6488                            unsigned unit)
   6489 {
   6490    struct svga_shader_variant *variant = NULL;
   6491    struct svga_shader_emitter_v10 *emit;
   6492    const struct tgsi_token *tokens = shader->tokens;
   6493    struct svga_vertex_shader *vs = svga->curr.vs;
   6494    struct svga_geometry_shader *gs = svga->curr.gs;
   6495 
   6496    assert(unit == PIPE_SHADER_VERTEX ||
   6497           unit == PIPE_SHADER_GEOMETRY ||
   6498           unit == PIPE_SHADER_FRAGMENT);
   6499 
   6500    /* These two flags cannot be used together */
   6501    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
   6502 
   6503    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
   6504    /*
   6505     * Setup the code emitter
   6506     */
   6507    emit = alloc_emitter();
   6508    if (!emit)
   6509       goto done;
   6510 
   6511    emit->unit = unit;
   6512    emit->key = *key;
   6513 
   6514    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
   6515                                    emit->key.gs.need_prescale);
   6516    emit->vposition.tmp_index = INVALID_INDEX;
   6517    emit->vposition.so_index = INVALID_INDEX;
   6518    emit->vposition.out_index = INVALID_INDEX;
   6519 
   6520    emit->fs.color_tmp_index = INVALID_INDEX;
   6521    emit->fs.face_input_index = INVALID_INDEX;
   6522    emit->fs.fragcoord_input_index = INVALID_INDEX;
   6523 
   6524    emit->gs.prim_id_index = INVALID_INDEX;
   6525 
   6526    emit->clip_dist_out_index = INVALID_INDEX;
   6527    emit->clip_dist_tmp_index = INVALID_INDEX;
   6528    emit->clip_dist_so_index = INVALID_INDEX;
   6529    emit->clip_vertex_out_index = INVALID_INDEX;
   6530 
   6531    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
   6532       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
   6533    }
   6534 
   6535    if (unit == PIPE_SHADER_FRAGMENT) {
   6536       if (key->fs.light_twoside) {
   6537          tokens = transform_fs_twoside(tokens);
   6538       }
   6539       if (key->fs.pstipple) {
   6540          const struct tgsi_token *new_tokens =
   6541             transform_fs_pstipple(emit, tokens);
   6542          if (tokens != shader->tokens) {
   6543             /* free the two-sided shader tokens */
   6544             tgsi_free_tokens(tokens);
   6545          }
   6546          tokens = new_tokens;
   6547       }
   6548       if (key->fs.aa_point) {
   6549          tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
   6550       }
   6551    }
   6552 
   6553    if (SVGA_DEBUG & DEBUG_TGSI) {
   6554       debug_printf("#####################################\n");
   6555       debug_printf("### TGSI Shader %u\n", shader->id);
   6556       tgsi_dump(tokens, 0);
   6557    }
   6558 
   6559    /**
   6560     * Rescan the header if the token string is different from the one
   6561     * included in the shader; otherwise, the header info is already up-to-date
   6562     */
   6563    if (tokens != shader->tokens) {
   6564       tgsi_scan_shader(tokens, &emit->info);
   6565    } else {
   6566       emit->info = shader->info;
   6567    }
   6568 
   6569    emit->num_outputs = emit->info.num_outputs;
   6570 
   6571    if (unit == PIPE_SHADER_FRAGMENT) {
   6572       /* Compute FS input remapping to match the output from VS/GS */
   6573       if (gs) {
   6574          svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
   6575       } else {
   6576          assert(vs);
   6577          svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
   6578       }
   6579    } else if (unit == PIPE_SHADER_GEOMETRY) {
   6580       assert(vs);
   6581       svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
   6582    }
   6583 
   6584    determine_clipping_mode(emit);
   6585 
   6586    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
   6587       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
   6588          /* if there is stream output declarations associated
   6589           * with this shader or the shader writes to ClipDistance
   6590           * then reserve extra registers for the non-adjusted vertex position
   6591           * and the ClipDistance shadow copy
   6592           */
   6593          emit->vposition.so_index = emit->num_outputs++;
   6594 
   6595          if (emit->clip_mode == CLIP_DISTANCE) {
   6596             emit->clip_dist_so_index = emit->num_outputs++;
   6597             if (emit->info.num_written_clipdistance > 4)
   6598                emit->num_outputs++;
   6599          }
   6600       }
   6601    }
   6602 
   6603    /*
   6604     * Do actual shader translation.
   6605     */
   6606    if (!emit_vgpu10_header(emit)) {
   6607       debug_printf("svga: emit VGPU10 header failed\n");
   6608       goto cleanup;
   6609    }
   6610 
   6611    if (!emit_vgpu10_instructions(emit, tokens)) {
   6612       debug_printf("svga: emit VGPU10 instructions failed\n");
   6613       goto cleanup;
   6614    }
   6615 
   6616    if (!emit_vgpu10_tail(emit)) {
   6617       debug_printf("svga: emit VGPU10 tail failed\n");
   6618       goto cleanup;
   6619    }
   6620 
   6621    if (emit->register_overflow) {
   6622       goto cleanup;
   6623    }
   6624 
   6625    /*
   6626     * Create, initialize the 'variant' object.
   6627     */
   6628    variant = svga_new_shader_variant(svga);
   6629    if (!variant)
   6630       goto cleanup;
   6631 
   6632    variant->shader = shader;
   6633    variant->nr_tokens = emit_get_num_tokens(emit);
   6634    variant->tokens = (const unsigned *)emit->buf;
   6635    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
   6636    memcpy(&variant->key, key, sizeof(*key));
   6637    variant->id = UTIL_BITMASK_INVALID_INDEX;
   6638 
   6639    /* The extra constant starting offset starts with the number of
   6640     * shader constants declared in the shader.
   6641     */
   6642    variant->extra_const_start = emit->num_shader_consts[0];
   6643    if (key->gs.wide_point) {
   6644       /**
   6645        * The extra constant added in the transformed shader
   6646        * for inverse viewport scale is to be supplied by the driver.
   6647        * So the extra constant starting offset needs to be reduced by 1.
   6648        */
   6649       assert(variant->extra_const_start > 0);
   6650       variant->extra_const_start--;
   6651    }
   6652 
   6653    variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
   6654 
   6655    /* If there was exactly one write to a fragment shader output register
   6656     * and it came from a constant buffer, we know all fragments will have
   6657     * the same color (except for blending).
   6658     */
   6659    variant->constant_color_output =
   6660       emit->constant_color_output && emit->num_output_writes == 1;
   6661 
   6662    /** keep track in the variant if flat interpolation is used
   6663     *  for any of the varyings.
   6664     */
   6665    variant->uses_flat_interp = emit->uses_flat_interp;
   6666 
   6667    variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
   6668 
   6669    if (tokens != shader->tokens) {
   6670       tgsi_free_tokens(tokens);
   6671    }
   6672 
   6673 cleanup:
   6674    free_emitter(emit);
   6675 
   6676 done:
   6677    SVGA_STATS_TIME_POP(svga_sws(svga));
   6678    return variant;
   6679 }
   6680