Home | History | Annotate | Download | only in state_tracker
      1 /**************************************************************************
      2  *
      3  * Copyright 2007-2008 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /*
     29  * \author
     30  * Michal Krol,
     31  * Keith Whitwell
     32  */
     33 
     34 #include "pipe/p_compiler.h"
     35 #include "pipe/p_context.h"
     36 #include "pipe/p_screen.h"
     37 #include "pipe/p_shader_tokens.h"
     38 #include "pipe/p_state.h"
     39 #include "tgsi/tgsi_ureg.h"
     40 #include "st_mesa_to_tgsi.h"
     41 #include "st_context.h"
     42 #include "program/prog_instruction.h"
     43 #include "program/prog_parameter.h"
     44 #include "util/u_debug.h"
     45 #include "util/u_math.h"
     46 #include "util/u_memory.h"
     47 #include "st_glsl_to_tgsi.h" /* for _mesa_sysval_to_semantic */
     48 
     49 
     50 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
     51                            (1 << PROGRAM_CONSTANT) |     \
     52                            (1 << PROGRAM_UNIFORM))
     53 
     54 /**
     55  * Intermediate state used during shader translation.
     56  */
     57 struct st_translate {
     58    struct ureg_program *ureg;
     59 
     60    struct ureg_dst temps[MAX_PROGRAM_TEMPS];
     61    struct ureg_src *constants;
     62    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
     63    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
     64    struct ureg_dst address[1];
     65    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
     66    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
     67 
     68    const GLuint *inputMapping;
     69    const GLuint *outputMapping;
     70 
     71    unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
     72 };
     73 
     74 
     75 /**
     76  * Map a Mesa dst register to a TGSI ureg_dst register.
     77  */
     78 static struct ureg_dst
     79 dst_register( struct st_translate *t,
     80               gl_register_file file,
     81               GLuint index )
     82 {
     83    switch( file ) {
     84    case PROGRAM_UNDEFINED:
     85       return ureg_dst_undef();
     86 
     87    case PROGRAM_TEMPORARY:
     88       if (ureg_dst_is_undef(t->temps[index]))
     89          t->temps[index] = ureg_DECL_temporary( t->ureg );
     90 
     91       return t->temps[index];
     92 
     93    case PROGRAM_OUTPUT:
     94       if (t->procType == PIPE_SHADER_VERTEX)
     95          assert(index < VARYING_SLOT_MAX);
     96       else if (t->procType == PIPE_SHADER_FRAGMENT)
     97          assert(index < FRAG_RESULT_MAX);
     98       else
     99          assert(index < VARYING_SLOT_MAX);
    100 
    101       assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
    102 
    103       return t->outputs[t->outputMapping[index]];
    104 
    105    case PROGRAM_ADDRESS:
    106       return t->address[index];
    107 
    108    default:
    109       debug_assert( 0 );
    110       return ureg_dst_undef();
    111    }
    112 }
    113 
    114 
    115 /**
    116  * Map a Mesa src register to a TGSI ureg_src register.
    117  */
    118 static struct ureg_src
    119 src_register( struct st_translate *t,
    120               gl_register_file file,
    121               GLint index )
    122 {
    123    switch( file ) {
    124    case PROGRAM_UNDEFINED:
    125       return ureg_src_undef();
    126 
    127    case PROGRAM_TEMPORARY:
    128       assert(index >= 0);
    129       assert(index < ARRAY_SIZE(t->temps));
    130       if (ureg_dst_is_undef(t->temps[index]))
    131          t->temps[index] = ureg_DECL_temporary( t->ureg );
    132       return ureg_src(t->temps[index]);
    133 
    134    case PROGRAM_UNIFORM:
    135       assert(index >= 0);
    136       return t->constants[index];
    137    case PROGRAM_STATE_VAR:
    138    case PROGRAM_CONSTANT:       /* ie, immediate */
    139       if (index < 0)
    140          return ureg_DECL_constant( t->ureg, 0 );
    141       else
    142          return t->constants[index];
    143 
    144    case PROGRAM_INPUT:
    145       assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
    146       return t->inputs[t->inputMapping[index]];
    147 
    148    case PROGRAM_OUTPUT:
    149       assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
    150       return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
    151 
    152    case PROGRAM_ADDRESS:
    153       return ureg_src(t->address[index]);
    154 
    155    case PROGRAM_SYSTEM_VALUE:
    156       assert(index < ARRAY_SIZE(t->systemValues));
    157       return t->systemValues[index];
    158 
    159    default:
    160       debug_assert( 0 );
    161       return ureg_src_undef();
    162    }
    163 }
    164 
    165 
    166 /**
    167  * Map mesa texture target to TGSI texture target.
    168  */
    169 unsigned
    170 st_translate_texture_target(GLuint textarget, GLboolean shadow)
    171 {
    172    if (shadow) {
    173       switch (textarget) {
    174       case TEXTURE_1D_INDEX:
    175          return TGSI_TEXTURE_SHADOW1D;
    176       case TEXTURE_2D_INDEX:
    177          return TGSI_TEXTURE_SHADOW2D;
    178       case TEXTURE_RECT_INDEX:
    179          return TGSI_TEXTURE_SHADOWRECT;
    180       case TEXTURE_1D_ARRAY_INDEX:
    181          return TGSI_TEXTURE_SHADOW1D_ARRAY;
    182       case TEXTURE_2D_ARRAY_INDEX:
    183          return TGSI_TEXTURE_SHADOW2D_ARRAY;
    184       case TEXTURE_CUBE_INDEX:
    185          return TGSI_TEXTURE_SHADOWCUBE;
    186       case TEXTURE_CUBE_ARRAY_INDEX:
    187          return TGSI_TEXTURE_SHADOWCUBE_ARRAY;
    188       default:
    189          break;
    190       }
    191    }
    192 
    193    switch (textarget) {
    194    case TEXTURE_2D_MULTISAMPLE_INDEX:
    195       return TGSI_TEXTURE_2D_MSAA;
    196    case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
    197       return TGSI_TEXTURE_2D_ARRAY_MSAA;
    198    case TEXTURE_BUFFER_INDEX:
    199       return TGSI_TEXTURE_BUFFER;
    200    case TEXTURE_1D_INDEX:
    201       return TGSI_TEXTURE_1D;
    202    case TEXTURE_2D_INDEX:
    203       return TGSI_TEXTURE_2D;
    204    case TEXTURE_3D_INDEX:
    205       return TGSI_TEXTURE_3D;
    206    case TEXTURE_CUBE_INDEX:
    207       return TGSI_TEXTURE_CUBE;
    208    case TEXTURE_CUBE_ARRAY_INDEX:
    209       return TGSI_TEXTURE_CUBE_ARRAY;
    210    case TEXTURE_RECT_INDEX:
    211       return TGSI_TEXTURE_RECT;
    212    case TEXTURE_1D_ARRAY_INDEX:
    213       return TGSI_TEXTURE_1D_ARRAY;
    214    case TEXTURE_2D_ARRAY_INDEX:
    215       return TGSI_TEXTURE_2D_ARRAY;
    216    case TEXTURE_EXTERNAL_INDEX:
    217       return TGSI_TEXTURE_2D;
    218    default:
    219       debug_assert(!"unexpected texture target index");
    220       return TGSI_TEXTURE_1D;
    221    }
    222 }
    223 
    224 
    225 /**
    226  * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum.
    227  */
    228 static unsigned
    229 translate_texture_index(GLbitfield texBit, bool shadow)
    230 {
    231    int index = ffs(texBit);
    232    assert(index > 0);
    233    assert(index - 1 < NUM_TEXTURE_TARGETS);
    234    return st_translate_texture_target(index - 1, shadow);
    235 }
    236 
    237 
    238 /**
    239  * Create a TGSI ureg_dst register from a Mesa dest register.
    240  */
    241 static struct ureg_dst
    242 translate_dst( struct st_translate *t,
    243                const struct prog_dst_register *DstReg,
    244                boolean saturate)
    245 {
    246    struct ureg_dst dst = dst_register( t,
    247                                        DstReg->File,
    248                                        DstReg->Index );
    249 
    250    dst = ureg_writemask( dst,
    251                          DstReg->WriteMask );
    252 
    253    if (saturate)
    254       dst = ureg_saturate( dst );
    255 
    256    if (DstReg->RelAddr)
    257       dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
    258 
    259    return dst;
    260 }
    261 
    262 
    263 /**
    264  * Create a TGSI ureg_src register from a Mesa src register.
    265  */
    266 static struct ureg_src
    267 translate_src( struct st_translate *t,
    268                const struct prog_src_register *SrcReg )
    269 {
    270    struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
    271 
    272    src = ureg_swizzle( src,
    273                        GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
    274                        GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
    275                        GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
    276                        GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
    277 
    278    if (SrcReg->Negate == NEGATE_XYZW)
    279       src = ureg_negate(src);
    280 
    281    if (SrcReg->RelAddr) {
    282       src = ureg_src_indirect( src, ureg_src(t->address[0]));
    283       if (SrcReg->File != PROGRAM_INPUT &&
    284           SrcReg->File != PROGRAM_OUTPUT) {
    285          /* If SrcReg->Index was negative, it was set to zero in
    286           * src_register().  Reassign it now.  But don't do this
    287           * for input/output regs since they get remapped while
    288           * const buffers don't.
    289           */
    290          src.Index = SrcReg->Index;
    291       }
    292    }
    293 
    294    return src;
    295 }
    296 
    297 
    298 static struct ureg_src swizzle_4v( struct ureg_src src,
    299                                    const unsigned *swz )
    300 {
    301    return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
    302 }
    303 
    304 
    305 /**
    306  * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
    307  *
    308  *   SWZ dst, src.x-y10
    309  *
    310  * becomes:
    311  *
    312  *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
    313  */
    314 static void emit_swz( struct st_translate *t,
    315                       struct ureg_dst dst,
    316                       const struct prog_src_register *SrcReg )
    317 {
    318    struct ureg_program *ureg = t->ureg;
    319    struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
    320 
    321    unsigned negate_mask =  SrcReg->Negate;
    322 
    323    unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
    324                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
    325                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
    326                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
    327 
    328    unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
    329                          (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
    330                          (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
    331                          (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
    332 
    333    unsigned negative_one_mask = one_mask & negate_mask;
    334    unsigned positive_one_mask = one_mask & ~negate_mask;
    335 
    336    struct ureg_src imm;
    337    unsigned i;
    338    unsigned mul_swizzle[4] = {0,0,0,0};
    339    unsigned add_swizzle[4] = {0,0,0,0};
    340    unsigned src_swizzle[4] = {0,0,0,0};
    341    boolean need_add = FALSE;
    342    boolean need_mul = FALSE;
    343 
    344    if (dst.WriteMask == 0)
    345       return;
    346 
    347    /* Is this just a MOV?
    348     */
    349    if (zero_mask == 0 &&
    350        one_mask == 0 &&
    351        (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
    352    {
    353       ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
    354       return;
    355    }
    356 
    357 #define IMM_ZERO    0
    358 #define IMM_ONE     1
    359 #define IMM_NEG_ONE 2
    360 
    361    imm = ureg_imm3f( ureg, 0, 1, -1 );
    362 
    363    for (i = 0; i < 4; i++) {
    364       unsigned bit = 1 << i;
    365 
    366       if (dst.WriteMask & bit) {
    367          if (positive_one_mask & bit) {
    368             mul_swizzle[i] = IMM_ZERO;
    369             add_swizzle[i] = IMM_ONE;
    370             need_add = TRUE;
    371          }
    372          else if (negative_one_mask & bit) {
    373             mul_swizzle[i] = IMM_ZERO;
    374             add_swizzle[i] = IMM_NEG_ONE;
    375             need_add = TRUE;
    376          }
    377          else if (zero_mask & bit) {
    378             mul_swizzle[i] = IMM_ZERO;
    379             add_swizzle[i] = IMM_ZERO;
    380             need_add = TRUE;
    381          }
    382          else {
    383             add_swizzle[i] = IMM_ZERO;
    384             src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
    385             need_mul = TRUE;
    386             if (negate_mask & bit) {
    387                mul_swizzle[i] = IMM_NEG_ONE;
    388             }
    389             else {
    390                mul_swizzle[i] = IMM_ONE;
    391             }
    392          }
    393       }
    394    }
    395 
    396    if (need_mul && need_add) {
    397       ureg_MAD( ureg,
    398                 dst,
    399                 swizzle_4v( src, src_swizzle ),
    400                 swizzle_4v( imm, mul_swizzle ),
    401                 swizzle_4v( imm, add_swizzle ) );
    402    }
    403    else if (need_mul) {
    404       ureg_MUL( ureg,
    405                 dst,
    406                 swizzle_4v( src, src_swizzle ),
    407                 swizzle_4v( imm, mul_swizzle ) );
    408    }
    409    else if (need_add) {
    410       ureg_MOV( ureg,
    411                 dst,
    412                 swizzle_4v( imm, add_swizzle ) );
    413    }
    414    else {
    415       debug_assert(0);
    416    }
    417 
    418 #undef IMM_ZERO
    419 #undef IMM_ONE
    420 #undef IMM_NEG_ONE
    421 }
    422 
    423 
    424 static unsigned
    425 translate_opcode( unsigned op )
    426 {
    427    switch( op ) {
    428    case OPCODE_ARL:
    429       return TGSI_OPCODE_ARL;
    430    case OPCODE_ADD:
    431       return TGSI_OPCODE_ADD;
    432    case OPCODE_CMP:
    433       return TGSI_OPCODE_CMP;
    434    case OPCODE_COS:
    435       return TGSI_OPCODE_COS;
    436    case OPCODE_DP3:
    437       return TGSI_OPCODE_DP3;
    438    case OPCODE_DP4:
    439       return TGSI_OPCODE_DP4;
    440    case OPCODE_DPH:
    441       return TGSI_OPCODE_DPH;
    442    case OPCODE_DST:
    443       return TGSI_OPCODE_DST;
    444    case OPCODE_EX2:
    445       return TGSI_OPCODE_EX2;
    446    case OPCODE_EXP:
    447       return TGSI_OPCODE_EXP;
    448    case OPCODE_FLR:
    449       return TGSI_OPCODE_FLR;
    450    case OPCODE_FRC:
    451       return TGSI_OPCODE_FRC;
    452    case OPCODE_KIL:
    453       return TGSI_OPCODE_KILL_IF;
    454    case OPCODE_LG2:
    455       return TGSI_OPCODE_LG2;
    456    case OPCODE_LOG:
    457       return TGSI_OPCODE_LOG;
    458    case OPCODE_LIT:
    459       return TGSI_OPCODE_LIT;
    460    case OPCODE_LRP:
    461       return TGSI_OPCODE_LRP;
    462    case OPCODE_MAD:
    463       return TGSI_OPCODE_MAD;
    464    case OPCODE_MAX:
    465       return TGSI_OPCODE_MAX;
    466    case OPCODE_MIN:
    467       return TGSI_OPCODE_MIN;
    468    case OPCODE_MOV:
    469       return TGSI_OPCODE_MOV;
    470    case OPCODE_MUL:
    471       return TGSI_OPCODE_MUL;
    472    case OPCODE_POW:
    473       return TGSI_OPCODE_POW;
    474    case OPCODE_RCP:
    475       return TGSI_OPCODE_RCP;
    476    case OPCODE_SCS:
    477       return TGSI_OPCODE_SCS;
    478    case OPCODE_SGE:
    479       return TGSI_OPCODE_SGE;
    480    case OPCODE_SIN:
    481       return TGSI_OPCODE_SIN;
    482    case OPCODE_SLT:
    483       return TGSI_OPCODE_SLT;
    484    case OPCODE_TEX:
    485       return TGSI_OPCODE_TEX;
    486    case OPCODE_TXB:
    487       return TGSI_OPCODE_TXB;
    488    case OPCODE_TXP:
    489       return TGSI_OPCODE_TXP;
    490    case OPCODE_XPD:
    491       return TGSI_OPCODE_XPD;
    492    case OPCODE_END:
    493       return TGSI_OPCODE_END;
    494    default:
    495       debug_assert( 0 );
    496       return TGSI_OPCODE_NOP;
    497    }
    498 }
    499 
    500 
    501 static void
    502 compile_instruction(
    503    struct gl_context *ctx,
    504    struct st_translate *t,
    505    const struct prog_instruction *inst)
    506 {
    507    struct ureg_program *ureg = t->ureg;
    508    GLuint i;
    509    struct ureg_dst dst[1] = { { 0 } };
    510    struct ureg_src src[4];
    511    unsigned num_dst;
    512    unsigned num_src;
    513 
    514    num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
    515    num_src = _mesa_num_inst_src_regs( inst->Opcode );
    516 
    517    if (num_dst)
    518       dst[0] = translate_dst( t,
    519                               &inst->DstReg,
    520                               inst->Saturate);
    521 
    522    for (i = 0; i < num_src; i++)
    523       src[i] = translate_src( t, &inst->SrcReg[i] );
    524 
    525    switch( inst->Opcode ) {
    526    case OPCODE_SWZ:
    527       emit_swz( t, dst[0], &inst->SrcReg[0] );
    528       return;
    529 
    530    case OPCODE_TEX:
    531    case OPCODE_TXB:
    532    case OPCODE_TXP:
    533       src[num_src++] = t->samplers[inst->TexSrcUnit];
    534       ureg_tex_insn( ureg,
    535                      translate_opcode( inst->Opcode ),
    536                      dst, num_dst,
    537                      st_translate_texture_target( inst->TexSrcTarget,
    538                                                inst->TexShadow ),
    539                      NULL, 0,
    540                      src, num_src );
    541       return;
    542 
    543    case OPCODE_SCS:
    544       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
    545       ureg_insn( ureg,
    546                  translate_opcode( inst->Opcode ),
    547                  dst, num_dst,
    548                  src, num_src );
    549       break;
    550 
    551    case OPCODE_XPD:
    552       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
    553       ureg_insn( ureg,
    554                  translate_opcode( inst->Opcode ),
    555                  dst, num_dst,
    556                  src, num_src );
    557       break;
    558 
    559    case OPCODE_RSQ:
    560       ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
    561       break;
    562 
    563    case OPCODE_ABS:
    564       ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
    565       break;
    566 
    567    case OPCODE_SUB:
    568       ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
    569       break;
    570 
    571    default:
    572       ureg_insn( ureg,
    573                  translate_opcode( inst->Opcode ),
    574                  dst, num_dst,
    575                  src, num_src );
    576       break;
    577    }
    578 }
    579 
    580 
    581 /**
    582  * Emit the TGSI instructions for inverting and adjusting WPOS.
    583  * This code is unavoidable because it also depends on whether
    584  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
    585  */
    586 static void
    587 emit_wpos_adjustment(struct gl_context *ctx,
    588                      struct st_translate *t,
    589                      const struct gl_program *program,
    590                      boolean invert,
    591                      GLfloat adjX, GLfloat adjY[2])
    592 {
    593    struct ureg_program *ureg = t->ureg;
    594 
    595    /* Fragment program uses fragment position input.
    596     * Need to replace instances of INPUT[WPOS] with temp T
    597     * where T = INPUT[WPOS] by y is inverted.
    598     */
    599    static const gl_state_index wposTransformState[STATE_LENGTH]
    600       = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
    601 
    602    /* XXX: note we are modifying the incoming shader here!  Need to
    603     * do this before emitting the constant decls below, or this
    604     * will be missed:
    605     */
    606    unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
    607                                                        wposTransformState);
    608 
    609    struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
    610    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
    611    struct ureg_src *wpos =
    612       ctx->Const.GLSLFragCoordIsSysVal ?
    613          &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
    614          &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
    615    struct ureg_src wpos_input = *wpos;
    616 
    617    /* First, apply the coordinate shift: */
    618    if (adjX || adjY[0] || adjY[1]) {
    619       if (adjY[0] != adjY[1]) {
    620          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
    621           * depending on whether inversion is actually going to be applied
    622           * or not, which is determined by testing against the inversion
    623           * state variable used below, which will be either +1 or -1.
    624           */
    625          struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
    626 
    627          ureg_CMP(ureg, adj_temp,
    628                   ureg_scalar(wpostrans, invert ? 2 : 0),
    629                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
    630                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
    631          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
    632       } else {
    633          ureg_ADD(ureg, wpos_temp, wpos_input,
    634                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
    635       }
    636       wpos_input = ureg_src(wpos_temp);
    637    } else {
    638       /* MOV wpos_temp, input[wpos]
    639        */
    640       ureg_MOV( ureg, wpos_temp, wpos_input );
    641    }
    642 
    643    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    644     * inversion/identity, or the other way around if we're drawing to an FBO.
    645     */
    646    if (invert) {
    647       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
    648        */
    649       ureg_MAD( ureg,
    650                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
    651                 wpos_input,
    652                 ureg_scalar(wpostrans, 0),
    653                 ureg_scalar(wpostrans, 1));
    654    } else {
    655       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
    656        */
    657       ureg_MAD( ureg,
    658                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
    659                 wpos_input,
    660                 ureg_scalar(wpostrans, 2),
    661                 ureg_scalar(wpostrans, 3));
    662    }
    663 
    664    /* Use wpos_temp as position input from here on:
    665     */
    666    *wpos = ureg_src(wpos_temp);
    667 }
    668 
    669 
    670 /**
    671  * Emit fragment position/coordinate code.
    672  */
    673 static void
    674 emit_wpos(struct st_context *st,
    675           struct st_translate *t,
    676           const struct gl_program *program,
    677           struct ureg_program *ureg)
    678 {
    679    struct pipe_screen *pscreen = st->pipe->screen;
    680    GLfloat adjX = 0.0f;
    681    GLfloat adjY[2] = { 0.0f, 0.0f };
    682    boolean invert = FALSE;
    683 
    684    /* Query the pixel center conventions supported by the pipe driver and set
    685     * adjX, adjY to help out if it cannot handle the requested one internally.
    686     *
    687     * The bias of the y-coordinate depends on whether y-inversion takes place
    688     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    689     * drawing to an FBO (causes additional inversion), and whether the pipe
    690     * driver origin and the requested origin differ (the latter condition is
    691     * stored in the 'invert' variable).
    692     *
    693     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    694     *
    695     * center shift only:
    696     * i -> h: +0.5
    697     * h -> i: -0.5
    698     *
    699     * inversion only:
    700     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    701     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    702     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    703     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    704     *
    705     * inversion and center shift:
    706     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    707     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    708     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    709     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    710     */
    711    if (program->OriginUpperLeft) {
    712       /* Fragment shader wants origin in upper-left */
    713       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
    714          /* the driver supports upper-left origin */
    715       }
    716       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
    717          /* the driver supports lower-left origin, need to invert Y */
    718          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
    719                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
    720          invert = TRUE;
    721       }
    722       else
    723          assert(0);
    724    }
    725    else {
    726       /* Fragment shader wants origin in lower-left */
    727       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
    728          /* the driver supports lower-left origin */
    729          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
    730                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
    731       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
    732          /* the driver supports upper-left origin, need to invert Y */
    733          invert = TRUE;
    734       else
    735          assert(0);
    736    }
    737 
    738    if (program->PixelCenterInteger) {
    739       /* Fragment shader wants pixel center integer */
    740       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
    741          /* the driver supports pixel center integer */
    742          adjY[1] = 1.0f;
    743          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
    744                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
    745       }
    746       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
    747          /* the driver supports pixel center half integer, need to bias X,Y */
    748          adjX = -0.5f;
    749          adjY[0] = -0.5f;
    750          adjY[1] = 0.5f;
    751       }
    752       else
    753          assert(0);
    754    }
    755    else {
    756       /* Fragment shader wants pixel center half integer */
    757       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
    758          /* the driver supports pixel center half integer */
    759       }
    760       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
    761          /* the driver supports pixel center integer, need to bias X,Y */
    762          adjX = adjY[0] = adjY[1] = 0.5f;
    763          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
    764                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
    765       }
    766       else
    767          assert(0);
    768    }
    769 
    770    /* we invert after adjustment so that we avoid the MOV to temporary,
    771     * and reuse the adjustment ADD instead */
    772    emit_wpos_adjustment(st->ctx, t, program, invert, adjX, adjY);
    773 }
    774 
    775 
    776 /**
    777  * Translate Mesa program to TGSI format.
    778  * \param program  the program to translate
    779  * \param numInputs  number of input registers used
    780  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
    781  *                      input indexes
    782  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
    783  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
    784  *                            each input
    785  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
    786  * \param numOutputs  number of output registers used
    787  * \param outputMapping  maps Mesa fragment program outputs to TGSI
    788  *                       generic outputs
    789  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
    790  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
    791  *                             each output
    792  *
    793  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
    794  */
    795 enum pipe_error
    796 st_translate_mesa_program(
    797    struct gl_context *ctx,
    798    uint procType,
    799    struct ureg_program *ureg,
    800    const struct gl_program *program,
    801    GLuint numInputs,
    802    const GLuint inputMapping[],
    803    const ubyte inputSemanticName[],
    804    const ubyte inputSemanticIndex[],
    805    const GLuint interpMode[],
    806    GLuint numOutputs,
    807    const GLuint outputMapping[],
    808    const ubyte outputSemanticName[],
    809    const ubyte outputSemanticIndex[])
    810 {
    811    struct st_translate translate, *t;
    812    unsigned i;
    813    enum pipe_error ret = PIPE_OK;
    814 
    815    assert(numInputs <= ARRAY_SIZE(t->inputs));
    816    assert(numOutputs <= ARRAY_SIZE(t->outputs));
    817 
    818    t = &translate;
    819    memset(t, 0, sizeof *t);
    820 
    821    t->procType = procType;
    822    t->inputMapping = inputMapping;
    823    t->outputMapping = outputMapping;
    824    t->ureg = ureg;
    825 
    826    /*_mesa_print_program(program);*/
    827 
    828    /*
    829     * Declare input attributes.
    830     */
    831    if (procType == PIPE_SHADER_FRAGMENT) {
    832       for (i = 0; i < numInputs; i++) {
    833          t->inputs[i] = ureg_DECL_fs_input(ureg,
    834                                            inputSemanticName[i],
    835                                            inputSemanticIndex[i],
    836                                            interpMode[i]);
    837       }
    838 
    839       if (program->info.inputs_read & VARYING_BIT_POS) {
    840          /* Must do this after setting up t->inputs, and before
    841           * emitting constant references, below:
    842           */
    843          emit_wpos(st_context(ctx), t, program, ureg);
    844       }
    845 
    846       /*
    847        * Declare output attributes.
    848        */
    849       for (i = 0; i < numOutputs; i++) {
    850          switch (outputSemanticName[i]) {
    851          case TGSI_SEMANTIC_POSITION:
    852             t->outputs[i] = ureg_DECL_output( ureg,
    853                                               TGSI_SEMANTIC_POSITION, /* Z / Depth */
    854                                               outputSemanticIndex[i] );
    855 
    856             t->outputs[i] = ureg_writemask( t->outputs[i],
    857                                             TGSI_WRITEMASK_Z );
    858             break;
    859          case TGSI_SEMANTIC_STENCIL:
    860             t->outputs[i] = ureg_DECL_output( ureg,
    861                                               TGSI_SEMANTIC_STENCIL, /* Stencil */
    862                                               outputSemanticIndex[i] );
    863             t->outputs[i] = ureg_writemask( t->outputs[i],
    864                                             TGSI_WRITEMASK_Y );
    865             break;
    866          case TGSI_SEMANTIC_COLOR:
    867             t->outputs[i] = ureg_DECL_output( ureg,
    868                                               TGSI_SEMANTIC_COLOR,
    869                                               outputSemanticIndex[i] );
    870             break;
    871          default:
    872             debug_assert(0);
    873             return 0;
    874          }
    875       }
    876    }
    877    else if (procType == PIPE_SHADER_GEOMETRY) {
    878       for (i = 0; i < numInputs; i++) {
    879          t->inputs[i] = ureg_DECL_input(ureg,
    880                                         inputSemanticName[i],
    881                                         inputSemanticIndex[i], 0, 1);
    882       }
    883 
    884       for (i = 0; i < numOutputs; i++) {
    885          t->outputs[i] = ureg_DECL_output( ureg,
    886                                            outputSemanticName[i],
    887                                            outputSemanticIndex[i] );
    888       }
    889    }
    890    else {
    891       assert(procType == PIPE_SHADER_VERTEX);
    892 
    893       for (i = 0; i < numInputs; i++) {
    894          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
    895       }
    896 
    897       for (i = 0; i < numOutputs; i++) {
    898          t->outputs[i] = ureg_DECL_output( ureg,
    899                                            outputSemanticName[i],
    900                                            outputSemanticIndex[i] );
    901          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
    902             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
    903             ureg_MOV(ureg,
    904                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
    905                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
    906             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
    907 	 }
    908       }
    909    }
    910 
    911    /* Declare address register.
    912     */
    913    if (program->arb.NumAddressRegs > 0) {
    914       debug_assert( program->arb.NumAddressRegs == 1 );
    915       t->address[0] = ureg_DECL_address( ureg );
    916    }
    917 
    918    /* Declare misc input registers
    919     */
    920    {
    921       GLbitfield sysInputs = program->info.system_values_read;
    922 
    923       for (i = 0; sysInputs; i++) {
    924          if (sysInputs & (1 << i)) {
    925             unsigned semName = _mesa_sysval_to_semantic(i);
    926 
    927             t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
    928 
    929             if (semName == TGSI_SEMANTIC_INSTANCEID ||
    930                 semName == TGSI_SEMANTIC_VERTEXID) {
    931                /* From Gallium perspective, these system values are always
    932                 * integer, and require native integer support.  However, if
    933                 * native integer is supported on the vertex stage but not the
    934                 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
    935                 * assumes these system values are floats. To resolve the
    936                 * inconsistency, we insert a U2F.
    937                 */
    938                struct st_context *st = st_context(ctx);
    939                struct pipe_screen *pscreen = st->pipe->screen;
    940                assert(procType == PIPE_SHADER_VERTEX);
    941                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
    942                (void) pscreen;  /* silence non-debug build warnings */
    943                if (!ctx->Const.NativeIntegers) {
    944                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
    945                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
    946                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
    947                }
    948             }
    949 
    950             if (procType == PIPE_SHADER_FRAGMENT &&
    951                 semName == TGSI_SEMANTIC_POSITION)
    952                emit_wpos(st_context(ctx), t, program, ureg);
    953 
    954             sysInputs &= ~(1 << i);
    955          }
    956       }
    957    }
    958 
    959    if (program->arb.IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
    960       /* If temps are accessed with indirect addressing, declare temporaries
    961        * in sequential order.  Else, we declare them on demand elsewhere.
    962        */
    963       for (i = 0; i < program->arb.NumTemporaries; i++) {
    964          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
    965          t->temps[i] = ureg_DECL_temporary( t->ureg );
    966       }
    967    }
    968 
    969    /* Emit constants and immediates.  Mesa uses a single index space
    970     * for these, so we put all the translated regs in t->constants.
    971     */
    972    if (program->Parameters) {
    973       t->constants = calloc( program->Parameters->NumParameters,
    974                              sizeof t->constants[0] );
    975       if (t->constants == NULL) {
    976          ret = PIPE_ERROR_OUT_OF_MEMORY;
    977          goto out;
    978       }
    979 
    980       for (i = 0; i < program->Parameters->NumParameters; i++) {
    981          switch (program->Parameters->Parameters[i].Type) {
    982          case PROGRAM_STATE_VAR:
    983          case PROGRAM_UNIFORM:
    984             t->constants[i] = ureg_DECL_constant( ureg, i );
    985             break;
    986 
    987             /* Emit immediates only when there's no indirect addressing of
    988              * the const buffer.
    989              * FIXME: Be smarter and recognize param arrays:
    990              * indirect addressing is only valid within the referenced
    991              * array.
    992              */
    993          case PROGRAM_CONSTANT:
    994             if (program->arb.IndirectRegisterFiles & PROGRAM_ANY_CONST)
    995                t->constants[i] = ureg_DECL_constant( ureg, i );
    996             else
    997                t->constants[i] =
    998                   ureg_DECL_immediate( ureg,
    999                                        (const float*) program->Parameters->ParameterValues[i],
   1000                                        4 );
   1001             break;
   1002          default:
   1003             break;
   1004          }
   1005       }
   1006    }
   1007 
   1008    /* texture samplers */
   1009    for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
   1010       if (program->SamplersUsed & (1u << i)) {
   1011          unsigned target =
   1012             translate_texture_index(program->TexturesUsed[i],
   1013                                     !!(program->ShadowSamplers & (1 << i)));
   1014          t->samplers[i] = ureg_DECL_sampler( ureg, i );
   1015          ureg_DECL_sampler_view(ureg, i, target,
   1016                                 TGSI_RETURN_TYPE_FLOAT,
   1017                                 TGSI_RETURN_TYPE_FLOAT,
   1018                                 TGSI_RETURN_TYPE_FLOAT,
   1019                                 TGSI_RETURN_TYPE_FLOAT);
   1020 
   1021       }
   1022    }
   1023 
   1024    /* Emit each instruction in turn:
   1025     */
   1026    for (i = 0; i < program->arb.NumInstructions; i++)
   1027       compile_instruction(ctx, t, &program->arb.Instructions[i]);
   1028 
   1029 out:
   1030    free(t->constants);
   1031    return ret;
   1032 }
   1033