Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keith (at) tungstengraphics.com>
     30   */
     31 
     32 
     33 #include "main/glheader.h"
     34 #include "main/macros.h"
     35 #include "main/enums.h"
     36 #include "brw_context.h"
     37 #include "brw_wm.h"
     38 #include "brw_util.h"
     39 
     40 #include "program/prog_parameter.h"
     41 #include "program/prog_print.h"
     42 #include "program/prog_statevars.h"
     43 
     44 
     45 /** An invalid texture target */
     46 #define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
     47 
     48 /** An invalid texture unit */
     49 #define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
     50 
     51 #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
     52 
     53 #define X    0
     54 #define Y    1
     55 #define Z    2
     56 #define W    3
     57 
     58 
     59 static const char *wm_opcode_strings[] = {
     60    "PIXELXY",
     61    "DELTAXY",
     62    "PIXELW",
     63    "LINTERP",
     64    "PINTERP",
     65    "CINTERP",
     66    "WPOSXY",
     67    "FB_WRITE",
     68    "FRONTFACING",
     69 };
     70 
     71 #if 0
     72 static const char *wm_file_strings[] = {
     73    "PAYLOAD"
     74 };
     75 #endif
     76 
     77 
     78 /***********************************************************************
     79  * Source regs
     80  */
     81 
     82 static struct prog_src_register src_reg(GLuint file, GLuint idx)
     83 {
     84    struct prog_src_register reg;
     85    reg.File = file;
     86    reg.Index = idx;
     87    reg.Swizzle = SWIZZLE_NOOP;
     88    reg.RelAddr = 0;
     89    reg.Negate = NEGATE_NONE;
     90    reg.Abs = 0;
     91    reg.HasIndex2 = 0;
     92    reg.RelAddr2 = 0;
     93    reg.Index2 = 0;
     94    return reg;
     95 }
     96 
     97 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
     98 {
     99    return src_reg(dst.File, dst.Index);
    100 }
    101 
    102 static struct prog_src_register src_undef( void )
    103 {
    104    return src_reg(PROGRAM_UNDEFINED, 0);
    105 }
    106 
    107 static bool src_is_undef(struct prog_src_register src)
    108 {
    109    return src.File == PROGRAM_UNDEFINED;
    110 }
    111 
    112 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
    113 {
    114    reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
    115    return reg;
    116 }
    117 
    118 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
    119 {
    120    return src_swizzle(reg, x, x, x, x);
    121 }
    122 
    123 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
    124 {
    125    reg.Swizzle = swizzle;
    126    return reg;
    127 }
    128 
    129 
    130 /***********************************************************************
    131  * Dest regs
    132  */
    133 
    134 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
    135 {
    136    struct prog_dst_register reg;
    137    reg.File = file;
    138    reg.Index = idx;
    139    reg.WriteMask = WRITEMASK_XYZW;
    140    reg.RelAddr = 0;
    141    reg.CondMask = COND_TR;
    142    reg.CondSwizzle = 0;
    143    reg.CondSrc = 0;
    144    return reg;
    145 }
    146 
    147 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
    148 {
    149    reg.WriteMask &= mask;
    150    return reg;
    151 }
    152 
    153 static struct prog_dst_register dst_undef( void )
    154 {
    155    return dst_reg(PROGRAM_UNDEFINED, 0);
    156 }
    157 
    158 
    159 
    160 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
    161 {
    162    int bit = ffs( ~c->fp_temp );
    163 
    164    if (!bit) {
    165       printf("%s: out of temporaries\n", __FILE__);
    166       exit(1);
    167    }
    168 
    169    c->fp_temp |= 1<<(bit-1);
    170    return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
    171 }
    172 
    173 
    174 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
    175 {
    176    c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
    177 }
    178 
    179 
    180 /***********************************************************************
    181  * Instructions
    182  */
    183 
    184 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
    185 {
    186    assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
    187    memset(&c->prog_instructions[c->nr_fp_insns], 0,
    188 	  sizeof(*c->prog_instructions));
    189    return &c->prog_instructions[c->nr_fp_insns++];
    190 }
    191 
    192 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
    193 					const struct prog_instruction *inst0)
    194 {
    195    struct prog_instruction *inst = get_fp_inst(c);
    196    *inst = *inst0;
    197    return inst;
    198 }
    199 
    200 static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
    201 				       GLuint op,
    202 				       struct prog_dst_register dest,
    203 				       GLuint saturate,
    204 				       GLuint tex_src_unit,
    205 				       GLuint tex_src_target,
    206 				       GLuint tex_shadow,
    207 				       struct prog_src_register src0,
    208 				       struct prog_src_register src1,
    209 				       struct prog_src_register src2 )
    210 {
    211    struct prog_instruction *inst = get_fp_inst(c);
    212 
    213    assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
    214           tex_src_unit == TEX_UNIT_NONE);
    215    assert(tex_src_target < NUM_TEXTURE_TARGETS ||
    216           tex_src_target == TEX_TARGET_NONE);
    217 
    218    memset(inst, 0, sizeof(*inst));
    219 
    220    inst->Opcode = op;
    221    inst->DstReg = dest;
    222    inst->SaturateMode = saturate;
    223    inst->TexSrcUnit = tex_src_unit;
    224    inst->TexSrcTarget = tex_src_target;
    225    inst->TexShadow = tex_shadow;
    226    inst->SrcReg[0] = src0;
    227    inst->SrcReg[1] = src1;
    228    inst->SrcReg[2] = src2;
    229    return inst;
    230 }
    231 
    232 
    233 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
    234 				       GLuint op,
    235 				       struct prog_dst_register dest,
    236 				       GLuint saturate,
    237 				       struct prog_src_register src0,
    238 				       struct prog_src_register src1,
    239 				       struct prog_src_register src2 )
    240 {
    241    return emit_tex_op(c, op, dest, saturate,
    242                       TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
    243                       src0, src1, src2);
    244 }
    245 
    246 
    247 /* Many Mesa opcodes produce the same value across all the result channels.
    248  * We'd rather not have to support that splatting in the opcode implementations,
    249  * and brw_wm_pass*.c wants to optimize them out by shuffling references around
    250  * anyway.  We can easily get both by emitting the opcode to one channel, and
    251  * then MOVing it to the others, which brw_wm_pass*.c already understands.
    252  */
    253 static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
    254 						 const struct prog_instruction *inst0)
    255 {
    256    struct prog_instruction *inst;
    257    unsigned int dst_chan;
    258    unsigned int other_channel_mask;
    259 
    260    if (inst0->DstReg.WriteMask == 0)
    261       return NULL;
    262 
    263    dst_chan = ffs(inst0->DstReg.WriteMask) - 1;
    264    inst = get_fp_inst(c);
    265    *inst = *inst0;
    266    inst->DstReg.WriteMask = 1 << dst_chan;
    267 
    268    other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
    269    if (other_channel_mask != 0) {
    270       inst = emit_op(c,
    271 		     OPCODE_MOV,
    272 		     dst_mask(inst0->DstReg, other_channel_mask),
    273 		     0,
    274 		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
    275 		     src_undef(),
    276 		     src_undef());
    277    }
    278    return inst;
    279 }
    280 
    281 
    282 /***********************************************************************
    283  * Special instructions for interpolation and other tasks
    284  */
    285 
    286 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
    287 {
    288    if (src_is_undef(c->pixel_xy)) {
    289       struct prog_dst_register pixel_xy = get_temp(c);
    290       struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
    291 
    292 
    293       /* Emit the out calculations, and hold onto the results.  Use
    294        * two instructions as a temporary is required.
    295        */
    296       /* pixel_xy.xy = PIXELXY payload[0];
    297        */
    298       emit_op(c,
    299 	      WM_PIXELXY,
    300 	      dst_mask(pixel_xy, WRITEMASK_XY),
    301 	      0,
    302 	      payload_r0_depth,
    303 	      src_undef(),
    304 	      src_undef());
    305 
    306       c->pixel_xy = src_reg_from_dst(pixel_xy);
    307    }
    308 
    309    return c->pixel_xy;
    310 }
    311 
    312 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
    313 {
    314    if (src_is_undef(c->delta_xy)) {
    315       struct prog_dst_register delta_xy = get_temp(c);
    316       struct prog_src_register pixel_xy = get_pixel_xy(c);
    317       struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
    318 
    319       /* deltas.xy = DELTAXY pixel_xy, payload[0]
    320        */
    321       emit_op(c,
    322 	      WM_DELTAXY,
    323 	      dst_mask(delta_xy, WRITEMASK_XY),
    324 	      0,
    325 	      pixel_xy,
    326 	      payload_r0_depth,
    327 	      src_undef());
    328 
    329       c->delta_xy = src_reg_from_dst(delta_xy);
    330    }
    331 
    332    return c->delta_xy;
    333 }
    334 
    335 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
    336 {
    337    /* This is called for producing 1/w in pre-gen6 interp.  for gen6,
    338     * the interp opcodes don't use this argument.  But to keep the
    339     * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
    340     * into the slot.
    341     */
    342    if (c->func.brw->intel.gen >= 6)
    343       return c->delta_xy;
    344 
    345    if (src_is_undef(c->pixel_w)) {
    346       struct prog_dst_register pixel_w = get_temp(c);
    347       struct prog_src_register deltas = get_delta_xy(c);
    348       struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
    349 
    350       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
    351        */
    352       emit_op(c,
    353 	      WM_PIXELW,
    354 	      dst_mask(pixel_w, WRITEMASK_W),
    355 	      0,
    356 	      interp_wpos,
    357 	      deltas,
    358 	      src_undef());
    359 
    360 
    361       c->pixel_w = src_reg_from_dst(pixel_w);
    362    }
    363 
    364    return c->pixel_w;
    365 }
    366 
    367 static void emit_interp( struct brw_wm_compile *c,
    368 			 GLuint idx )
    369 {
    370    struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
    371    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
    372    struct prog_src_register deltas;
    373 
    374    deltas = get_delta_xy(c);
    375 
    376    /* Need to use PINTERP on attributes which have been
    377     * multiplied by 1/W in the SF program, and LINTERP on those
    378     * which have not:
    379     */
    380    switch (idx) {
    381    case FRAG_ATTRIB_WPOS:
    382       /* Have to treat wpos.xy specially:
    383        */
    384       emit_op(c,
    385 	      WM_WPOSXY,
    386 	      dst_mask(dst, WRITEMASK_XY),
    387 	      0,
    388 	      get_pixel_xy(c),
    389 	      src_undef(),
    390 	      src_undef());
    391 
    392       dst = dst_mask(dst, WRITEMASK_ZW);
    393 
    394       /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
    395        */
    396       emit_op(c,
    397 	      WM_LINTERP,
    398 	      dst,
    399 	      0,
    400 	      interp,
    401 	      deltas,
    402 	      src_undef());
    403       break;
    404    case FRAG_ATTRIB_COL0:
    405    case FRAG_ATTRIB_COL1:
    406       if (c->key.flat_shade) {
    407 	 emit_op(c,
    408 		 WM_CINTERP,
    409 		 dst,
    410 		 0,
    411 		 interp,
    412 		 src_undef(),
    413 		 src_undef());
    414       }
    415       else {
    416 	 /* perspective-corrected color interpolation */
    417 	 emit_op(c,
    418 		 WM_PINTERP,
    419 		 dst,
    420 		 0,
    421 		 interp,
    422 		 deltas,
    423 		 get_pixel_w(c));
    424       }
    425       break;
    426    case FRAG_ATTRIB_FOGC:
    427       /* Interpolate the fog coordinate */
    428       emit_op(c,
    429 	      WM_PINTERP,
    430 	      dst_mask(dst, WRITEMASK_X),
    431 	      0,
    432 	      interp,
    433 	      deltas,
    434 	      get_pixel_w(c));
    435 
    436       emit_op(c,
    437 	      OPCODE_MOV,
    438 	      dst_mask(dst, WRITEMASK_YZW),
    439 	      0,
    440 	      src_swizzle(interp,
    441 			  SWIZZLE_ZERO,
    442 			  SWIZZLE_ZERO,
    443 			  SWIZZLE_ZERO,
    444 			  SWIZZLE_ONE),
    445 	      src_undef(),
    446 	      src_undef());
    447       break;
    448 
    449    case FRAG_ATTRIB_FACE:
    450       emit_op(c,
    451               WM_FRONTFACING,
    452               dst_mask(dst, WRITEMASK_X),
    453               0,
    454               src_undef(),
    455               src_undef(),
    456               src_undef());
    457       break;
    458 
    459    case FRAG_ATTRIB_PNTC:
    460       /* XXX review/test this case */
    461       emit_op(c,
    462 	      WM_PINTERP,
    463 	      dst_mask(dst, WRITEMASK_XY),
    464 	      0,
    465 	      interp,
    466 	      deltas,
    467 	      get_pixel_w(c));
    468 
    469       emit_op(c,
    470 	      OPCODE_MOV,
    471 	      dst_mask(dst, WRITEMASK_ZW),
    472 	      0,
    473 	      src_swizzle(interp,
    474 			  SWIZZLE_ZERO,
    475 			  SWIZZLE_ZERO,
    476 			  SWIZZLE_ZERO,
    477 			  SWIZZLE_ONE),
    478 	      src_undef(),
    479 	      src_undef());
    480       break;
    481 
    482    default:
    483       emit_op(c,
    484 	      WM_PINTERP,
    485 	      dst,
    486 	      0,
    487 	      interp,
    488 	      deltas,
    489 	      get_pixel_w(c));
    490       break;
    491    }
    492 
    493    c->fp_interp_emitted |= 1<<idx;
    494 }
    495 
    496 /***********************************************************************
    497  * Hacks to extend the program parameter and constant lists.
    498  */
    499 
    500 /* Add the fog parameters to the parameter list of the original
    501  * program, rather than creating a new list.  Doesn't really do any
    502  * harm and it's not as if the parameter handling isn't a big hack
    503  * anyway.
    504  */
    505 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
    506                                                      GLint s0,
    507                                                      GLint s1,
    508                                                      GLint s2,
    509                                                      GLint s3,
    510                                                      GLint s4)
    511 {
    512    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
    513    gl_state_index tokens[STATE_LENGTH];
    514    GLuint idx;
    515    tokens[0] = s0;
    516    tokens[1] = s1;
    517    tokens[2] = s2;
    518    tokens[3] = s3;
    519    tokens[4] = s4;
    520 
    521    idx = _mesa_add_state_reference( paramList, tokens );
    522 
    523    return src_reg(PROGRAM_STATE_VAR, idx);
    524 }
    525 
    526 
    527 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
    528 						     GLfloat s0,
    529 						     GLfloat s1,
    530 						     GLfloat s2,
    531 						     GLfloat s3)
    532 {
    533    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
    534    gl_constant_value values[4];
    535    GLuint idx;
    536    GLuint swizzle;
    537    struct prog_src_register reg;
    538 
    539    values[0].f = s0;
    540    values[1].f = s1;
    541    values[2].f = s2;
    542    values[3].f = s3;
    543 
    544    idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
    545    reg = src_reg(PROGRAM_STATE_VAR, idx);
    546    reg.Swizzle = swizzle;
    547 
    548    return reg;
    549 }
    550 
    551 
    552 
    553 /***********************************************************************
    554  * Expand various instructions here to simpler forms.
    555  */
    556 static void precalc_dst( struct brw_wm_compile *c,
    557 			       const struct prog_instruction *inst )
    558 {
    559    struct prog_src_register src0 = inst->SrcReg[0];
    560    struct prog_src_register src1 = inst->SrcReg[1];
    561    struct prog_dst_register dst = inst->DstReg;
    562    struct prog_dst_register temp = get_temp(c);
    563 
    564    if (dst.WriteMask & WRITEMASK_Y) {
    565       /* dst.y = mul src0.y, src1.y
    566        */
    567       emit_op(c,
    568 	      OPCODE_MUL,
    569 	      dst_mask(temp, WRITEMASK_Y),
    570 	      inst->SaturateMode,
    571 	      src0,
    572 	      src1,
    573 	      src_undef());
    574    }
    575 
    576    if (dst.WriteMask & WRITEMASK_XZ) {
    577       struct prog_instruction *swz;
    578       GLuint z = GET_SWZ(src0.Swizzle, Z);
    579 
    580       /* dst.xz = swz src0.1zzz
    581        */
    582       swz = emit_op(c,
    583 		    OPCODE_SWZ,
    584 		    dst_mask(temp, WRITEMASK_XZ),
    585 		    inst->SaturateMode,
    586 		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
    587 		    src_undef(),
    588 		    src_undef());
    589       /* Avoid letting negation flag of src0 affect our 1 constant. */
    590       swz->SrcReg[0].Negate &= ~NEGATE_X;
    591    }
    592    if (dst.WriteMask & WRITEMASK_W) {
    593       /* dst.w = mov src1.w
    594        */
    595       emit_op(c,
    596 	      OPCODE_MOV,
    597 	      dst_mask(temp, WRITEMASK_W),
    598 	      inst->SaturateMode,
    599 	      src1,
    600 	      src_undef(),
    601 	      src_undef());
    602    }
    603 
    604    /* This will get optimized out in general, but it ensures that we
    605     * don't overwrite src operands in our channel-wise splitting
    606     * above.  See piglit fp-dst-aliasing-[12].
    607     */
    608    emit_op(c,
    609 	   OPCODE_MOV,
    610 	   dst,
    611 	   0,
    612 	   src_reg_from_dst(temp),
    613 	   src_undef(),
    614 	   src_undef());
    615 
    616    release_temp(c, temp);
    617 }
    618 
    619 
    620 static void precalc_lit( struct brw_wm_compile *c,
    621 			 const struct prog_instruction *inst )
    622 {
    623    struct prog_src_register src0 = inst->SrcReg[0];
    624    struct prog_dst_register dst = inst->DstReg;
    625 
    626    if (dst.WriteMask & WRITEMASK_YZ) {
    627       emit_op(c,
    628 	      OPCODE_LIT,
    629 	      dst_mask(dst, WRITEMASK_YZ),
    630 	      inst->SaturateMode,
    631 	      src0,
    632 	      src_undef(),
    633 	      src_undef());
    634    }
    635 
    636    if (dst.WriteMask & WRITEMASK_XW) {
    637       struct prog_instruction *swz;
    638 
    639       /* dst.xw = swz src0.1111
    640        */
    641       swz = emit_op(c,
    642 		    OPCODE_SWZ,
    643 		    dst_mask(dst, WRITEMASK_XW),
    644 		    0,
    645 		    src_swizzle1(src0, SWIZZLE_ONE),
    646 		    src_undef(),
    647 		    src_undef());
    648       /* Avoid letting the negation flag of src0 affect our 1 constant. */
    649       swz->SrcReg[0].Negate = NEGATE_NONE;
    650    }
    651 }
    652 
    653 
    654 /**
    655  * Some TEX instructions require extra code, cube map coordinate
    656  * normalization, or coordinate scaling for RECT textures, etc.
    657  * This function emits those extra instructions and the TEX
    658  * instruction itself.
    659  */
    660 static void precalc_tex( struct brw_wm_compile *c,
    661 			 const struct prog_instruction *inst )
    662 {
    663    struct brw_compile *p = &c->func;
    664    struct intel_context *intel = &p->brw->intel;
    665    struct prog_src_register coord;
    666    struct prog_dst_register tmpcoord = { 0 };
    667    const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
    668    struct prog_dst_register unswizzled_tmp;
    669 
    670    /* If we are doing EXT_texture_swizzle, we need to write our result into a
    671     * temporary, otherwise writemasking of the real dst could lose some of our
    672     * channels.
    673     */
    674    if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
    675       unswizzled_tmp = get_temp(c);
    676    } else {
    677       unswizzled_tmp = inst->DstReg;
    678    }
    679 
    680    assert(unit < BRW_MAX_TEX_UNIT);
    681 
    682    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
    683        struct prog_instruction *out;
    684        struct prog_dst_register tmp0 = get_temp(c);
    685        struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
    686        struct prog_dst_register tmp1 = get_temp(c);
    687        struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
    688        struct prog_src_register src0 = inst->SrcReg[0];
    689 
    690        /* find longest component of coord vector and normalize it */
    691        tmpcoord = get_temp(c);
    692        coord = src_reg_from_dst(tmpcoord);
    693 
    694        /* tmpcoord = src0 (i.e.: coord = src0) */
    695        out = emit_op(c, OPCODE_MOV,
    696                      tmpcoord,
    697                      0,
    698                      src0,
    699                      src_undef(),
    700                      src_undef());
    701        out->SrcReg[0].Negate = NEGATE_NONE;
    702        out->SrcReg[0].Abs = 1;
    703 
    704        /* tmp0 = MAX(coord.X, coord.Y) */
    705        emit_op(c, OPCODE_MAX,
    706                tmp0,
    707                0,
    708                src_swizzle1(coord, X),
    709                src_swizzle1(coord, Y),
    710                src_undef());
    711 
    712        /* tmp1 = MAX(tmp0, coord.Z) */
    713        emit_op(c, OPCODE_MAX,
    714                tmp1,
    715                0,
    716                tmp0src,
    717                src_swizzle1(coord, Z),
    718                src_undef());
    719 
    720        /* tmp0 = 1 / tmp1 */
    721        emit_op(c, OPCODE_RCP,
    722                dst_mask(tmp0, WRITEMASK_X),
    723                0,
    724                tmp1src,
    725                src_undef(),
    726                src_undef());
    727 
    728        /* tmpCoord = src0 * tmp0 */
    729        emit_op(c, OPCODE_MUL,
    730                tmpcoord,
    731                0,
    732                src0,
    733                src_swizzle1(tmp0src, SWIZZLE_X),
    734                src_undef());
    735 
    736        release_temp(c, tmp0);
    737        release_temp(c, tmp1);
    738    }
    739    else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
    740       struct prog_src_register scale =
    741 	 search_or_add_param5( c,
    742 			       STATE_INTERNAL,
    743 			       STATE_TEXRECT_SCALE,
    744 			       unit,
    745 			       0,0 );
    746 
    747       tmpcoord = get_temp(c);
    748 
    749       /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
    750        */
    751       emit_op(c,
    752 	      OPCODE_MUL,
    753 	      tmpcoord,
    754 	      0,
    755 	      inst->SrcReg[0],
    756 	      src_swizzle(scale,
    757 			  SWIZZLE_X,
    758 			  SWIZZLE_Y,
    759 			  SWIZZLE_ONE,
    760 			  SWIZZLE_ONE),
    761 	      src_undef());
    762 
    763       coord = src_reg_from_dst(tmpcoord);
    764    }
    765    else {
    766       coord = inst->SrcReg[0];
    767    }
    768 
    769    /* Need to emit YUV texture conversions by hand.  Probably need to
    770     * do this here - the alternative is in brw_wm_emit.c, but the
    771     * conversion requires allocating a temporary variable which we
    772     * don't have the facility to do that late in the compilation.
    773     */
    774    if (c->key.tex.yuvtex_mask & (1 << unit)) {
    775       /* convert ycbcr to RGBA */
    776       bool swap_uv = c->key.tex.yuvtex_swap_mask & (1 << unit);
    777 
    778       /*
    779 	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
    780 	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
    781 	 UYV     = TEX ...
    782 	 UYV.xyz = ADD UYV,     C0
    783 	 UYV.y   = MUL UYV.y,   C0.w
    784  	 if (UV swaped)
    785 	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
    786 	 else
    787 	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y
    788 	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
    789       */
    790       struct prog_dst_register tmp = get_temp(c);
    791       struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
    792       struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
    793       struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
    794 
    795       /* tmp     = TEX ...
    796        */
    797       emit_tex_op(c,
    798                   OPCODE_TEX,
    799                   tmp,
    800                   inst->SaturateMode,
    801                   unit,
    802                   inst->TexSrcTarget,
    803                   inst->TexShadow,
    804                   coord,
    805                   src_undef(),
    806                   src_undef());
    807 
    808       /* tmp.xyz =  ADD TMP, C0
    809        */
    810       emit_op(c,
    811 	      OPCODE_ADD,
    812 	      dst_mask(tmp, WRITEMASK_XYZ),
    813 	      0,
    814 	      tmpsrc,
    815 	      C0,
    816 	      src_undef());
    817 
    818       /* YUV.y   = MUL YUV.y, C0.w
    819        */
    820 
    821       emit_op(c,
    822 	      OPCODE_MUL,
    823 	      dst_mask(tmp, WRITEMASK_Y),
    824 	      0,
    825 	      tmpsrc,
    826 	      src_swizzle1(C0, W),
    827 	      src_undef());
    828 
    829       /*
    830        * if (UV swaped)
    831        *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
    832        * else
    833        *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
    834        */
    835 
    836       emit_op(c,
    837 	      OPCODE_MAD,
    838 	      dst_mask(unswizzled_tmp, WRITEMASK_XYZ),
    839 	      0,
    840 	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
    841 	      C1,
    842 	      src_swizzle1(tmpsrc, Y));
    843 
    844       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
    845        */
    846       emit_op(c,
    847 	      OPCODE_MAD,
    848 	      dst_mask(unswizzled_tmp, WRITEMASK_Y),
    849 	      0,
    850 	      src_swizzle1(tmpsrc, Z),
    851 	      src_swizzle1(C1, W),
    852 	      src_swizzle1(src_reg_from_dst(unswizzled_tmp), Y));
    853 
    854       release_temp(c, tmp);
    855    }
    856    else {
    857       /* ordinary RGBA tex instruction */
    858       emit_tex_op(c,
    859                   OPCODE_TEX,
    860                   unswizzled_tmp,
    861                   inst->SaturateMode,
    862                   unit,
    863                   inst->TexSrcTarget,
    864                   inst->TexShadow,
    865                   coord,
    866                   src_undef(),
    867                   src_undef());
    868    }
    869 
    870    /* For GL_EXT_texture_swizzle: */
    871    if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) {
    872       /* swizzle the result of the TEX instruction */
    873       struct prog_src_register tmpsrc = src_reg_from_dst(unswizzled_tmp);
    874       emit_op(c, OPCODE_SWZ,
    875               inst->DstReg,
    876               SATURATE_OFF, /* saturate already done above */
    877               src_swizzle4(tmpsrc, c->key.tex.swizzles[unit]),
    878               src_undef(),
    879               src_undef());
    880    }
    881 
    882    if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
    883        (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
    884       release_temp(c, tmpcoord);
    885 }
    886 
    887 
    888 /**
    889  * Check if the given TXP instruction really needs the divide-by-W step.
    890  */
    891 static bool
    892 projtex(struct brw_wm_compile *c, const struct prog_instruction *inst)
    893 {
    894    const struct prog_src_register src = inst->SrcReg[0];
    895    bool retVal;
    896 
    897    assert(inst->Opcode == OPCODE_TXP);
    898 
    899    /* Only try to detect the simplest cases.  Could detect (later)
    900     * cases where we are trying to emit code like RCP {1.0}, MUL x,
    901     * {1.0}, and so on.
    902     *
    903     * More complex cases than this typically only arise from
    904     * user-provided fragment programs anyway:
    905     */
    906    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
    907       retVal = false;  /* ut2004 gun rendering !?! */
    908    else if (src.File == PROGRAM_INPUT &&
    909 	    GET_SWZ(src.Swizzle, W) == W &&
    910             (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
    911       retVal = false;
    912    else
    913       retVal = true;
    914 
    915    return retVal;
    916 }
    917 
    918 
    919 /**
    920  * Emit code for TXP.
    921  */
    922 static void precalc_txp( struct brw_wm_compile *c,
    923 			       const struct prog_instruction *inst )
    924 {
    925    struct prog_src_register src0 = inst->SrcReg[0];
    926 
    927    if (projtex(c, inst)) {
    928       struct prog_dst_register tmp = get_temp(c);
    929       struct prog_instruction tmp_inst;
    930 
    931       /* tmp0.w = RCP inst.arg[0][3]
    932        */
    933       emit_op(c,
    934 	      OPCODE_RCP,
    935 	      dst_mask(tmp, WRITEMASK_W),
    936 	      0,
    937 	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
    938 	      src_undef(),
    939 	      src_undef());
    940 
    941       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
    942        */
    943       emit_op(c,
    944 	      OPCODE_MUL,
    945 	      dst_mask(tmp, WRITEMASK_XYZ),
    946 	      0,
    947 	      src0,
    948 	      src_swizzle1(src_reg_from_dst(tmp), W),
    949 	      src_undef());
    950 
    951       /* dst = precalc(TEX tmp0)
    952        */
    953       tmp_inst = *inst;
    954       tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
    955       precalc_tex(c, &tmp_inst);
    956 
    957       release_temp(c, tmp);
    958    }
    959    else
    960    {
    961       /* dst = precalc(TEX src0)
    962        */
    963       precalc_tex(c, inst);
    964    }
    965 }
    966 
    967 
    968 
    969 static void emit_render_target_writes( struct brw_wm_compile *c )
    970 {
    971    struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
    972    struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
    973    struct prog_src_register outcolor;
    974    GLuint i;
    975 
    976    struct prog_instruction *inst = NULL;
    977 
    978    /* The inst->Aux field is used for FB write target and the EOT marker */
    979 
    980    for (i = 0; i < c->key.nr_color_regions; i++) {
    981       if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
    982 	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
    983       } else {
    984 	 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
    985       }
    986       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
    987 		     0, outcolor, payload_r0_depth, outdepth);
    988       inst->Aux = INST_AUX_TARGET(i);
    989    }
    990 
    991    /* Mark the last FB write as final, or emit a dummy write if we had
    992     * no render targets bound.
    993     */
    994    if (c->key.nr_color_regions != 0) {
    995       inst->Aux |= INST_AUX_EOT;
    996    } else {
    997       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
    998 		     0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
    999 		     payload_r0_depth, outdepth);
   1000       inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
   1001    }
   1002 }
   1003 
   1004 
   1005 
   1006 
   1007 /***********************************************************************
   1008  * Emit INTERP instructions ahead of first use of each attrib.
   1009  */
   1010 
   1011 static void validate_src_regs( struct brw_wm_compile *c,
   1012 			       const struct prog_instruction *inst )
   1013 {
   1014    GLuint nr_args = brw_wm_nr_args( inst->Opcode );
   1015    GLuint i;
   1016 
   1017    for (i = 0; i < nr_args; i++) {
   1018       if (inst->SrcReg[i].File == PROGRAM_INPUT) {
   1019 	 GLuint idx = inst->SrcReg[i].Index;
   1020 	 if (!(c->fp_interp_emitted & (1<<idx))) {
   1021 	    emit_interp(c, idx);
   1022 	 }
   1023       }
   1024    }
   1025 }
   1026 
   1027 static void print_insns( const struct prog_instruction *insn,
   1028 			 GLuint nr )
   1029 {
   1030    GLuint i;
   1031    for (i = 0; i < nr; i++, insn++) {
   1032       printf("%3d: ", i);
   1033       if (insn->Opcode < MAX_OPCODE)
   1034 	 _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
   1035       else if (insn->Opcode < MAX_WM_OPCODE) {
   1036 	 GLuint idx = insn->Opcode - MAX_OPCODE;
   1037 
   1038 	 _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
   1039 				      3, PROG_PRINT_DEBUG, NULL);
   1040       }
   1041       else
   1042 	 printf("965 Opcode %d\n", insn->Opcode);
   1043    }
   1044 }
   1045 
   1046 
   1047 /**
   1048  * Initial pass for fragment program code generation.
   1049  * This function is used by both the GLSL and non-GLSL paths.
   1050  */
   1051 void brw_wm_pass_fp( struct brw_wm_compile *c )
   1052 {
   1053    struct intel_context *intel = &c->func.brw->intel;
   1054    struct brw_fragment_program *fp = c->fp;
   1055    GLuint insn;
   1056 
   1057    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
   1058       printf("pre-fp:\n");
   1059       _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
   1060 			       true);
   1061       printf("\n");
   1062    }
   1063 
   1064    c->pixel_xy = src_undef();
   1065    if (intel->gen >= 6) {
   1066       /* The interpolation deltas come in as the perspective pixel
   1067        * location barycentric params.
   1068        */
   1069       c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
   1070    } else {
   1071       c->delta_xy = src_undef();
   1072    }
   1073    c->pixel_w = src_undef();
   1074    c->nr_fp_insns = 0;
   1075 
   1076    /* Emit preamble instructions.  This is where special instructions such as
   1077     * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
   1078     * compute shader inputs from varying vars.
   1079     */
   1080    for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
   1081       const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
   1082       validate_src_regs(c, inst);
   1083    }
   1084 
   1085    /* Loop over all instructions doing assorted simplifications and
   1086     * transformations.
   1087     */
   1088    for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
   1089       const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
   1090       struct prog_instruction *out;
   1091 
   1092       /* Check for INPUT values, emit INTERP instructions where
   1093        * necessary:
   1094        */
   1095 
   1096       switch (inst->Opcode) {
   1097       case OPCODE_SWZ:
   1098 	 out = emit_insn(c, inst);
   1099 	 out->Opcode = OPCODE_MOV;
   1100 	 break;
   1101 
   1102       case OPCODE_ABS:
   1103 	 out = emit_insn(c, inst);
   1104 	 out->Opcode = OPCODE_MOV;
   1105 	 out->SrcReg[0].Negate = NEGATE_NONE;
   1106 	 out->SrcReg[0].Abs = 1;
   1107 	 break;
   1108 
   1109       case OPCODE_SUB:
   1110 	 out = emit_insn(c, inst);
   1111 	 out->Opcode = OPCODE_ADD;
   1112 	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
   1113 	 break;
   1114 
   1115       case OPCODE_SCS:
   1116 	 out = emit_insn(c, inst);
   1117 	 /* This should probably be done in the parser.
   1118 	  */
   1119 	 out->DstReg.WriteMask &= WRITEMASK_XY;
   1120 	 break;
   1121 
   1122       case OPCODE_DST:
   1123 	 precalc_dst(c, inst);
   1124 	 break;
   1125 
   1126       case OPCODE_LIT:
   1127 	 precalc_lit(c, inst);
   1128 	 break;
   1129 
   1130       case OPCODE_RSQ:
   1131 	 out = emit_scalar_insn(c, inst);
   1132 	 out->SrcReg[0].Abs = true;
   1133 	 break;
   1134 
   1135       case OPCODE_TEX:
   1136 	 precalc_tex(c, inst);
   1137 	 break;
   1138 
   1139       case OPCODE_TXP:
   1140 	 precalc_txp(c, inst);
   1141 	 break;
   1142 
   1143       case OPCODE_TXB:
   1144 	 out = emit_insn(c, inst);
   1145 	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
   1146          assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
   1147 	 break;
   1148 
   1149       case OPCODE_XPD:
   1150 	 out = emit_insn(c, inst);
   1151 	 /* This should probably be done in the parser.
   1152 	  */
   1153 	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
   1154 	 break;
   1155 
   1156       case OPCODE_KIL:
   1157 	 out = emit_insn(c, inst);
   1158 	 /* This should probably be done in the parser.
   1159 	  */
   1160 	 out->DstReg.WriteMask = 0;
   1161 	 break;
   1162       case OPCODE_END:
   1163 	 emit_render_target_writes(c);
   1164 	 break;
   1165       case OPCODE_PRINT:
   1166 	 break;
   1167       default:
   1168 	 if (brw_wm_is_scalar_result(inst->Opcode))
   1169 	    emit_scalar_insn(c, inst);
   1170 	 else
   1171 	    emit_insn(c, inst);
   1172 	 break;
   1173       }
   1174    }
   1175 
   1176    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
   1177       printf("pass_fp:\n");
   1178       print_insns( c->prog_instructions, c->nr_fp_insns );
   1179       printf("\n");
   1180    }
   1181 }
   1182 
   1183