Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keith (at) tungstengraphics.com>
     30   */
     31 
     32 
     33 #include "main/glheader.h"
     34 #include "main/macros.h"
     35 #include "main/enums.h"
     36 
     37 #include "intel_batchbuffer.h"
     38 
     39 #include "brw_defines.h"
     40 #include "brw_context.h"
     41 #include "brw_eu.h"
     42 #include "brw_util.h"
     43 #include "brw_sf.h"
     44 
     45 
     46 /**
     47  * Determine the vert_result corresponding to the given half of the given
     48  * register.  half=0 means the first half of a register, half=1 means the
     49  * second half.
     50  */
     51 static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, GLuint reg,
     52                                           int half)
     53 {
     54    int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
     55    return c->vue_map.slot_to_vert_result[vue_slot];
     56 }
     57 
     58 /**
     59  * Determine the register corresponding to the given vert_result.
     60  */
     61 static struct brw_reg get_vert_result(struct brw_sf_compile *c,
     62                                       struct brw_reg vert,
     63                                       GLuint vert_result)
     64 {
     65    int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
     66    assert (vue_slot >= c->urb_entry_read_offset);
     67    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
     68    GLuint sub = vue_slot % 2;
     69 
     70    return brw_vec4_grf(vert.nr + off, sub * 4);
     71 }
     72 
     73 static bool
     74 have_attr(struct brw_sf_compile *c, GLuint attr)
     75 {
     76    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
     77 }
     78 
     79 /***********************************************************************
     80  * Twoside lighting
     81  */
     82 static void copy_bfc( struct brw_sf_compile *c,
     83 		      struct brw_reg vert )
     84 {
     85    struct brw_compile *p = &c->func;
     86    GLuint i;
     87 
     88    for (i = 0; i < 2; i++) {
     89       if (have_attr(c, VERT_RESULT_COL0+i) &&
     90 	  have_attr(c, VERT_RESULT_BFC0+i))
     91 	 brw_MOV(p,
     92 		 get_vert_result(c, vert, VERT_RESULT_COL0+i),
     93 		 get_vert_result(c, vert, VERT_RESULT_BFC0+i));
     94    }
     95 }
     96 
     97 
     98 static void do_twoside_color( struct brw_sf_compile *c )
     99 {
    100    struct brw_compile *p = &c->func;
    101    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
    102 
    103    /* Already done in clip program:
    104     */
    105    if (c->key.primitive == SF_UNFILLED_TRIS)
    106       return;
    107 
    108    /* XXX: What happens if BFC isn't present?  This could only happen
    109     * for user-supplied vertex programs, as t_vp_build.c always does
    110     * the right thing.
    111     */
    112    if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
    113        !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
    114       return;
    115 
    116    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
    117     * to get all channels active inside the IF.  In the clipping code
    118     * we run with NoMask, so it's not an option and we can use
    119     * BRW_EXECUTE_1 for all comparisions.
    120     */
    121    brw_push_insn_state(p);
    122    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
    123    brw_IF(p, BRW_EXECUTE_4);
    124    {
    125       switch (c->nr_verts) {
    126       case 3: copy_bfc(c, c->vert[2]);
    127       case 2: copy_bfc(c, c->vert[1]);
    128       case 1: copy_bfc(c, c->vert[0]);
    129       }
    130    }
    131    brw_ENDIF(p);
    132    brw_pop_insn_state(p);
    133 }
    134 
    135 
    136 
    137 /***********************************************************************
    138  * Flat shading
    139  */
    140 
    141 #define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
    142 				BITFIELD64_BIT(VERT_RESULT_COL1))
    143 
    144 static void copy_colors( struct brw_sf_compile *c,
    145 		     struct brw_reg dst,
    146 		     struct brw_reg src)
    147 {
    148    struct brw_compile *p = &c->func;
    149    GLuint i;
    150 
    151    for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
    152       if (have_attr(c,i))
    153 	 brw_MOV(p,
    154 		 get_vert_result(c, dst, i),
    155 		 get_vert_result(c, src, i));
    156    }
    157 }
    158 
    159 
    160 
    161 /* Need to use a computed jump to copy flatshaded attributes as the
    162  * vertices are ordered according to y-coordinate before reaching this
    163  * point, so the PV could be anywhere.
    164  */
    165 static void do_flatshade_triangle( struct brw_sf_compile *c )
    166 {
    167    struct brw_compile *p = &c->func;
    168    struct intel_context *intel = &p->brw->intel;
    169    struct brw_reg ip = brw_ip_reg();
    170    GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
    171    GLuint jmpi = 1;
    172 
    173    if (!nr)
    174       return;
    175 
    176    /* Already done in clip program:
    177     */
    178    if (c->key.primitive == SF_UNFILLED_TRIS)
    179       return;
    180 
    181    if (intel->gen == 5)
    182        jmpi = 2;
    183 
    184    brw_push_insn_state(p);
    185 
    186    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
    187    brw_JMPI(p, ip, ip, c->pv);
    188 
    189    copy_colors(c, c->vert[1], c->vert[0]);
    190    copy_colors(c, c->vert[2], c->vert[0]);
    191    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
    192 
    193    copy_colors(c, c->vert[0], c->vert[1]);
    194    copy_colors(c, c->vert[2], c->vert[1]);
    195    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
    196 
    197    copy_colors(c, c->vert[0], c->vert[2]);
    198    copy_colors(c, c->vert[1], c->vert[2]);
    199 
    200    brw_pop_insn_state(p);
    201 }
    202 
    203 
    204 static void do_flatshade_line( struct brw_sf_compile *c )
    205 {
    206    struct brw_compile *p = &c->func;
    207    struct intel_context *intel = &p->brw->intel;
    208    struct brw_reg ip = brw_ip_reg();
    209    GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
    210    GLuint jmpi = 1;
    211 
    212    if (!nr)
    213       return;
    214 
    215    /* Already done in clip program:
    216     */
    217    if (c->key.primitive == SF_UNFILLED_TRIS)
    218       return;
    219 
    220    if (intel->gen == 5)
    221        jmpi = 2;
    222 
    223    brw_push_insn_state(p);
    224 
    225    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
    226    brw_JMPI(p, ip, ip, c->pv);
    227    copy_colors(c, c->vert[1], c->vert[0]);
    228 
    229    brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
    230    copy_colors(c, c->vert[0], c->vert[1]);
    231 
    232    brw_pop_insn_state(p);
    233 }
    234 
    235 
    236 
    237 /***********************************************************************
    238  * Triangle setup.
    239  */
    240 
    241 
    242 static void alloc_regs( struct brw_sf_compile *c )
    243 {
    244    GLuint reg, i;
    245 
    246    /* Values computed by fixed function unit:
    247     */
    248    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
    249    c->det = brw_vec1_grf(1, 2);
    250    c->dx0 = brw_vec1_grf(1, 3);
    251    c->dx2 = brw_vec1_grf(1, 4);
    252    c->dy0 = brw_vec1_grf(1, 5);
    253    c->dy2 = brw_vec1_grf(1, 6);
    254 
    255    /* z and 1/w passed in seperately:
    256     */
    257    c->z[0]     = brw_vec1_grf(2, 0);
    258    c->inv_w[0] = brw_vec1_grf(2, 1);
    259    c->z[1]     = brw_vec1_grf(2, 2);
    260    c->inv_w[1] = brw_vec1_grf(2, 3);
    261    c->z[2]     = brw_vec1_grf(2, 4);
    262    c->inv_w[2] = brw_vec1_grf(2, 5);
    263 
    264    /* The vertices:
    265     */
    266    reg = 3;
    267    for (i = 0; i < c->nr_verts; i++) {
    268       c->vert[i] = brw_vec8_grf(reg, 0);
    269       reg += c->nr_attr_regs;
    270    }
    271 
    272    /* Temporaries, allocated after last vertex reg.
    273     */
    274    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
    275    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    276    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    277    c->tmp = brw_vec8_grf(reg, 0);  reg++;
    278 
    279    /* Note grf allocation:
    280     */
    281    c->prog_data.total_grf = reg;
    282 
    283 
    284    /* Outputs of this program - interpolation coefficients for
    285     * rasterization:
    286     */
    287    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
    288    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
    289    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
    290 }
    291 
    292 
    293 static void copy_z_inv_w( struct brw_sf_compile *c )
    294 {
    295    struct brw_compile *p = &c->func;
    296    GLuint i;
    297 
    298    brw_push_insn_state(p);
    299 
    300    /* Copy both scalars with a single MOV:
    301     */
    302    for (i = 0; i < c->nr_verts; i++)
    303       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
    304 
    305    brw_pop_insn_state(p);
    306 }
    307 
    308 
    309 static void invert_det( struct brw_sf_compile *c)
    310 {
    311    /* Looks like we invert all 8 elements just to get 1/det in
    312     * position 2 !?!
    313     */
    314    brw_math(&c->func,
    315 	    c->inv_det,
    316 	    BRW_MATH_FUNCTION_INV,
    317 	    0,
    318 	    c->det,
    319 	    BRW_MATH_DATA_SCALAR,
    320 	    BRW_MATH_PRECISION_FULL);
    321 
    322 }
    323 
    324 
    325 static bool
    326 calculate_masks(struct brw_sf_compile *c,
    327 	        GLuint reg,
    328 		GLushort *pc,
    329 		GLushort *pc_persp,
    330 		GLushort *pc_linear)
    331 {
    332    bool is_last_attr = (reg == c->nr_setup_regs - 1);
    333    GLbitfield64 persp_mask;
    334    GLbitfield64 linear_mask;
    335 
    336    if (c->key.do_flat_shading)
    337       persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
    338                                     BITFIELD64_BIT(VERT_RESULT_COL0) |
    339                                     BITFIELD64_BIT(VERT_RESULT_COL1));
    340    else
    341       persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
    342 
    343    if (c->key.do_flat_shading)
    344       linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
    345                                      BITFIELD64_BIT(VERT_RESULT_COL1));
    346    else
    347       linear_mask = c->key.attrs;
    348 
    349    *pc_persp = 0;
    350    *pc_linear = 0;
    351    *pc = 0xf;
    352 
    353    if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
    354       *pc_persp = 0xf;
    355 
    356    if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
    357       *pc_linear = 0xf;
    358 
    359    /* Maybe only processs one attribute on the final round:
    360     */
    361    if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
    362       *pc |= 0xf0;
    363 
    364       if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
    365 	 *pc_persp |= 0xf0;
    366 
    367       if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
    368 	 *pc_linear |= 0xf0;
    369    }
    370 
    371    return is_last_attr;
    372 }
    373 
    374 /* Calculates the predicate control for which channels of a reg
    375  * (containing 2 attrs) to do point sprite coordinate replacement on.
    376  */
    377 static uint16_t
    378 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
    379 {
    380    int vert_result1, vert_result2;
    381    uint16_t pc = 0;
    382 
    383    vert_result1 = vert_reg_to_vert_result(c, reg, 0);
    384    if (vert_result1 >= VERT_RESULT_TEX0 && vert_result1 <= VERT_RESULT_TEX7) {
    385       if (c->key.point_sprite_coord_replace & (1 << (vert_result1 - VERT_RESULT_TEX0)))
    386 	 pc |= 0x0f;
    387    }
    388    if (vert_result1 == BRW_VERT_RESULT_PNTC)
    389       pc |= 0x0f;
    390 
    391    vert_result2 = vert_reg_to_vert_result(c, reg, 1);
    392    if (vert_result2 >= VERT_RESULT_TEX0 && vert_result2 <= VERT_RESULT_TEX7) {
    393       if (c->key.point_sprite_coord_replace & (1 << (vert_result2 -
    394                                                      VERT_RESULT_TEX0)))
    395          pc |= 0xf0;
    396    }
    397    if (vert_result2 == BRW_VERT_RESULT_PNTC)
    398       pc |= 0xf0;
    399 
    400    return pc;
    401 }
    402 
    403 
    404 
    405 void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
    406 {
    407    struct brw_compile *p = &c->func;
    408    GLuint i;
    409 
    410    c->nr_verts = 3;
    411 
    412    if (allocate)
    413       alloc_regs(c);
    414 
    415    invert_det(c);
    416    copy_z_inv_w(c);
    417 
    418    if (c->key.do_twoside_color)
    419       do_twoside_color(c);
    420 
    421    if (c->key.do_flat_shading)
    422       do_flatshade_triangle(c);
    423 
    424 
    425    for (i = 0; i < c->nr_setup_regs; i++)
    426    {
    427       /* Pair of incoming attributes:
    428        */
    429       struct brw_reg a0 = offset(c->vert[0], i);
    430       struct brw_reg a1 = offset(c->vert[1], i);
    431       struct brw_reg a2 = offset(c->vert[2], i);
    432       GLushort pc, pc_persp, pc_linear;
    433       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    434 
    435       if (pc_persp)
    436       {
    437 	 brw_set_predicate_control_flag_value(p, pc_persp);
    438 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    439 	 brw_MUL(p, a1, a1, c->inv_w[1]);
    440 	 brw_MUL(p, a2, a2, c->inv_w[2]);
    441       }
    442 
    443 
    444       /* Calculate coefficients for interpolated values:
    445        */
    446       if (pc_linear)
    447       {
    448 	 brw_set_predicate_control_flag_value(p, pc_linear);
    449 
    450 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
    451 	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
    452 
    453 	 /* calculate dA/dx
    454 	  */
    455 	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
    456 	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
    457 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
    458 
    459 	 /* calculate dA/dy
    460 	  */
    461 	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
    462 	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
    463 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
    464       }
    465 
    466       {
    467 	 brw_set_predicate_control_flag_value(p, pc);
    468 	 /* start point for interpolation
    469 	  */
    470 	 brw_MOV(p, c->m3C0, a0);
    471 
    472 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
    473 	  * the send instruction:
    474 	  */
    475 	 brw_urb_WRITE(p,
    476 		       brw_null_reg(),
    477 		       0,
    478 		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
    479 		       0, 	/* allocate */
    480 		       1,	/* used */
    481 		       4, 	/* msg len */
    482 		       0,	/* response len */
    483 		       last,	/* eot */
    484 		       last, 	/* writes complete */
    485 		       i*4,	/* offset */
    486 		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
    487       }
    488    }
    489 }
    490 
    491 
    492 
    493 void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
    494 {
    495    struct brw_compile *p = &c->func;
    496    GLuint i;
    497 
    498 
    499    c->nr_verts = 2;
    500 
    501    if (allocate)
    502       alloc_regs(c);
    503 
    504    invert_det(c);
    505    copy_z_inv_w(c);
    506 
    507    if (c->key.do_flat_shading)
    508       do_flatshade_line(c);
    509 
    510    for (i = 0; i < c->nr_setup_regs; i++)
    511    {
    512       /* Pair of incoming attributes:
    513        */
    514       struct brw_reg a0 = offset(c->vert[0], i);
    515       struct brw_reg a1 = offset(c->vert[1], i);
    516       GLushort pc, pc_persp, pc_linear;
    517       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    518 
    519       if (pc_persp)
    520       {
    521 	 brw_set_predicate_control_flag_value(p, pc_persp);
    522 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    523 	 brw_MUL(p, a1, a1, c->inv_w[1]);
    524       }
    525 
    526       /* Calculate coefficients for position, color:
    527        */
    528       if (pc_linear) {
    529 	 brw_set_predicate_control_flag_value(p, pc_linear);
    530 
    531 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
    532 
    533  	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
    534 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
    535 
    536 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
    537 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
    538       }
    539 
    540       {
    541 	 brw_set_predicate_control_flag_value(p, pc);
    542 
    543 	 /* start point for interpolation
    544 	  */
    545 	 brw_MOV(p, c->m3C0, a0);
    546 
    547 	 /* Copy m0..m3 to URB.
    548 	  */
    549 	 brw_urb_WRITE(p,
    550 		       brw_null_reg(),
    551 		       0,
    552 		       brw_vec8_grf(0, 0),
    553 		       0, 	/* allocate */
    554 		       1, 	/* used */
    555 		       4, 	/* msg len */
    556 		       0,	/* response len */
    557 		       last, 	/* eot */
    558 		       last, 	/* writes complete */
    559 		       i*4,	/* urb destination offset */
    560 		       BRW_URB_SWIZZLE_TRANSPOSE);
    561       }
    562    }
    563 }
    564 
    565 void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
    566 {
    567    struct brw_compile *p = &c->func;
    568    GLuint i;
    569 
    570    c->nr_verts = 1;
    571 
    572    if (allocate)
    573       alloc_regs(c);
    574 
    575    copy_z_inv_w(c);
    576    for (i = 0; i < c->nr_setup_regs; i++)
    577    {
    578       struct brw_reg a0 = offset(c->vert[0], i);
    579       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
    580       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    581 
    582       pc_coord_replace = calculate_point_sprite_mask(c, i);
    583       pc_persp &= ~pc_coord_replace;
    584 
    585       if (pc_persp) {
    586 	 brw_set_predicate_control_flag_value(p, pc_persp);
    587 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    588       }
    589 
    590       /* Point sprite coordinate replacement: A texcoord with this
    591        * enabled gets replaced with the value (x, y, 0, 1) where x and
    592        * y vary from 0 to 1 across the horizontal and vertical of the
    593        * point.
    594        */
    595       if (pc_coord_replace) {
    596 	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
    597 	 /* Caculate 1.0/PointWidth */
    598 	 brw_math(&c->func,
    599 		  c->tmp,
    600 		  BRW_MATH_FUNCTION_INV,
    601 		  0,
    602 		  c->dx0,
    603 		  BRW_MATH_DATA_SCALAR,
    604 		  BRW_MATH_PRECISION_FULL);
    605 
    606 	 brw_set_access_mode(p, BRW_ALIGN_16);
    607 
    608 	 /* dA/dx, dA/dy */
    609 	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
    610 	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
    611 	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
    612 	 if (c->key.sprite_origin_lower_left) {
    613 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
    614 	 } else {
    615 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
    616 	 }
    617 
    618 	 /* attribute constant offset */
    619 	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
    620 	 if (c->key.sprite_origin_lower_left) {
    621 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
    622 	 } else {
    623 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
    624 	 }
    625 
    626 	 brw_set_access_mode(p, BRW_ALIGN_1);
    627       }
    628 
    629       if (pc & ~pc_coord_replace) {
    630 	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
    631 	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
    632 	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
    633 	 brw_MOV(p, c->m3C0, a0); /* constant value */
    634       }
    635 
    636 
    637       brw_set_predicate_control_flag_value(p, pc);
    638       /* Copy m0..m3 to URB. */
    639       brw_urb_WRITE(p,
    640 		    brw_null_reg(),
    641 		    0,
    642 		    brw_vec8_grf(0, 0),
    643 		    0, 	/* allocate */
    644 		    1,	/* used */
    645 		    4, 	/* msg len */
    646 		    0,	/* response len */
    647 		    last, 	/* eot */
    648 		    last, 	/* writes complete */
    649 		    i*4,	/* urb destination offset */
    650 		    BRW_URB_SWIZZLE_TRANSPOSE);
    651    }
    652 }
    653 
    654 /* Points setup - several simplifications as all attributes are
    655  * constant across the face of the point (point sprites excluded!)
    656  */
    657 void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
    658 {
    659    struct brw_compile *p = &c->func;
    660    GLuint i;
    661 
    662    c->nr_verts = 1;
    663 
    664    if (allocate)
    665       alloc_regs(c);
    666 
    667    copy_z_inv_w(c);
    668 
    669    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
    670    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
    671 
    672    for (i = 0; i < c->nr_setup_regs; i++)
    673    {
    674       struct brw_reg a0 = offset(c->vert[0], i);
    675       GLushort pc, pc_persp, pc_linear;
    676       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    677 
    678       if (pc_persp)
    679       {
    680 	 /* This seems odd as the values are all constant, but the
    681 	  * fragment shader will be expecting it:
    682 	  */
    683 	 brw_set_predicate_control_flag_value(p, pc_persp);
    684 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    685       }
    686 
    687 
    688       /* The delta values are always zero, just send the starting
    689        * coordinate.  Again, this is to fit in with the interpolation
    690        * code in the fragment shader.
    691        */
    692       {
    693 	 brw_set_predicate_control_flag_value(p, pc);
    694 
    695 	 brw_MOV(p, c->m3C0, a0); /* constant value */
    696 
    697 	 /* Copy m0..m3 to URB.
    698 	  */
    699 	 brw_urb_WRITE(p,
    700 		       brw_null_reg(),
    701 		       0,
    702 		       brw_vec8_grf(0, 0),
    703 		       0, 	/* allocate */
    704 		       1,	/* used */
    705 		       4, 	/* msg len */
    706 		       0,	/* response len */
    707 		       last, 	/* eot */
    708 		       last, 	/* writes complete */
    709 		       i*4,	/* urb destination offset */
    710 		       BRW_URB_SWIZZLE_TRANSPOSE);
    711       }
    712    }
    713 }
    714 
    715 void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    716 {
    717    struct brw_compile *p = &c->func;
    718    struct brw_reg ip = brw_ip_reg();
    719    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
    720    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
    721    struct brw_reg primmask;
    722    int jmp;
    723    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
    724 
    725    GLuint saveflag;
    726 
    727    c->nr_verts = 3;
    728    alloc_regs(c);
    729 
    730    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
    731 
    732    brw_MOV(p, primmask, brw_imm_ud(1));
    733    brw_SHL(p, primmask, primmask, payload_prim);
    734 
    735    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    736    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
    737 					       (1<<_3DPRIM_TRISTRIP) |
    738 					       (1<<_3DPRIM_TRIFAN) |
    739 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
    740 					       (1<<_3DPRIM_POLYGON) |
    741 					       (1<<_3DPRIM_RECTLIST) |
    742 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
    743    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
    744    {
    745       saveflag = p->flag_value;
    746       brw_push_insn_state(p);
    747       brw_emit_tri_setup( c, false );
    748       brw_pop_insn_state(p);
    749       p->flag_value = saveflag;
    750       /* note - thread killed in subroutine, so must
    751        * restore the flag which is changed when building
    752        * the subroutine. fix #13240
    753        */
    754    }
    755    brw_land_fwd_jump(p, jmp);
    756 
    757    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    758    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
    759 					       (1<<_3DPRIM_LINESTRIP) |
    760 					       (1<<_3DPRIM_LINELOOP) |
    761 					       (1<<_3DPRIM_LINESTRIP_CONT) |
    762 					       (1<<_3DPRIM_LINESTRIP_BF) |
    763 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
    764    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
    765    {
    766       saveflag = p->flag_value;
    767       brw_push_insn_state(p);
    768       brw_emit_line_setup( c, false );
    769       brw_pop_insn_state(p);
    770       p->flag_value = saveflag;
    771       /* note - thread killed in subroutine */
    772    }
    773    brw_land_fwd_jump(p, jmp);
    774 
    775    brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    776    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
    777    jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
    778    {
    779       saveflag = p->flag_value;
    780       brw_push_insn_state(p);
    781       brw_emit_point_sprite_setup( c, false );
    782       brw_pop_insn_state(p);
    783       p->flag_value = saveflag;
    784    }
    785    brw_land_fwd_jump(p, jmp);
    786 
    787    brw_emit_point_setup( c, false );
    788 }
    789 
    790 
    791 
    792 
    793