Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keithw (at) vmware.com>
     30   */
     31 
     32 
     33 #include "main/macros.h"
     34 #include "main/enums.h"
     35 
     36 #include "intel_batchbuffer.h"
     37 
     38 #include "brw_defines.h"
     39 #include "brw_context.h"
     40 #include "brw_eu.h"
     41 #include "brw_util.h"
     42 #include "brw_sf.h"
     43 
     44 
     45 /**
     46  * Determine the vue slot corresponding to the given half of the given register.
     47  */
     48 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
     49                                        int half)
     50 {
     51    return (reg + c->urb_entry_read_offset) * 2 + half;
     52 }
     53 
     54 /**
     55  * Determine the varying corresponding to the given half of the given
     56  * register.  half=0 means the first half of a register, half=1 means the
     57  * second half.
     58  */
     59 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
     60                                       int half)
     61 {
     62    int vue_slot = vert_reg_to_vue_slot(c, reg, half);
     63    return c->vue_map.slot_to_varying[vue_slot];
     64 }
     65 
     66 /**
     67  * Determine the register corresponding to the given vue slot
     68  */
     69 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
     70                                    struct brw_reg vert,
     71                                    int vue_slot)
     72 {
     73    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
     74    GLuint sub = vue_slot % 2;
     75 
     76    return brw_vec4_grf(vert.nr + off, sub * 4);
     77 }
     78 
     79 /**
     80  * Determine the register corresponding to the given varying.
     81  */
     82 static struct brw_reg get_varying(struct brw_sf_compile *c,
     83                                   struct brw_reg vert,
     84                                   GLuint varying)
     85 {
     86    int vue_slot = c->vue_map.varying_to_slot[varying];
     87    assert (vue_slot >= c->urb_entry_read_offset);
     88    return get_vue_slot(c, vert, vue_slot);
     89 }
     90 
     91 static bool
     92 have_attr(struct brw_sf_compile *c, GLuint attr)
     93 {
     94    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
     95 }
     96 
     97 /***********************************************************************
     98  * Twoside lighting
     99  */
    100 static void copy_bfc( struct brw_sf_compile *c,
    101 		      struct brw_reg vert )
    102 {
    103    struct brw_codegen *p = &c->func;
    104    GLuint i;
    105 
    106    for (i = 0; i < 2; i++) {
    107       if (have_attr(c, VARYING_SLOT_COL0+i) &&
    108 	  have_attr(c, VARYING_SLOT_BFC0+i))
    109 	 brw_MOV(p,
    110 		 get_varying(c, vert, VARYING_SLOT_COL0+i),
    111 		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
    112    }
    113 }
    114 
    115 
    116 static void do_twoside_color( struct brw_sf_compile *c )
    117 {
    118    struct brw_codegen *p = &c->func;
    119    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
    120 
    121    /* Already done in clip program:
    122     */
    123    if (c->key.primitive == SF_UNFILLED_TRIS)
    124       return;
    125 
    126    /* If the vertex shader provides backface color, do the selection. The VS
    127     * promises to set up the front color if the backface color is provided, but
    128     * it may contain junk if never written to.
    129     */
    130    if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
    131        !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
    132       return;
    133 
    134    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
    135     * to get all channels active inside the IF.  In the clipping code
    136     * we run with NoMask, so it's not an option and we can use
    137     * BRW_EXECUTE_1 for all comparisions.
    138     */
    139    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
    140    brw_IF(p, BRW_EXECUTE_4);
    141    {
    142       switch (c->nr_verts) {
    143       case 3: copy_bfc(c, c->vert[2]);
    144       case 2: copy_bfc(c, c->vert[1]);
    145       case 1: copy_bfc(c, c->vert[0]);
    146       }
    147    }
    148    brw_ENDIF(p);
    149 }
    150 
    151 
    152 
    153 /***********************************************************************
    154  * Flat shading
    155  */
    156 
    157 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
    158                                        struct brw_reg dst,
    159                                        struct brw_reg src)
    160 {
    161    struct brw_codegen *p = &c->func;
    162    int i;
    163 
    164    for (i = 0; i < c->vue_map.num_slots; i++) {
    165       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
    166          brw_MOV(p,
    167                  get_vue_slot(c, dst, i),
    168                  get_vue_slot(c, src, i));
    169       }
    170    }
    171 }
    172 
    173 static int count_flatshaded_attributes(struct brw_sf_compile *c)
    174 {
    175    int i;
    176    int count = 0;
    177 
    178    for (i = 0; i < c->vue_map.num_slots; i++)
    179       if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
    180          count++;
    181 
    182    return count;
    183 }
    184 
    185 
    186 
    187 /* Need to use a computed jump to copy flatshaded attributes as the
    188  * vertices are ordered according to y-coordinate before reaching this
    189  * point, so the PV could be anywhere.
    190  */
    191 static void do_flatshade_triangle( struct brw_sf_compile *c )
    192 {
    193    struct brw_codegen *p = &c->func;
    194    GLuint nr;
    195    GLuint jmpi = 1;
    196 
    197    /* Already done in clip program:
    198     */
    199    if (c->key.primitive == SF_UNFILLED_TRIS)
    200       return;
    201 
    202    if (p->devinfo->gen == 5)
    203        jmpi = 2;
    204 
    205    nr = count_flatshaded_attributes(c);
    206 
    207    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
    208    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
    209 
    210    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
    211    copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
    212    brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
    213 
    214    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
    215    copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
    216    brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
    217 
    218    copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
    219    copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
    220 }
    221 
    222 
    223 static void do_flatshade_line( struct brw_sf_compile *c )
    224 {
    225    struct brw_codegen *p = &c->func;
    226    GLuint nr;
    227    GLuint jmpi = 1;
    228 
    229    /* Already done in clip program:
    230     */
    231    if (c->key.primitive == SF_UNFILLED_TRIS)
    232       return;
    233 
    234    if (p->devinfo->gen == 5)
    235        jmpi = 2;
    236 
    237    nr = count_flatshaded_attributes(c);
    238 
    239    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
    240    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
    241    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
    242 
    243    brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
    244    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
    245 }
    246 
    247 
    248 /***********************************************************************
    249  * Triangle setup.
    250  */
    251 
    252 
    253 static void alloc_regs( struct brw_sf_compile *c )
    254 {
    255    GLuint reg, i;
    256 
    257    /* Values computed by fixed function unit:
    258     */
    259    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
    260    c->det = brw_vec1_grf(1, 2);
    261    c->dx0 = brw_vec1_grf(1, 3);
    262    c->dx2 = brw_vec1_grf(1, 4);
    263    c->dy0 = brw_vec1_grf(1, 5);
    264    c->dy2 = brw_vec1_grf(1, 6);
    265 
    266    /* z and 1/w passed in seperately:
    267     */
    268    c->z[0]     = brw_vec1_grf(2, 0);
    269    c->inv_w[0] = brw_vec1_grf(2, 1);
    270    c->z[1]     = brw_vec1_grf(2, 2);
    271    c->inv_w[1] = brw_vec1_grf(2, 3);
    272    c->z[2]     = brw_vec1_grf(2, 4);
    273    c->inv_w[2] = brw_vec1_grf(2, 5);
    274 
    275    /* The vertices:
    276     */
    277    reg = 3;
    278    for (i = 0; i < c->nr_verts; i++) {
    279       c->vert[i] = brw_vec8_grf(reg, 0);
    280       reg += c->nr_attr_regs;
    281    }
    282 
    283    /* Temporaries, allocated after last vertex reg.
    284     */
    285    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
    286    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    287    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    288    c->tmp = brw_vec8_grf(reg, 0);  reg++;
    289 
    290    /* Note grf allocation:
    291     */
    292    c->prog_data.total_grf = reg;
    293 
    294 
    295    /* Outputs of this program - interpolation coefficients for
    296     * rasterization:
    297     */
    298    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
    299    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
    300    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
    301 }
    302 
    303 
    304 static void copy_z_inv_w( struct brw_sf_compile *c )
    305 {
    306    struct brw_codegen *p = &c->func;
    307    GLuint i;
    308 
    309    /* Copy both scalars with a single MOV:
    310     */
    311    for (i = 0; i < c->nr_verts; i++)
    312       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
    313 }
    314 
    315 
    316 static void invert_det( struct brw_sf_compile *c)
    317 {
    318    /* Looks like we invert all 8 elements just to get 1/det in
    319     * position 2 !?!
    320     */
    321    gen4_math(&c->func,
    322 	     c->inv_det,
    323 	     BRW_MATH_FUNCTION_INV,
    324 	     0,
    325 	     c->det,
    326 	     BRW_MATH_PRECISION_FULL);
    327 
    328 }
    329 
    330 
    331 static bool
    332 calculate_masks(struct brw_sf_compile *c,
    333                 GLuint reg,
    334                 GLushort *pc,
    335                 GLushort *pc_persp,
    336                 GLushort *pc_linear)
    337 {
    338    bool is_last_attr = (reg == c->nr_setup_regs - 1);
    339    enum glsl_interp_mode interp;
    340 
    341    *pc_persp = 0;
    342    *pc_linear = 0;
    343    *pc = 0xf;
    344 
    345    interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
    346    if (interp == INTERP_MODE_SMOOTH) {
    347       *pc_linear = 0xf;
    348       *pc_persp = 0xf;
    349    } else if (interp == INTERP_MODE_NOPERSPECTIVE)
    350       *pc_linear = 0xf;
    351 
    352    /* Maybe only processs one attribute on the final round:
    353     */
    354    if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
    355       *pc |= 0xf0;
    356 
    357       interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
    358       if (interp == INTERP_MODE_SMOOTH) {
    359          *pc_linear |= 0xf0;
    360          *pc_persp |= 0xf0;
    361       } else if (interp == INTERP_MODE_NOPERSPECTIVE)
    362          *pc_linear |= 0xf0;
    363    }
    364 
    365    return is_last_attr;
    366 }
    367 
    368 /* Calculates the predicate control for which channels of a reg
    369  * (containing 2 attrs) to do point sprite coordinate replacement on.
    370  */
    371 static uint16_t
    372 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
    373 {
    374    int varying1, varying2;
    375    uint16_t pc = 0;
    376 
    377    varying1 = vert_reg_to_varying(c, reg, 0);
    378    if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
    379       if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
    380 	 pc |= 0x0f;
    381    }
    382    if (varying1 == BRW_VARYING_SLOT_PNTC)
    383       pc |= 0x0f;
    384 
    385    varying2 = vert_reg_to_varying(c, reg, 1);
    386    if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
    387       if (c->key.point_sprite_coord_replace & (1 << (varying2 -
    388                                                      VARYING_SLOT_TEX0)))
    389          pc |= 0xf0;
    390    }
    391    if (varying2 == BRW_VARYING_SLOT_PNTC)
    392       pc |= 0xf0;
    393 
    394    return pc;
    395 }
    396 
    397 static void
    398 set_predicate_control_flag_value(struct brw_codegen *p,
    399                                  struct brw_sf_compile *c,
    400                                  unsigned value)
    401 {
    402    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    403 
    404    if (value != 0xff) {
    405       if (value != c->flag_value) {
    406          brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
    407          c->flag_value = value;
    408       }
    409 
    410       brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
    411    }
    412 }
    413 
    414 void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
    415 {
    416    struct brw_codegen *p = &c->func;
    417    GLuint i;
    418 
    419    c->flag_value = 0xff;
    420    c->nr_verts = 3;
    421 
    422    if (allocate)
    423       alloc_regs(c);
    424 
    425    invert_det(c);
    426    copy_z_inv_w(c);
    427 
    428    if (c->key.do_twoside_color)
    429       do_twoside_color(c);
    430 
    431    if (c->key.contains_flat_varying)
    432       do_flatshade_triangle(c);
    433 
    434 
    435    for (i = 0; i < c->nr_setup_regs; i++)
    436    {
    437       /* Pair of incoming attributes:
    438        */
    439       struct brw_reg a0 = offset(c->vert[0], i);
    440       struct brw_reg a1 = offset(c->vert[1], i);
    441       struct brw_reg a2 = offset(c->vert[2], i);
    442       GLushort pc, pc_persp, pc_linear;
    443       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    444 
    445       if (pc_persp)
    446       {
    447 	 set_predicate_control_flag_value(p, c, pc_persp);
    448 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    449 	 brw_MUL(p, a1, a1, c->inv_w[1]);
    450 	 brw_MUL(p, a2, a2, c->inv_w[2]);
    451       }
    452 
    453 
    454       /* Calculate coefficients for interpolated values:
    455        */
    456       if (pc_linear)
    457       {
    458 	 set_predicate_control_flag_value(p, c, pc_linear);
    459 
    460 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
    461 	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
    462 
    463 	 /* calculate dA/dx
    464 	  */
    465 	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
    466 	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
    467 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
    468 
    469 	 /* calculate dA/dy
    470 	  */
    471 	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
    472 	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
    473 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
    474       }
    475 
    476       {
    477 	 set_predicate_control_flag_value(p, c, pc);
    478 	 /* start point for interpolation
    479 	  */
    480 	 brw_MOV(p, c->m3C0, a0);
    481 
    482 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
    483 	  * the send instruction:
    484 	  */
    485 	 brw_urb_WRITE(p,
    486 		       brw_null_reg(),
    487 		       0,
    488 		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
    489                        last ? BRW_URB_WRITE_EOT_COMPLETE
    490                        : BRW_URB_WRITE_NO_FLAGS,
    491 		       4, 	/* msg len */
    492 		       0,	/* response len */
    493 		       i*4,	/* offset */
    494 		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
    495       }
    496    }
    497 
    498    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    499 }
    500 
    501 
    502 
    503 void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
    504 {
    505    struct brw_codegen *p = &c->func;
    506    GLuint i;
    507 
    508    c->flag_value = 0xff;
    509    c->nr_verts = 2;
    510 
    511    if (allocate)
    512       alloc_regs(c);
    513 
    514    invert_det(c);
    515    copy_z_inv_w(c);
    516 
    517    if (c->key.contains_flat_varying)
    518       do_flatshade_line(c);
    519 
    520    for (i = 0; i < c->nr_setup_regs; i++)
    521    {
    522       /* Pair of incoming attributes:
    523        */
    524       struct brw_reg a0 = offset(c->vert[0], i);
    525       struct brw_reg a1 = offset(c->vert[1], i);
    526       GLushort pc, pc_persp, pc_linear;
    527       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    528 
    529       if (pc_persp)
    530       {
    531 	 set_predicate_control_flag_value(p, c, pc_persp);
    532 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    533 	 brw_MUL(p, a1, a1, c->inv_w[1]);
    534       }
    535 
    536       /* Calculate coefficients for position, color:
    537        */
    538       if (pc_linear) {
    539 	 set_predicate_control_flag_value(p, c, pc_linear);
    540 
    541 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
    542 
    543  	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
    544 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
    545 
    546 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
    547 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
    548       }
    549 
    550       {
    551 	 set_predicate_control_flag_value(p, c, pc);
    552 
    553 	 /* start point for interpolation
    554 	  */
    555 	 brw_MOV(p, c->m3C0, a0);
    556 
    557 	 /* Copy m0..m3 to URB.
    558 	  */
    559 	 brw_urb_WRITE(p,
    560 		       brw_null_reg(),
    561 		       0,
    562 		       brw_vec8_grf(0, 0),
    563                        last ? BRW_URB_WRITE_EOT_COMPLETE
    564                        : BRW_URB_WRITE_NO_FLAGS,
    565 		       4, 	/* msg len */
    566 		       0,	/* response len */
    567 		       i*4,	/* urb destination offset */
    568 		       BRW_URB_SWIZZLE_TRANSPOSE);
    569       }
    570    }
    571 
    572    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    573 }
    574 
    575 void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
    576 {
    577    struct brw_codegen *p = &c->func;
    578    GLuint i;
    579 
    580    c->flag_value = 0xff;
    581    c->nr_verts = 1;
    582 
    583    if (allocate)
    584       alloc_regs(c);
    585 
    586    copy_z_inv_w(c);
    587    for (i = 0; i < c->nr_setup_regs; i++)
    588    {
    589       struct brw_reg a0 = offset(c->vert[0], i);
    590       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
    591       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    592 
    593       pc_coord_replace = calculate_point_sprite_mask(c, i);
    594       pc_persp &= ~pc_coord_replace;
    595 
    596       if (pc_persp) {
    597 	 set_predicate_control_flag_value(p, c, pc_persp);
    598 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    599       }
    600 
    601       /* Point sprite coordinate replacement: A texcoord with this
    602        * enabled gets replaced with the value (x, y, 0, 1) where x and
    603        * y vary from 0 to 1 across the horizontal and vertical of the
    604        * point.
    605        */
    606       if (pc_coord_replace) {
    607 	 set_predicate_control_flag_value(p, c, pc_coord_replace);
    608 	 /* Caculate 1.0/PointWidth */
    609 	 gen4_math(&c->func,
    610 		   c->tmp,
    611 		   BRW_MATH_FUNCTION_INV,
    612 		   0,
    613 		   c->dx0,
    614 		   BRW_MATH_PRECISION_FULL);
    615 
    616 	 brw_set_default_access_mode(p, BRW_ALIGN_16);
    617 
    618 	 /* dA/dx, dA/dy */
    619 	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
    620 	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
    621 	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
    622 	 if (c->key.sprite_origin_lower_left) {
    623 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
    624 	 } else {
    625 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
    626 	 }
    627 
    628 	 /* attribute constant offset */
    629 	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
    630 	 if (c->key.sprite_origin_lower_left) {
    631 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
    632 	 } else {
    633 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
    634 	 }
    635 
    636 	 brw_set_default_access_mode(p, BRW_ALIGN_1);
    637       }
    638 
    639       if (pc & ~pc_coord_replace) {
    640 	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
    641 	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
    642 	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
    643 	 brw_MOV(p, c->m3C0, a0); /* constant value */
    644       }
    645 
    646 
    647       set_predicate_control_flag_value(p, c, pc);
    648       /* Copy m0..m3 to URB. */
    649       brw_urb_WRITE(p,
    650 		    brw_null_reg(),
    651 		    0,
    652 		    brw_vec8_grf(0, 0),
    653                     last ? BRW_URB_WRITE_EOT_COMPLETE
    654                     : BRW_URB_WRITE_NO_FLAGS,
    655 		    4, 	/* msg len */
    656 		    0,	/* response len */
    657 		    i*4,	/* urb destination offset */
    658 		    BRW_URB_SWIZZLE_TRANSPOSE);
    659    }
    660 
    661    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    662 }
    663 
    664 /* Points setup - several simplifications as all attributes are
    665  * constant across the face of the point (point sprites excluded!)
    666  */
    667 void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
    668 {
    669    struct brw_codegen *p = &c->func;
    670    GLuint i;
    671 
    672    c->flag_value = 0xff;
    673    c->nr_verts = 1;
    674 
    675    if (allocate)
    676       alloc_regs(c);
    677 
    678    copy_z_inv_w(c);
    679 
    680    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
    681    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
    682 
    683    for (i = 0; i < c->nr_setup_regs; i++)
    684    {
    685       struct brw_reg a0 = offset(c->vert[0], i);
    686       GLushort pc, pc_persp, pc_linear;
    687       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    688 
    689       if (pc_persp)
    690       {
    691 	 /* This seems odd as the values are all constant, but the
    692 	  * fragment shader will be expecting it:
    693 	  */
    694 	 set_predicate_control_flag_value(p, c, pc_persp);
    695 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    696       }
    697 
    698 
    699       /* The delta values are always zero, just send the starting
    700        * coordinate.  Again, this is to fit in with the interpolation
    701        * code in the fragment shader.
    702        */
    703       {
    704 	 set_predicate_control_flag_value(p, c, pc);
    705 
    706 	 brw_MOV(p, c->m3C0, a0); /* constant value */
    707 
    708 	 /* Copy m0..m3 to URB.
    709 	  */
    710 	 brw_urb_WRITE(p,
    711 		       brw_null_reg(),
    712 		       0,
    713 		       brw_vec8_grf(0, 0),
    714                        last ? BRW_URB_WRITE_EOT_COMPLETE
    715                        : BRW_URB_WRITE_NO_FLAGS,
    716 		       4, 	/* msg len */
    717 		       0,	/* response len */
    718 		       i*4,	/* urb destination offset */
    719 		       BRW_URB_SWIZZLE_TRANSPOSE);
    720       }
    721    }
    722 
    723    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    724 }
    725 
    726 void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    727 {
    728    struct brw_codegen *p = &c->func;
    729    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
    730    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
    731    struct brw_reg primmask;
    732    int jmp;
    733    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
    734 
    735    c->nr_verts = 3;
    736    alloc_regs(c);
    737 
    738    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
    739 
    740    brw_MOV(p, primmask, brw_imm_ud(1));
    741    brw_SHL(p, primmask, primmask, payload_prim);
    742 
    743    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
    744 					       (1<<_3DPRIM_TRISTRIP) |
    745 					       (1<<_3DPRIM_TRIFAN) |
    746 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
    747 					       (1<<_3DPRIM_POLYGON) |
    748 					       (1<<_3DPRIM_RECTLIST) |
    749 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
    750    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
    751    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
    752    brw_emit_tri_setup(c, false);
    753    brw_land_fwd_jump(p, jmp);
    754 
    755    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
    756 					       (1<<_3DPRIM_LINESTRIP) |
    757 					       (1<<_3DPRIM_LINELOOP) |
    758 					       (1<<_3DPRIM_LINESTRIP_CONT) |
    759 					       (1<<_3DPRIM_LINESTRIP_BF) |
    760 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
    761    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
    762    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
    763    brw_emit_line_setup(c, false);
    764    brw_land_fwd_jump(p, jmp);
    765 
    766    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
    767    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
    768    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
    769    brw_emit_point_sprite_setup(c, false);
    770    brw_land_fwd_jump(p, jmp);
    771 
    772    brw_emit_point_setup( c, false );
    773 }
    774 
    775 
    776 
    777 
    778