Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright  2006 - 2017 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "brw_compiler.h"
     25 #include "brw_eu.h"
     26 
     27 #include "common/gen_debug.h"
     28 
/**
 * Per-compile working state for generating one SF program.
 *
 * Bundles the code generator, a private copy of the program key, the program
 * data being filled in, and every register handed out by alloc_regs().
 */
struct brw_sf_compile {
   /* Instruction stream being built; every emit helper writes into this. */
   struct brw_codegen func;
   /* Copy of the caller's key (taken in brw_compile_sf()). */
   struct brw_sf_prog_key key;
   /* Results copied back to the caller once code generation is done. */
   struct brw_sf_prog_data prog_data;

   /* Scalars that alloc_regs() places in g1 ("values computed by fixed
    * function unit").  pv is integer-typed and drives the computed jumps in
    * the flat-shading code; det is inverted by invert_det() and its sign is
    * tested by do_twoside_color().
    */
   struct brw_reg pv;
   struct brw_reg det;
   struct brw_reg dx0;
   struct brw_reg dx2;
   struct brw_reg dy0;
   struct brw_reg dy2;

   /* z and 1/w passed in separately:
    */
   struct brw_reg z[3];
   struct brw_reg inv_w[3];

   /* The vertices:
    */
   struct brw_reg vert[3];

   /* Temporaries, allocated after last vertex reg.
    */
   struct brw_reg inv_det;
   struct brw_reg a1_sub_a0;
   struct brw_reg a2_sub_a0;
   struct brw_reg tmp;

   /* Message registers m1..m3 holding the coefficients written to the URB:
    * dA/dx, dA/dy and the interpolation start value respectively.
    */
   struct brw_reg m1Cx;
   struct brw_reg m2Cy;
   struct brw_reg m3C0;

   /* Number of vertices in the primitive being set up (1, 2 or 3). */
   GLuint nr_verts;
   /* Registers occupied by each vertex; stride between vert[] entries. */
   GLuint nr_attr_regs;
   /* Number of attribute registers the setup loops iterate over. */
   GLuint nr_setup_regs;
   /* Offset, in registers (two VUE slots each), between the start of the VUE
    * and the first vertex register; see vert_reg_to_vue_slot().
    */
   int urb_entry_read_offset;

   /** The last known value of the f0.0 flag register. */
   unsigned flag_value;

   /* Private copy of the VUE map; brw_compile_sf() may append PNTC to it. */
   struct brw_vue_map vue_map;
};
     71 
     72 /**
     73  * Determine the vue slot corresponding to the given half of the given register.
     74  */
     75 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
     76                                        int half)
     77 {
     78    return (reg + c->urb_entry_read_offset) * 2 + half;
     79 }
     80 
     81 /**
     82  * Determine the varying corresponding to the given half of the given
     83  * register.  half=0 means the first half of a register, half=1 means the
     84  * second half.
     85  */
     86 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
     87                                       int half)
     88 {
     89    int vue_slot = vert_reg_to_vue_slot(c, reg, half);
     90    return c->vue_map.slot_to_varying[vue_slot];
     91 }
     92 
     93 /**
     94  * Determine the register corresponding to the given vue slot
     95  */
     96 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
     97                                    struct brw_reg vert,
     98                                    int vue_slot)
     99 {
    100    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
    101    GLuint sub = vue_slot % 2;
    102 
    103    return brw_vec4_grf(vert.nr + off, sub * 4);
    104 }
    105 
    106 /**
    107  * Determine the register corresponding to the given varying.
    108  */
    109 static struct brw_reg get_varying(struct brw_sf_compile *c,
    110                                   struct brw_reg vert,
    111                                   GLuint varying)
    112 {
    113    int vue_slot = c->vue_map.varying_to_slot[varying];
    114    assert (vue_slot >= c->urb_entry_read_offset);
    115    return get_vue_slot(c, vert, vue_slot);
    116 }
    117 
    118 static bool
    119 have_attr(struct brw_sf_compile *c, GLuint attr)
    120 {
    121    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
    122 }
    123 
    124 /***********************************************************************
    125  * Twoside lighting
    126  */
    127 static void copy_bfc( struct brw_sf_compile *c,
    128 		      struct brw_reg vert )
    129 {
    130    struct brw_codegen *p = &c->func;
    131    GLuint i;
    132 
    133    for (i = 0; i < 2; i++) {
    134       if (have_attr(c, VARYING_SLOT_COL0+i) &&
    135 	  have_attr(c, VARYING_SLOT_BFC0+i))
    136 	 brw_MOV(p,
    137 		 get_varying(c, vert, VARYING_SLOT_COL0+i),
    138 		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
    139    }
    140 }
    141 
    142 
    143 static void do_twoside_color( struct brw_sf_compile *c )
    144 {
    145    struct brw_codegen *p = &c->func;
    146    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
    147 
    148    /* Already done in clip program:
    149     */
    150    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
    151       return;
    152 
    153    /* If the vertex shader provides backface color, do the selection. The VS
    154     * promises to set up the front color if the backface color is provided, but
    155     * it may contain junk if never written to.
    156     */
    157    if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
    158        !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
    159       return;
    160 
    161    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
    162     * to get all channels active inside the IF.  In the clipping code
    163     * we run with NoMask, so it's not an option and we can use
    164     * BRW_EXECUTE_1 for all comparisions.
    165     */
    166    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
    167    brw_IF(p, BRW_EXECUTE_4);
    168    {
    169       switch (c->nr_verts) {
    170       case 3: copy_bfc(c, c->vert[2]);
    171       case 2: copy_bfc(c, c->vert[1]);
    172       case 1: copy_bfc(c, c->vert[0]);
    173       }
    174    }
    175    brw_ENDIF(p);
    176 }
    177 
    178 
    179 
    180 /***********************************************************************
    181  * Flat shading
    182  */
    183 
    184 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
    185                                        struct brw_reg dst,
    186                                        struct brw_reg src)
    187 {
    188    struct brw_codegen *p = &c->func;
    189    int i;
    190 
    191    for (i = 0; i < c->vue_map.num_slots; i++) {
    192       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
    193          brw_MOV(p,
    194                  get_vue_slot(c, dst, i),
    195                  get_vue_slot(c, src, i));
    196       }
    197    }
    198 }
    199 
    200 static int count_flatshaded_attributes(struct brw_sf_compile *c)
    201 {
    202    int i;
    203    int count = 0;
    204 
    205    for (i = 0; i < c->vue_map.num_slots; i++)
    206       if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
    207          count++;
    208 
    209    return count;
    210 }
    211 
    212 
    213 
/* Need to use a computed jump to copy flatshaded attributes as the
 * vertices are ordered according to y-coordinate before reaching this
 * point, so the PV could be anywhere.
 *
 * Three copy blocks are emitted back to back, one per possible provoking
 * vertex.  Each block copies the flat-shaded slots of the provoking vertex
 * into the other two vertices (2 * nr MOVs) and, except for the last, ends
 * with a JMPI that skips the remaining blocks.  c->pv is scaled to the size
 * of one block so the first JMPI lands on the block matching the PV.
 */
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   GLuint nr;
   /* Scale factor applied to every jump distance. */
   GLuint jmpi = 1;

   /* Already done in clip program:
    */
   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
      return;

   /* NOTE(review): gen 5 needs doubled jump distances, presumably because
    * it counts them in different units -- confirm against the hardware docs.
    */
   if (p->devinfo->gen == 5)
       jmpi = 2;

   nr = count_flatshaded_attributes(c);

   /* One block is 2 * nr MOVs plus its trailing JMPI. */
   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);

   /* PV == 0: skip the PV == 1 block (2 * nr + 1) and PV == 2 block (2 * nr). */
   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
   brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);

   /* PV == 1: skip the PV == 2 block (2 * nr MOVs). */
   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
   brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);

   /* PV == 2: last block, execution simply continues afterwards. */
   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
}
    248 
    249 
/**
 * Emit the flat-shade copies for a line.
 *
 * Two copy blocks are emitted, one per possible provoking vertex, and a
 * computed jump on c->pv selects between them.  The first block is nr MOVs
 * plus a JMPI that skips the second block (nr MOVs).
 */
static void do_flatshade_line( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   GLuint nr;
   /* Scale factor applied to every jump distance. */
   GLuint jmpi = 1;

   /* Already done in clip program:
    */
   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
      return;

   /* NOTE(review): gen 5 needs doubled jump distances, presumably because
    * it counts them in different units -- confirm against the hardware docs.
    */
   if (p->devinfo->gen == 5)
       jmpi = 2;

   nr = count_flatshaded_attributes(c);

   /* One block is nr MOVs plus its trailing JMPI. */
   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
   /* PV == 0: copy vertex 0 into vertex 1. */
   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);

   /* Skip the PV == 1 block.  NOTE(review): this distance is an unsigned
    * immediate while the triangle path uses signed ones -- confirm that the
    * difference is intentional.
    */
   brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
   /* PV == 1: copy vertex 1 into vertex 0. */
   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
}
    273 
    274 
    275 /***********************************************************************
    276  * Triangle setup.
    277  */
    278 
    279 
    280 static void alloc_regs( struct brw_sf_compile *c )
    281 {
    282    GLuint reg, i;
    283 
    284    /* Values computed by fixed function unit:
    285     */
    286    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
    287    c->det = brw_vec1_grf(1, 2);
    288    c->dx0 = brw_vec1_grf(1, 3);
    289    c->dx2 = brw_vec1_grf(1, 4);
    290    c->dy0 = brw_vec1_grf(1, 5);
    291    c->dy2 = brw_vec1_grf(1, 6);
    292 
    293    /* z and 1/w passed in seperately:
    294     */
    295    c->z[0]     = brw_vec1_grf(2, 0);
    296    c->inv_w[0] = brw_vec1_grf(2, 1);
    297    c->z[1]     = brw_vec1_grf(2, 2);
    298    c->inv_w[1] = brw_vec1_grf(2, 3);
    299    c->z[2]     = brw_vec1_grf(2, 4);
    300    c->inv_w[2] = brw_vec1_grf(2, 5);
    301 
    302    /* The vertices:
    303     */
    304    reg = 3;
    305    for (i = 0; i < c->nr_verts; i++) {
    306       c->vert[i] = brw_vec8_grf(reg, 0);
    307       reg += c->nr_attr_regs;
    308    }
    309 
    310    /* Temporaries, allocated after last vertex reg.
    311     */
    312    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
    313    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    314    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
    315    c->tmp = brw_vec8_grf(reg, 0);  reg++;
    316 
    317    /* Note grf allocation:
    318     */
    319    c->prog_data.total_grf = reg;
    320 
    321 
    322    /* Outputs of this program - interpolation coefficients for
    323     * rasterization:
    324     */
    325    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
    326    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
    327    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
    328 }
    329 
    330 
    331 static void copy_z_inv_w( struct brw_sf_compile *c )
    332 {
    333    struct brw_codegen *p = &c->func;
    334    GLuint i;
    335 
    336    /* Copy both scalars with a single MOV:
    337     */
    338    for (i = 0; i < c->nr_verts; i++)
    339       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
    340 }
    341 
    342 
    343 static void invert_det( struct brw_sf_compile *c)
    344 {
    345    /* Looks like we invert all 8 elements just to get 1/det in
    346     * position 2 !?!
    347     */
    348    gen4_math(&c->func,
    349 	     c->inv_det,
    350 	     BRW_MATH_FUNCTION_INV,
    351 	     0,
    352 	     c->det,
    353 	     BRW_MATH_PRECISION_FULL);
    354 
    355 }
    356 
    357 
    358 static bool
    359 calculate_masks(struct brw_sf_compile *c,
    360                 GLuint reg,
    361                 GLushort *pc,
    362                 GLushort *pc_persp,
    363                 GLushort *pc_linear)
    364 {
    365    bool is_last_attr = (reg == c->nr_setup_regs - 1);
    366    enum glsl_interp_mode interp;
    367 
    368    *pc_persp = 0;
    369    *pc_linear = 0;
    370    *pc = 0xf;
    371 
    372    interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
    373    if (interp == INTERP_MODE_SMOOTH) {
    374       *pc_linear = 0xf;
    375       *pc_persp = 0xf;
    376    } else if (interp == INTERP_MODE_NOPERSPECTIVE)
    377       *pc_linear = 0xf;
    378 
    379    /* Maybe only processs one attribute on the final round:
    380     */
    381    if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
    382       *pc |= 0xf0;
    383 
    384       interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
    385       if (interp == INTERP_MODE_SMOOTH) {
    386          *pc_linear |= 0xf0;
    387          *pc_persp |= 0xf0;
    388       } else if (interp == INTERP_MODE_NOPERSPECTIVE)
    389          *pc_linear |= 0xf0;
    390    }
    391 
    392    return is_last_attr;
    393 }
    394 
    395 /* Calculates the predicate control for which channels of a reg
    396  * (containing 2 attrs) to do point sprite coordinate replacement on.
    397  */
    398 static uint16_t
    399 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
    400 {
    401    int varying1, varying2;
    402    uint16_t pc = 0;
    403 
    404    varying1 = vert_reg_to_varying(c, reg, 0);
    405    if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
    406       if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
    407 	 pc |= 0x0f;
    408    }
    409    if (varying1 == BRW_VARYING_SLOT_PNTC)
    410       pc |= 0x0f;
    411 
    412    varying2 = vert_reg_to_varying(c, reg, 1);
    413    if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
    414       if (c->key.point_sprite_coord_replace & (1 << (varying2 -
    415                                                      VARYING_SLOT_TEX0)))
    416          pc |= 0xf0;
    417    }
    418    if (varying2 == BRW_VARYING_SLOT_PNTC)
    419       pc |= 0xf0;
    420 
    421    return pc;
    422 }
    423 
    424 static void
    425 set_predicate_control_flag_value(struct brw_codegen *p,
    426                                  struct brw_sf_compile *c,
    427                                  unsigned value)
    428 {
    429    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    430 
    431    if (value != 0xff) {
    432       if (value != c->flag_value) {
    433          brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
    434          c->flag_value = value;
    435       }
    436 
    437       brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
    438    }
    439 }
    440 
/**
 * Emit the setup code for triangles.
 *
 * After inverting the determinant and copying z and 1/w into the vertices
 * (plus the optional two-sided color selection and flat-shade copies), this
 * loops over the c->nr_setup_regs attribute registers.  For each register it
 * writes the dA/dx, dA/dy and start-value coefficients to m1..m3 and emits a
 * URB write; the write for the last register carries the EOT flag.
 *
 * \param allocate  when true, alloc_regs() is called first; callers that
 *                  already allocated registers pass false
 */
static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded into the flag register by
    * set_predicate_control_flag_value(), so starting from it forces the
    * first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 3;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.do_twoside_color)
      do_twoside_color(c);

   if (c->key.contains_flat_varying)
      do_flatshade_triangle(c);


   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      struct brw_reg a2 = offset(c->vert[2], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Perspective-interpolated channels are pre-multiplied by each
       * vertex's 1/w.
       */
      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
	 brw_MUL(p, a2, a2, c->inv_w[2]);
      }


      /* Calculate coefficients for interpolated values:
       */
      if (pc_linear)
      {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));

	 /* calculate dA/dx
	  *
	  * NOTE(review): the MUL to the null register presumably leaves its
	  * product in the accumulator for the MAC that follows -- confirm.
	  */
	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

	 /* calculate dA/dy
	  */
	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);
	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
	  * the send instruction:
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* offset */
		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
      }
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
    527 
    528 
    529 
/**
 * Emit the setup code for lines.
 *
 * Same overall shape as the triangle path but with two vertices: the
 * gradients are derived from the single difference a1 - a0, scaled by dx0
 * (for dA/dx) or dy0 (for dA/dy) and by the inverted determinant.
 *
 * \param allocate  when true, alloc_regs() is called first; callers that
 *                  already allocated registers pass false
 */
static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded into the flag register by
    * set_predicate_control_flag_value(), so starting from it forces the
    * first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->key.contains_flat_varying)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++)
   {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Perspective-interpolated channels are pre-multiplied by each
       * vertex's 1/w.
       */
      if (pc_persp)
      {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
	 brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
	 set_predicate_control_flag_value(p, c, pc_linear);

	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

	 /* dA/dx = (a1 - a0) * dx0 * inv_det */
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

	 /* dA/dy = (a1 - a0) * dy0 * inv_det */
	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
	 set_predicate_control_flag_value(p, c, pc);

	 /* start point for interpolation
	  */
	 brw_MOV(p, c->m3C0, a0);

	 /* Copy m0..m3 to URB.
	  */
	 brw_urb_WRITE(p,
		       brw_null_reg(),
		       0,
		       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                       : BRW_URB_WRITE_NO_FLAGS,
		       4, 	/* msg len */
		       0,	/* response len */
		       i*4,	/* urb destination offset */
		       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
    601 
/**
 * Emit the setup code for point sprites.
 *
 * For each attribute register, the channels selected by
 * calculate_point_sprite_mask() get generated sprite coordinates, while the
 * remaining active channels are passed through as constants (zero gradients,
 * start value taken from the vertex).
 *
 * \param allocate  when true, alloc_regs() is called first; callers that
 *                  already allocated registers pass false
 */
static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_codegen *p = &c->func;
   GLuint i;

   /* 0xff is never loaded into the flag register by
    * set_predicate_control_flag_value(), so starting from it forces the
    * first real mask to be loaded.
    */
   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);
   for (i = 0; i < c->nr_setup_regs; i++)
   {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      /* Replaced coordinates never get the 1/w multiply. */
      pc_coord_replace = calculate_point_sprite_mask(c, i);
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
	 set_predicate_control_flag_value(p, c, pc_persp);
	 brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc_coord_replace);
	 /* Calculate 1.0/PointWidth */
	 gen4_math(&c->func,
		   c->tmp,
		   BRW_MATH_FUNCTION_INV,
		   0,
		   c->dx0,
		   BRW_MATH_PRECISION_FULL);

	 /* The writemasked MOVs below are emitted in align16 mode. */
	 brw_set_default_access_mode(p, BRW_ALIGN_16);

	 /* dA/dx, dA/dy: x changes by 1/width per pixel in X; y changes by
	  * +/- 1/width per pixel in Y depending on the sprite origin.
	  */
	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
	 } else {
	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
	 }

	 /* attribute constant offset: w is always 1; with a lower-left
	  * origin y also starts at 1 because its gradient is negative.
	  */
	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
	 if (c->key.sprite_origin_lower_left) {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
	 } else {
	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
	 }

	 brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      /* Active channels that are not replaced are constant across the
       * point.
       */
      if (pc & ~pc_coord_replace) {
	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
	 brw_MOV(p, c->m3C0, a0); /* constant value */
      }


      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB. */
      brw_urb_WRITE(p,
		    brw_null_reg(),
		    0,
		    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                    : BRW_URB_WRITE_NO_FLAGS,
		    4, 	/* msg len */
		    0,	/* response len */
		    i*4,	/* urb destination offset */
		    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   /* Leave predication off for whatever is emitted next. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
    690 
    691 /* Points setup - several simplifications as all attributes are
    692  * constant across the face of the point (point sprites excluded!)
    693  */
    694 static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
    695 {
    696    struct brw_codegen *p = &c->func;
    697    GLuint i;
    698 
    699    c->flag_value = 0xff;
    700    c->nr_verts = 1;
    701 
    702    if (allocate)
    703       alloc_regs(c);
    704 
    705    copy_z_inv_w(c);
    706 
    707    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
    708    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
    709 
    710    for (i = 0; i < c->nr_setup_regs; i++)
    711    {
    712       struct brw_reg a0 = offset(c->vert[0], i);
    713       GLushort pc, pc_persp, pc_linear;
    714       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
    715 
    716       if (pc_persp)
    717       {
    718 	 /* This seems odd as the values are all constant, but the
    719 	  * fragment shader will be expecting it:
    720 	  */
    721 	 set_predicate_control_flag_value(p, c, pc_persp);
    722 	 brw_MUL(p, a0, a0, c->inv_w[0]);
    723       }
    724 
    725 
    726       /* The delta values are always zero, just send the starting
    727        * coordinate.  Again, this is to fit in with the interpolation
    728        * code in the fragment shader.
    729        */
    730       {
    731 	 set_predicate_control_flag_value(p, c, pc);
    732 
    733 	 brw_MOV(p, c->m3C0, a0); /* constant value */
    734 
    735 	 /* Copy m0..m3 to URB.
    736 	  */
    737 	 brw_urb_WRITE(p,
    738 		       brw_null_reg(),
    739 		       0,
    740 		       brw_vec8_grf(0, 0),
    741                        last ? BRW_URB_WRITE_EOT_COMPLETE
    742                        : BRW_URB_WRITE_NO_FLAGS,
    743 		       4, 	/* msg len */
    744 		       0,	/* response len */
    745 		       i*4,	/* urb destination offset */
    746 		       BRW_URB_SWIZZLE_TRANSPOSE);
    747       }
    748    }
    749 
    750    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    751 }
    752 
/**
 * Emit a program that handles any primitive type, choosing at run time.
 *
 * Registers are allocated once for three vertices.  The primitive type is
 * read from the payload and turned into a one-hot mask, which is tested
 * against the triangle-type and line-type sets in turn; a further test of
 * the sprite-point-enable bit separates point sprites from plain points.
 * Each test sets the flag on a zero result, and the predicated forward JMPI
 * that follows skips the setup code that does not apply.  Every setup is
 * emitted with allocate=false because of the single allocation here.
 */
static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   /* Both views read g1.0: as a UW for the primitive type, as a UD for the
    * attribute bits.
    */
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
   struct brw_reg primmask;
   /* Index of the pending forward jump within p->store. */
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

   c->nr_verts = 3;
   alloc_regs(c);

   /* The mask is built in the first element of the tmp register. */
   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   /* primmask = 1 << primitive type */
   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);

   /* Triangle-type primitives: jump past the triangle setup if none match. */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
					       (1<<_3DPRIM_TRISTRIP) |
					       (1<<_3DPRIM_TRIFAN) |
					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
					       (1<<_3DPRIM_POLYGON) |
					       (1<<_3DPRIM_RECTLIST) |
					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_tri_setup(c, false);
   /* NOTE(review): presumably patches the JMPI above to land here -- confirm. */
   brw_land_fwd_jump(p, jmp);

   /* Line-type primitives: jump past the line setup if none match. */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
					       (1<<_3DPRIM_LINESTRIP) |
					       (1<<_3DPRIM_LINELOOP) |
					       (1<<_3DPRIM_LINESTRIP_CONT) |
					       (1<<_3DPRIM_LINESTRIP_BF) |
					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_line_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Point sprites: jump past the sprite setup when the enable bit is clear. */
   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_point_sprite_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Everything else is handled as a plain point. */
   brw_emit_point_setup( c, false );
}
    801 
    802 const unsigned *
    803 brw_compile_sf(const struct brw_compiler *compiler,
    804                void *mem_ctx,
    805                const struct brw_sf_prog_key *key,
    806                struct brw_sf_prog_data *prog_data,
    807                struct brw_vue_map *vue_map,
    808                unsigned *final_assembly_size)
    809 {
    810    struct brw_sf_compile c;
    811    memset(&c, 0, sizeof(c));
    812 
    813    /* Begin the compilation:
    814     */
    815    brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
    816 
    817    c.key = *key;
    818    c.vue_map = *vue_map;
    819    if (c.key.do_point_coord) {
    820       /*
    821        * gl_PointCoord is a FS instead of VS builtin variable, thus it's
    822        * not included in c.vue_map generated in VS stage. Here we add
    823        * it manually to let SF shader generate the needed interpolation
    824        * coefficient for FS shader.
    825        */
    826       c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
    827       c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
    828    }
    829    c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    830    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
    831    c.nr_setup_regs = c.nr_attr_regs;
    832 
    833    c.prog_data.urb_read_length = c.nr_attr_regs;
    834    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
    835 
    836    /* Which primitive?  Or all three?
    837     */
    838    switch (key->primitive) {
    839    case BRW_SF_PRIM_TRIANGLES:
    840       c.nr_verts = 3;
    841       brw_emit_tri_setup( &c, true );
    842       break;
    843    case BRW_SF_PRIM_LINES:
    844       c.nr_verts = 2;
    845       brw_emit_line_setup( &c, true );
    846       break;
    847    case BRW_SF_PRIM_POINTS:
    848       c.nr_verts = 1;
    849       if (key->do_point_sprite)
    850 	  brw_emit_point_sprite_setup( &c, true );
    851       else
    852 	  brw_emit_point_setup( &c, true );
    853       break;
    854    case BRW_SF_PRIM_UNFILLED_TRIS:
    855       c.nr_verts = 3;
    856       brw_emit_anyprim_setup( &c );
    857       break;
    858    default:
    859       unreachable("not reached");
    860    }
    861 
    862    /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
    863     * source). Compacting would be difficult.
    864     */
    865    /* brw_compact_instructions(&c.func, 0, 0, NULL); */
    866 
    867    *prog_data = c.prog_data;
    868 
    869    const unsigned *program = brw_get_program(&c.func, final_assembly_size);
    870 
    871    if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
    872       fprintf(stderr, "sf:\n");
    873       brw_disassemble(compiler->devinfo,
    874                       program, 0, *final_assembly_size, stderr);
    875       fprintf(stderr, "\n");
    876    }
    877 
    878    return program;
    879 }
    880