Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * @file brw_vue_map.c
     26  *
     27  * This file computes the "VUE map" for a (non-fragment) shader stage, which
     28  * describes the layout of its output varyings.  The VUE map is used to match
     29  * outputs from one stage with the inputs of the next.
     30  *
     31  * Largely, varyings can be placed however we like - producers/consumers simply
     32  * have to agree on the layout.  However, there is also a "VUE Header" that
     33  * prescribes a fixed-layout for items that interact with fixed function
     34  * hardware, such as the clipper and rasterizer.
     35  *
     36  * Authors:
     37  *   Paul Berry <stereotype441 (at) gmail.com>
     38  *   Chris Forbes <chrisf (at) ijw.co.nz>
     39  *   Eric Anholt <eric (at) anholt.net>
     40  */
     41 
     42 
     43 #include "brw_context.h"
     44 
     45 static inline void
     46 assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
     47 {
     48    /* Make sure this varying hasn't been assigned a slot already */
     49    assert (vue_map->varying_to_slot[varying] == -1);
     50 
     51    vue_map->varying_to_slot[varying] = slot;
     52    vue_map->slot_to_varying[slot] = varying;
     53 }
     54 
     55 /**
     56  * Compute the VUE map for a shader stage.
     57  */
     58 void
     59 brw_compute_vue_map(const struct gen_device_info *devinfo,
     60                     struct brw_vue_map *vue_map,
     61                     GLbitfield64 slots_valid,
     62                     bool separate)
     63 {
     64    /* Keep using the packed/contiguous layout on old hardware - we only need
     65     * the SSO layout when using geometry/tessellation shaders or 32 FS input
     66     * varyings, which only exist on Gen >= 6.  It's also a bit more efficient.
     67     */
     68    if (devinfo->gen < 6)
     69       separate = false;
     70 
     71    if (separate) {
     72       /* In SSO mode, we don't know whether the adjacent stage will
     73        * read/write gl_ClipDistance, which has a fixed slot location.
     74        * We have to assume the worst and reserve a slot for it, or else
     75        * the rest of our varyings will be off by a slot.
     76        *
     77        * Note that we don't have to worry about COL/BFC, as those built-in
     78        * variables only exist in legacy GL, which only supports VS and FS.
     79        */
     80       slots_valid |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
     81       slots_valid |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
     82    }
     83 
     84    vue_map->slots_valid = slots_valid;
     85    vue_map->separate = separate;
     86 
     87    /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
     88     * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
     89     */
     90    slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
     91 
     92    /* Make sure that the values we store in vue_map->varying_to_slot and
     93     * vue_map->slot_to_varying won't overflow the signed chars that are used
     94     * to store them.  Note that since vue_map->slot_to_varying sometimes holds
     95     * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
     96     * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
     97     */
     98    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
     99 
    100    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
    101       vue_map->varying_to_slot[i] = -1;
    102       vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    103    }
    104 
    105    int slot = 0;
    106 
    107    /* VUE header: format depends on chip generation and whether clipping is
    108     * enabled.
    109     *
    110     * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30),
    111     * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout.
    112     */
    113    if (devinfo->gen < 6) {
    114       /* There are 8 dwords in VUE header pre-Ironlake:
    115        * dword 0-3 is indices, point width, clip flags.
    116        * dword 4-7 is ndc position
    117        * dword 8-11 is the first vertex data.
    118        *
    119        * On Ironlake the VUE header is nominally 20 dwords, but the hardware
    120        * will accept the same header layout as Gen4 [and should be a bit faster]
    121        */
    122       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
    123       assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
    124       assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
    125    } else {
    126       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    127        * dword 0-3 of the header is indices, point width, clip flags.
    128        * dword 4-7 is the 4D space position
    129        * dword 8-15 of the vertex header is the user clip distance if
    130        * enabled.
    131        * dword 8-11 or 16-19 is the first vertex element data we fill.
    132        */
    133       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
    134       assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
    135       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
    136          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
    137       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
    138          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
    139 
    140       /* front and back colors need to be consecutive so that we can use
    141        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
    142        * two-sided color.
    143        */
    144       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
    145          assign_vue_slot(vue_map, VARYING_SLOT_COL0, slot++);
    146       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
    147          assign_vue_slot(vue_map, VARYING_SLOT_BFC0, slot++);
    148       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
    149          assign_vue_slot(vue_map, VARYING_SLOT_COL1, slot++);
    150       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
    151          assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
    152    }
    153 
    154    /* The hardware doesn't care about the rest of the vertex outputs, so we
    155     * can assign them however we like.  For normal programs, we simply assign
    156     * them contiguously.
    157     *
    158     * For separate shader pipelines, we first assign built-in varyings
    159     * contiguous slots.  This works because ARB_separate_shader_objects
    160     * requires that all shaders have matching built-in varying interface
    161     * blocks.  Next, we assign generic varyings based on their location
    162     * (either explicit or linker assigned).  This guarantees a fixed layout.
    163     *
    164     * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
    165     * since it's encoded as the clip distances by emit_clip_distances().
    166     * However, it may be output by transform feedback, and we'd rather not
    167     * recompute state when TF changes, so we just always include it.
    168     */
    169    GLbitfield64 builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
    170    while (builtins != 0) {
    171       const int varying = ffsll(builtins) - 1;
    172       if (vue_map->varying_to_slot[varying] == -1) {
    173          assign_vue_slot(vue_map, varying, slot++);
    174       }
    175       builtins &= ~BITFIELD64_BIT(varying);
    176    }
    177 
    178    const int first_generic_slot = slot;
    179    GLbitfield64 generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
    180    while (generics != 0) {
    181       const int varying = ffsll(generics) - 1;
    182       if (separate) {
    183          slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
    184       }
    185       assign_vue_slot(vue_map, varying, slot++);
    186       generics &= ~BITFIELD64_BIT(varying);
    187    }
    188 
    189    vue_map->num_slots = slot;
    190    vue_map->num_per_vertex_slots = 0;
    191    vue_map->num_per_patch_slots = 0;
    192 }
    193 
    194 /**
    195  * Compute the VUE map for tessellation control shader outputs and
    196  * tessellation evaluation shader inputs.
    197  */
    198 void
    199 brw_compute_tess_vue_map(struct brw_vue_map *vue_map,
    200                          GLbitfield64 vertex_slots,
    201                          GLbitfield patch_slots)
    202 {
    203    /* I don't think anything actually uses this... */
    204    vue_map->slots_valid = vertex_slots;
    205 
    206    /* separate isn't really meaningful, but make sure it's initialized */
    207    vue_map->separate = false;
    208 
    209    vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
    210                      VARYING_BIT_TESS_LEVEL_INNER);
    211 
    212    /* Make sure that the values we store in vue_map->varying_to_slot and
    213     * vue_map->slot_to_varying won't overflow the signed chars that are used
    214     * to store them.  Note that since vue_map->slot_to_varying sometimes holds
    215     * values equal to VARYING_SLOT_TESS_MAX , we need to ensure that
    216     * VARYING_SLOT_TESS_MAX is <= 127, not 128.
    217     */
    218    STATIC_ASSERT(VARYING_SLOT_TESS_MAX <= 127);
    219 
    220    for (int i = 0; i < VARYING_SLOT_TESS_MAX ; ++i) {
    221       vue_map->varying_to_slot[i] = -1;
    222       vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    223    }
    224 
    225    int slot = 0;
    226 
    227    /* The first 8 DWords are reserved for the "Patch Header".
    228     *
    229     * VARYING_SLOT_TESS_LEVEL_OUTER / INNER live here, but the exact layout
    230     * depends on the domain type.  They might not be in slots 0 and 1 as
    231     * described here, but pretending they're separate allows us to uniquely
    232     * identify them by distinct slot locations.
    233     */
    234    assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_INNER, slot++);
    235    assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_OUTER, slot++);
    236 
    237    /* first assign per-patch varyings */
    238    while (patch_slots != 0) {
    239       const int varying = ffsll(patch_slots) - 1;
    240       if (vue_map->varying_to_slot[varying + VARYING_SLOT_PATCH0] == -1) {
    241          assign_vue_slot(vue_map, varying + VARYING_SLOT_PATCH0, slot++);
    242       }
    243       patch_slots &= ~BITFIELD64_BIT(varying);
    244    }
    245 
    246    /* apparently, including the patch header... */
    247    vue_map->num_per_patch_slots = slot;
    248 
    249    /* then assign per-vertex varyings for each vertex in our patch */
    250    while (vertex_slots != 0) {
    251       const int varying = ffsll(vertex_slots) - 1;
    252       if (vue_map->varying_to_slot[varying] == -1) {
    253          assign_vue_slot(vue_map, varying, slot++);
    254       }
    255       vertex_slots &= ~BITFIELD64_BIT(varying);
    256    }
    257 
    258    vue_map->num_per_vertex_slots = slot - vue_map->num_per_patch_slots;
    259    vue_map->num_slots = slot;
    260 }
    261 
    262 static const char *
    263 varying_name(brw_varying_slot slot)
    264 {
    265    assume(slot < BRW_VARYING_SLOT_COUNT);
    266 
    267    if (slot < VARYING_SLOT_MAX)
    268       return gl_varying_slot_name(slot);
    269 
    270    static const char *brw_names[] = {
    271       [BRW_VARYING_SLOT_NDC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_NDC",
    272       [BRW_VARYING_SLOT_PAD - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PAD",
    273       [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC",
    274    };
    275 
    276    return brw_names[slot - VARYING_SLOT_MAX];
    277 }
    278 
    279 void
    280 brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map)
    281 {
    282    if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
    283       fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
    284               vue_map->num_slots,
    285               vue_map->num_per_patch_slots,
    286               vue_map->num_per_vertex_slots,
    287               vue_map->separate ? "SSO" : "non-SSO");
    288       for (int i = 0; i < vue_map->num_slots; i++) {
    289          if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
    290             fprintf(fp, "  [%d] VARYING_SLOT_PATCH%d\n", i,
    291                     vue_map->slot_to_varying[i] - VARYING_SLOT_PATCH0);
    292          } else {
    293             fprintf(fp, "  [%d] %s\n", i,
    294                     varying_name(vue_map->slot_to_varying[i]));
    295          }
    296       }
    297    } else {
    298       fprintf(fp, "VUE map (%d slots, %s)\n",
    299               vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
    300       for (int i = 0; i < vue_map->num_slots; i++) {
    301          fprintf(fp, "  [%d] %s\n", i,
    302                  varying_name(vue_map->slot_to_varying[i]));
    303       }
    304    }
    305    fprintf(fp, "\n");
    306 }
    307