      1 /*
      2  * Copyright 2003 VMware, Inc.
      3  * All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sublicense, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial portions
     15  * of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 #include "main/bufferobj.h"
     27 #include "main/context.h"
     28 #include "main/enums.h"
     29 #include "main/macros.h"
     30 #include "main/glformats.h"
     31 
     32 #include "brw_draw.h"
     33 #include "brw_defines.h"
     34 #include "brw_context.h"
     35 #include "brw_state.h"
     36 
     37 #include "intel_batchbuffer.h"
     38 #include "intel_buffer_objects.h"
     39 
     40 static const GLuint double_types_float[5] = {
     41    0,
     42    BRW_SURFACEFORMAT_R64_FLOAT,
     43    BRW_SURFACEFORMAT_R64G64_FLOAT,
     44    BRW_SURFACEFORMAT_R64G64B64_FLOAT,
     45    BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
     46 };
     47 
     48 static const GLuint double_types_passthru[5] = {
     49    0,
     50    BRW_SURFACEFORMAT_R64_PASSTHRU,
     51    BRW_SURFACEFORMAT_R64G64_PASSTHRU,
     52    BRW_SURFACEFORMAT_R64G64B64_PASSTHRU,
     53    BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU
     54 };
     55 
     56 static const GLuint float_types[5] = {
     57    0,
     58    BRW_SURFACEFORMAT_R32_FLOAT,
     59    BRW_SURFACEFORMAT_R32G32_FLOAT,
     60    BRW_SURFACEFORMAT_R32G32B32_FLOAT,
     61    BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
     62 };
     63 
     64 static const GLuint half_float_types[5] = {
     65    0,
     66    BRW_SURFACEFORMAT_R16_FLOAT,
     67    BRW_SURFACEFORMAT_R16G16_FLOAT,
     68    BRW_SURFACEFORMAT_R16G16B16_FLOAT,
     69    BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
     70 };
     71 
     72 static const GLuint fixed_point_types[5] = {
     73    0,
     74    BRW_SURFACEFORMAT_R32_SFIXED,
     75    BRW_SURFACEFORMAT_R32G32_SFIXED,
     76    BRW_SURFACEFORMAT_R32G32B32_SFIXED,
     77    BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
     78 };
     79 
     80 static const GLuint uint_types_direct[5] = {
     81    0,
     82    BRW_SURFACEFORMAT_R32_UINT,
     83    BRW_SURFACEFORMAT_R32G32_UINT,
     84    BRW_SURFACEFORMAT_R32G32B32_UINT,
     85    BRW_SURFACEFORMAT_R32G32B32A32_UINT
     86 };
     87 
     88 static const GLuint uint_types_norm[5] = {
     89    0,
     90    BRW_SURFACEFORMAT_R32_UNORM,
     91    BRW_SURFACEFORMAT_R32G32_UNORM,
     92    BRW_SURFACEFORMAT_R32G32B32_UNORM,
     93    BRW_SURFACEFORMAT_R32G32B32A32_UNORM
     94 };
     95 
     96 static const GLuint uint_types_scale[5] = {
     97    0,
     98    BRW_SURFACEFORMAT_R32_USCALED,
     99    BRW_SURFACEFORMAT_R32G32_USCALED,
    100    BRW_SURFACEFORMAT_R32G32B32_USCALED,
    101    BRW_SURFACEFORMAT_R32G32B32A32_USCALED
    102 };
    103 
    104 static const GLuint int_types_direct[5] = {
    105    0,
    106    BRW_SURFACEFORMAT_R32_SINT,
    107    BRW_SURFACEFORMAT_R32G32_SINT,
    108    BRW_SURFACEFORMAT_R32G32B32_SINT,
    109    BRW_SURFACEFORMAT_R32G32B32A32_SINT
    110 };
    111 
    112 static const GLuint int_types_norm[5] = {
    113    0,
    114    BRW_SURFACEFORMAT_R32_SNORM,
    115    BRW_SURFACEFORMAT_R32G32_SNORM,
    116    BRW_SURFACEFORMAT_R32G32B32_SNORM,
    117    BRW_SURFACEFORMAT_R32G32B32A32_SNORM
    118 };
    119 
    120 static const GLuint int_types_scale[5] = {
    121    0,
    122    BRW_SURFACEFORMAT_R32_SSCALED,
    123    BRW_SURFACEFORMAT_R32G32_SSCALED,
    124    BRW_SURFACEFORMAT_R32G32B32_SSCALED,
    125    BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
    126 };
    127 
    128 static const GLuint ushort_types_direct[5] = {
    129    0,
    130    BRW_SURFACEFORMAT_R16_UINT,
    131    BRW_SURFACEFORMAT_R16G16_UINT,
    132    BRW_SURFACEFORMAT_R16G16B16_UINT,
    133    BRW_SURFACEFORMAT_R16G16B16A16_UINT
    134 };
    135 
    136 static const GLuint ushort_types_norm[5] = {
    137    0,
    138    BRW_SURFACEFORMAT_R16_UNORM,
    139    BRW_SURFACEFORMAT_R16G16_UNORM,
    140    BRW_SURFACEFORMAT_R16G16B16_UNORM,
    141    BRW_SURFACEFORMAT_R16G16B16A16_UNORM
    142 };
    143 
    144 static const GLuint ushort_types_scale[5] = {
    145    0,
    146    BRW_SURFACEFORMAT_R16_USCALED,
    147    BRW_SURFACEFORMAT_R16G16_USCALED,
    148    BRW_SURFACEFORMAT_R16G16B16_USCALED,
    149    BRW_SURFACEFORMAT_R16G16B16A16_USCALED
    150 };
    151 
    152 static const GLuint short_types_direct[5] = {
    153    0,
    154    BRW_SURFACEFORMAT_R16_SINT,
    155    BRW_SURFACEFORMAT_R16G16_SINT,
    156    BRW_SURFACEFORMAT_R16G16B16_SINT,
    157    BRW_SURFACEFORMAT_R16G16B16A16_SINT
    158 };
    159 
    160 static const GLuint short_types_norm[5] = {
    161    0,
    162    BRW_SURFACEFORMAT_R16_SNORM,
    163    BRW_SURFACEFORMAT_R16G16_SNORM,
    164    BRW_SURFACEFORMAT_R16G16B16_SNORM,
    165    BRW_SURFACEFORMAT_R16G16B16A16_SNORM
    166 };
    167 
    168 static const GLuint short_types_scale[5] = {
    169    0,
    170    BRW_SURFACEFORMAT_R16_SSCALED,
    171    BRW_SURFACEFORMAT_R16G16_SSCALED,
    172    BRW_SURFACEFORMAT_R16G16B16_SSCALED,
    173    BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
    174 };
    175 
    176 static const GLuint ubyte_types_direct[5] = {
    177    0,
    178    BRW_SURFACEFORMAT_R8_UINT,
    179    BRW_SURFACEFORMAT_R8G8_UINT,
    180    BRW_SURFACEFORMAT_R8G8B8_UINT,
    181    BRW_SURFACEFORMAT_R8G8B8A8_UINT
    182 };
    183 
    184 static const GLuint ubyte_types_norm[5] = {
    185    0,
    186    BRW_SURFACEFORMAT_R8_UNORM,
    187    BRW_SURFACEFORMAT_R8G8_UNORM,
    188    BRW_SURFACEFORMAT_R8G8B8_UNORM,
    189    BRW_SURFACEFORMAT_R8G8B8A8_UNORM
    190 };
    191 
    192 static const GLuint ubyte_types_scale[5] = {
    193    0,
    194    BRW_SURFACEFORMAT_R8_USCALED,
    195    BRW_SURFACEFORMAT_R8G8_USCALED,
    196    BRW_SURFACEFORMAT_R8G8B8_USCALED,
    197    BRW_SURFACEFORMAT_R8G8B8A8_USCALED
    198 };
    199 
    200 static const GLuint byte_types_direct[5] = {
    201    0,
    202    BRW_SURFACEFORMAT_R8_SINT,
    203    BRW_SURFACEFORMAT_R8G8_SINT,
    204    BRW_SURFACEFORMAT_R8G8B8_SINT,
    205    BRW_SURFACEFORMAT_R8G8B8A8_SINT
    206 };
    207 
    208 static const GLuint byte_types_norm[5] = {
    209    0,
    210    BRW_SURFACEFORMAT_R8_SNORM,
    211    BRW_SURFACEFORMAT_R8G8_SNORM,
    212    BRW_SURFACEFORMAT_R8G8B8_SNORM,
    213    BRW_SURFACEFORMAT_R8G8B8A8_SNORM
    214 };
    215 
    216 static const GLuint byte_types_scale[5] = {
    217    0,
    218    BRW_SURFACEFORMAT_R8_SSCALED,
    219    BRW_SURFACEFORMAT_R8G8_SSCALED,
    220    BRW_SURFACEFORMAT_R8G8B8_SSCALED,
    221    BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
    222 };
    223 
    224 static GLuint
    225 double_types(struct brw_context *brw,
    226              int size,
    227              GLboolean doubles)
    228 {
    229    /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
    230     * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    231     * 64-bit components are stored in the URB without any conversion."
     232     * This is also covered in the BDW PRM, Volume 7, page 470, in the table
     233     * "Source Element Formats Supported in VF Unit".
     234     *
     235     * Previous PRMs don't include those references, so on gen7 we can't use
     236     * the PASSTHRU formats directly. We still prefer to return passthru here,
     237     * because that reflects what we want to achieve, even though gen < 8
     238     * needs to work around it by splitting the upload.
    239     */
    240    return (doubles
    241            ? double_types_passthru[size]
    242            : double_types_float[size]);
    243 }
    244 
    245 static bool
    246 is_passthru_format(uint32_t format)
    247 {
    248    switch (format) {
    249    case BRW_SURFACEFORMAT_R64_PASSTHRU:
    250    case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
    251    case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
    252    case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
    253       return true;
    254    default:
    255       return false;
    256    }
    257 }
    258 
    259 static int
    260 uploads_needed(uint32_t format)
    261 {
    262    if (!is_passthru_format(format))
    263       return 1;
    264 
    265    switch (format) {
    266    case BRW_SURFACEFORMAT_R64_PASSTHRU:
    267    case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
    268       return 1;
    269    case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
    270    case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
    271       return 2;
    272    default:
    273       unreachable("not reached");
    274    }
    275 }
    276 
    277 /*
     278  * Returns the number of components associated with a format that is used in
     279  * a 64-bit to 32-bit format split. See downsize_format_if_needed().
    280  */
    281 static int
    282 upload_format_size(uint32_t upload_format)
    283 {
    284    switch (upload_format) {
    285    case BRW_SURFACEFORMAT_R32G32_FLOAT:
    286       return 2;
    287    case BRW_SURFACEFORMAT_R32G32B32A32_FLOAT:
    288       return 4;
    289    default:
    290       unreachable("not reached");
    291    }
    292 }
    293 
    294 /*
     295  * Returns the format that we are finally going to use when uploading a vertex
     296  * element. It only changes if we are using *64*_PASSTHRU formats, as on
     297  * gen < 8 they need to be split into two *32*_FLOAT formats.
     298  *
     299  * @upload indicates which upload this is. Valid values are 0 and 1.
    300  */
    301 static uint32_t
    302 downsize_format_if_needed(uint32_t format,
    303                           int upload)
    304 {
    305    assert(upload == 0 || upload == 1);
    306 
    307    if (!is_passthru_format(format))
    308       return format;
    309 
    310    switch (format) {
    311    case BRW_SURFACEFORMAT_R64_PASSTHRU:
    312       return BRW_SURFACEFORMAT_R32G32_FLOAT;
    313    case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
    314       return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
    315    case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
    316       return !upload ? BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
    317                      : BRW_SURFACEFORMAT_R32G32_FLOAT;
    318    case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
    319       return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
    320    default:
    321       unreachable("not reached");
    322    }
    323 }
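
         /*
          * Worked example (illustrative only): a dvec3 vertex attribute maps to
          * BRW_SURFACEFORMAT_R64G64B64_PASSTHRU.  On gen < 8 the VF unit does not
          * support that format, so uploads_needed() reports 2 and the two passes
          * resolve to:
          *
          *    downsize_format_if_needed(R64G64B64_PASSTHRU, 0) == R32G32B32A32_FLOAT
          *    downsize_format_if_needed(R64G64B64_PASSTHRU, 1) == R32G32_FLOAT
          *
          * i.e. the first 16 bytes (the x and y doubles) go out as a 4 x 32-bit
          * element and the remaining 8 bytes (z) as a 2 x 32-bit element, to be
          * reassembled into 64-bit values later.
          */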
    324 
    325 /**
    326  * Given vertex array type/size/format/normalized info, return
     327  * the appropriate hardware surface type.
    328  * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
    329  */
    330 unsigned
    331 brw_get_vertex_surface_type(struct brw_context *brw,
    332                             const struct gl_vertex_array *glarray)
    333 {
    334    int size = glarray->Size;
    335    const bool is_ivybridge_or_older =
    336       brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell;
    337 
    338    if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
    339       fprintf(stderr, "type %s size %d normalized %d\n",
    340               _mesa_enum_to_string(glarray->Type),
    341               glarray->Size, glarray->Normalized);
    342 
    343    if (glarray->Integer) {
    344       assert(glarray->Format == GL_RGBA); /* sanity check */
    345       switch (glarray->Type) {
    346       case GL_INT: return int_types_direct[size];
    347       case GL_SHORT:
    348          if (is_ivybridge_or_older && size == 3)
    349             return short_types_direct[4];
    350          else
    351             return short_types_direct[size];
    352       case GL_BYTE:
    353          if (is_ivybridge_or_older && size == 3)
    354             return byte_types_direct[4];
    355          else
    356             return byte_types_direct[size];
    357       case GL_UNSIGNED_INT: return uint_types_direct[size];
    358       case GL_UNSIGNED_SHORT:
    359          if (is_ivybridge_or_older && size == 3)
    360             return ushort_types_direct[4];
    361          else
    362             return ushort_types_direct[size];
    363       case GL_UNSIGNED_BYTE:
    364          if (is_ivybridge_or_older && size == 3)
    365             return ubyte_types_direct[4];
    366          else
    367             return ubyte_types_direct[size];
    368       default: unreachable("not reached");
    369       }
    370    } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
    371       return BRW_SURFACEFORMAT_R11G11B10_FLOAT;
    372    } else if (glarray->Normalized) {
    373       switch (glarray->Type) {
    374       case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
    375       case GL_FLOAT: return float_types[size];
    376       case GL_HALF_FLOAT:
    377       case GL_HALF_FLOAT_OES:
    378          if (brw->gen < 6 && size == 3)
    379             return half_float_types[4];
    380          else
    381             return half_float_types[size];
    382       case GL_INT: return int_types_norm[size];
    383       case GL_SHORT: return short_types_norm[size];
    384       case GL_BYTE: return byte_types_norm[size];
    385       case GL_UNSIGNED_INT: return uint_types_norm[size];
    386       case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
    387       case GL_UNSIGNED_BYTE:
    388          if (glarray->Format == GL_BGRA) {
    389             /* See GL_EXT_vertex_array_bgra */
    390             assert(size == 4);
    391             return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
    392          }
    393          else {
    394             return ubyte_types_norm[size];
    395          }
    396       case GL_FIXED:
    397          if (brw->gen >= 8 || brw->is_haswell)
    398             return fixed_point_types[size];
    399 
    400          /* This produces GL_FIXED inputs as values between INT32_MIN and
    401           * INT32_MAX, which will be scaled down by 1/65536 by the VS.
    402           */
    403          return int_types_scale[size];
    404       /* See GL_ARB_vertex_type_2_10_10_10_rev.
    405        * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
    406        * like to use here, so upload everything as UINT and fix
    407        * it in the shader
    408        */
    409       case GL_INT_2_10_10_10_REV:
    410          assert(size == 4);
    411          if (brw->gen >= 8 || brw->is_haswell) {
    412             return glarray->Format == GL_BGRA
    413                ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
    414                : BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
    415          }
    416          return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
    417       case GL_UNSIGNED_INT_2_10_10_10_REV:
    418          assert(size == 4);
    419          if (brw->gen >= 8 || brw->is_haswell) {
    420             return glarray->Format == GL_BGRA
    421                ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
    422                : BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
    423          }
    424          return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
    425       default: unreachable("not reached");
    426       }
    427    }
    428    else {
    429       /* See GL_ARB_vertex_type_2_10_10_10_rev.
    430        * W/A: the hardware doesn't really support the formats we'd
    431        * like to use here, so upload everything as UINT and fix
    432        * it in the shader
    433        */
    434       if (glarray->Type == GL_INT_2_10_10_10_REV) {
    435          assert(size == 4);
    436          if (brw->gen >= 8 || brw->is_haswell) {
    437             return glarray->Format == GL_BGRA
    438                ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
    439                : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
    440          }
    441          return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
    442       } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
    443          assert(size == 4);
    444          if (brw->gen >= 8 || brw->is_haswell) {
    445             return glarray->Format == GL_BGRA
    446                ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
    447                : BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
    448          }
    449          return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
    450       }
    451       assert(glarray->Format == GL_RGBA); /* sanity check */
    452       switch (glarray->Type) {
    453       case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
    454       case GL_FLOAT: return float_types[size];
    455       case GL_HALF_FLOAT:
    456       case GL_HALF_FLOAT_OES:
    457          if (brw->gen < 6 && size == 3)
    458             return half_float_types[4];
    459          else
    460             return half_float_types[size];
    461       case GL_INT: return int_types_scale[size];
    462       case GL_SHORT: return short_types_scale[size];
    463       case GL_BYTE: return byte_types_scale[size];
    464       case GL_UNSIGNED_INT: return uint_types_scale[size];
    465       case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
    466       case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
    467       case GL_FIXED:
    468          if (brw->gen >= 8 || brw->is_haswell)
    469             return fixed_point_types[size];
    470 
    471          /* This produces GL_FIXED inputs as values between INT32_MIN and
    472           * INT32_MAX, which will be scaled down by 1/65536 by the VS.
    473           */
    474          return int_types_scale[size];
    475       default: unreachable("not reached");
    476       }
    477    }
    478 }
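
         /*
          * A few illustrative mappings (assuming gen >= 6 and GL_RGBA ordering):
          *
          *    glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, ...)         -> R32G32B32_FLOAT
          *    glVertexAttribPointer(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, ...)  -> R8G8B8A8_UNORM
          *    glVertexAttribIPointer(loc, 2, GL_INT, ...)                    -> R32G32_SINT
          *
          * These only show how Size/Type/Normalized/Integer pick one of the tables
          * above; the exact result can still differ per generation (e.g. the
          * 3-component integer widening on Ivybridge and older).
          */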
    479 
    480 static void
    481 copy_array_to_vbo_array(struct brw_context *brw,
    482 			struct brw_vertex_element *element,
    483 			int min, int max,
    484 			struct brw_vertex_buffer *buffer,
    485 			GLuint dst_stride)
    486 {
    487    const int src_stride = element->glarray->StrideB;
    488 
    489    /* If the source stride is zero, we just want to upload the current
    490     * attribute once and set the buffer's stride to 0.  There's no need
    491     * to replicate it out.
    492     */
    493    if (src_stride == 0) {
    494       intel_upload_data(brw, element->glarray->Ptr,
    495                         element->glarray->_ElementSize,
    496                         element->glarray->_ElementSize,
    497 			&buffer->bo, &buffer->offset);
    498 
    499       buffer->stride = 0;
    500       buffer->size = element->glarray->_ElementSize;
    501       return;
    502    }
    503 
    504    const unsigned char *src = element->glarray->Ptr + min * src_stride;
    505    int count = max - min + 1;
    506    GLuint size = count * dst_stride;
    507    uint8_t *dst = intel_upload_space(brw, size, dst_stride,
    508                                      &buffer->bo, &buffer->offset);
    509 
    510    /* The GL 4.5 spec says:
     511     *      "If any enabled array's buffer binding is zero when DrawArrays or
    512     *      one of the other drawing commands defined in section 10.4 is called,
    513     *      the result is undefined."
    514     *
     515     * In this case, we just leave dst filled with undefined values.
    516     */
    517    if (src != NULL) {
    518       if (dst_stride == src_stride) {
    519          memcpy(dst, src, size);
    520       } else {
    521          while (count--) {
    522             memcpy(dst, src, dst_stride);
    523             src += src_stride;
    524             dst += dst_stride;
    525          }
    526       }
    527    }
    528    buffer->stride = dst_stride;
    529    buffer->size = size;
    530 }
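
         /*
          * Example with hypothetical values: min = 10, max = 19, src_stride = 32,
          * dst_stride = 16.  The loop above copies 10 elements of 16 bytes each out
          * of their 32-byte source slots into a tightly packed 160-byte upload
          * buffer, so the VB stride emitted later matches the element size exactly.
          */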
    531 
    532 void
    533 brw_prepare_vertices(struct brw_context *brw)
    534 {
    535    struct gl_context *ctx = &brw->ctx;
    536    /* BRW_NEW_VS_PROG_DATA */
    537    const struct brw_vs_prog_data *vs_prog_data =
    538       brw_vs_prog_data(brw->vs.base.prog_data);
    539    GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
    540    const unsigned char *ptr = NULL;
    541    GLuint interleaved = 0;
    542    unsigned int min_index = brw->vb.min_index + brw->basevertex;
    543    unsigned int max_index = brw->vb.max_index + brw->basevertex;
    544    unsigned i;
    545    int delta, j;
    546 
    547    struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
    548    GLuint nr_uploads = 0;
    549 
    550    /* _NEW_POLYGON
    551     *
    552     * On gen6+, edge flags don't end up in the VUE (either in or out of the
    553     * VS).  Instead, they're uploaded as the last vertex element, and the data
    554     * is passed sideband through the fixed function units.  So, we need to
    555     * prepare the vertex buffer for it, but it's not present in inputs_read.
    556     */
    557    if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
    558                            ctx->Polygon.BackMode != GL_FILL)) {
    559       vs_inputs |= VERT_BIT_EDGEFLAG;
    560    }
    561 
    562    if (0)
    563       fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
    564 
    565    /* Accumulate the list of enabled arrays. */
    566    brw->vb.nr_enabled = 0;
    567    while (vs_inputs) {
    568       GLuint first = ffsll(vs_inputs) - 1;
    569       assert (first < 64);
    570       GLuint index =
    571          first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read &
    572                                                 BITFIELD64_MASK(first)), 2);
    573       struct brw_vertex_element *input = &brw->vb.inputs[index];
    574       input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0;
    575       vs_inputs &= ~BITFIELD64_BIT(first);
    576       if (input->is_dual_slot)
    577          vs_inputs &= ~BITFIELD64_BIT(first + 1);
    578       brw->vb.enabled[brw->vb.nr_enabled++] = input;
    579    }
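
            /*
             * Example of the index remapping above (illustrative): if the VS reads a
             * dvec4 from shader input slot 0, double_inputs_read has bits 0 and 1 set
             * and that attribute occupies two 64-bit slots.  The next attribute, read
             * from slot 2, then maps back to brw->vb.inputs[1]:
             *
             *    index = 2 - DIV_ROUND_UP(bitcount(0x3), 2) = 2 - 1 = 1
             */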
    580 
    581    if (brw->vb.nr_enabled == 0)
    582       return;
    583 
    584    if (brw->vb.nr_buffers)
    585       return;
    586 
    587    /* The range of data in a given buffer represented as [min, max) */
    588    struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
    589    uint32_t buffer_range_start[VERT_ATTRIB_MAX];
    590    uint32_t buffer_range_end[VERT_ATTRIB_MAX];
    591 
    592    for (i = j = 0; i < brw->vb.nr_enabled; i++) {
    593       struct brw_vertex_element *input = brw->vb.enabled[i];
    594       const struct gl_vertex_array *glarray = input->glarray;
    595 
    596       if (_mesa_is_bufferobj(glarray->BufferObj)) {
    597 	 struct intel_buffer_object *intel_buffer =
    598 	    intel_buffer_object(glarray->BufferObj);
    599 
    600          const uint32_t offset = (uintptr_t)glarray->Ptr;
    601 
    602          /* Start with the worst case */
    603          uint32_t start = 0;
    604          uint32_t range = intel_buffer->Base.Size;
    605          if (glarray->InstanceDivisor) {
    606             if (brw->num_instances) {
    607                start = offset + glarray->StrideB * brw->baseinstance;
    608                range = (glarray->StrideB * ((brw->num_instances - 1) /
    609                                             glarray->InstanceDivisor) +
    610                         glarray->_ElementSize);
    611             }
    612          } else {
    613             if (brw->vb.index_bounds_valid) {
    614                start = offset + min_index * glarray->StrideB;
    615                range = (glarray->StrideB * (max_index - min_index) +
    616                         glarray->_ElementSize);
    617             }
    618          }
    619 
    620 	 /* If we have a VB set to be uploaded for this buffer object
    621 	  * already, reuse that VB state so that we emit fewer
    622 	  * relocations.
    623 	  */
    624 	 unsigned k;
    625 	 for (k = 0; k < i; k++) {
    626 	    const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray;
    627 	    if (glarray->BufferObj == other->BufferObj &&
    628 		glarray->StrideB == other->StrideB &&
    629 		glarray->InstanceDivisor == other->InstanceDivisor &&
    630 		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
    631 	    {
    632 	       input->buffer = brw->vb.enabled[k]->buffer;
    633 	       input->offset = glarray->Ptr - other->Ptr;
    634 
    635                buffer_range_start[input->buffer] =
    636                   MIN2(buffer_range_start[input->buffer], start);
    637                buffer_range_end[input->buffer] =
    638                   MAX2(buffer_range_end[input->buffer], start + range);
    639 	       break;
    640 	    }
    641 	 }
    642 	 if (k == i) {
    643 	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
    644 
    645 	    /* Named buffer object: Just reference its contents directly. */
    646 	    buffer->offset = offset;
    647 	    buffer->stride = glarray->StrideB;
    648 	    buffer->step_rate = glarray->InstanceDivisor;
    649             buffer->size = glarray->BufferObj->Size - offset;
    650 
    651             enabled_buffer[j] = intel_buffer;
    652             buffer_range_start[j] = start;
    653             buffer_range_end[j] = start + range;
    654 
    655 	    input->buffer = j++;
    656 	    input->offset = 0;
    657 	 }
    658       } else {
    659 	 /* Queue the buffer object up to be uploaded in the next pass,
    660 	  * when we've decided if we're doing interleaved or not.
    661 	  */
    662 	 if (nr_uploads == 0) {
    663 	    interleaved = glarray->StrideB;
    664 	    ptr = glarray->Ptr;
    665 	 }
    666 	 else if (interleaved != glarray->StrideB ||
    667                   glarray->Ptr < ptr ||
    668                   (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
    669 	 {
    670             /* If our stride is different from the first attribute's stride,
    671              * or if the first attribute's stride didn't cover our element,
    672              * disable the interleaved upload optimization.  The second case
    673              * can most commonly occur in cases where there is a single vertex
    674              * and, for example, the data is stored on the application's
    675              * stack.
    676              *
    677              * NOTE: This will also disable the optimization in cases where
    678              * the data is in a different order than the array indices.
    679              * Something like:
    680              *
    681              *     float data[...];
    682              *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
    683              *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
    684              */
    685 	    interleaved = 0;
    686 	 }
    687 
    688 	 upload[nr_uploads++] = input;
    689       }
    690    }
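
            /*
             * Illustrative outcome of the loop above: position and normal arrays that
             * live interleaved in one VBO with the same 24-byte stride end up sharing
             * a single brw_vertex_buffer slot, with input->offset carrying the 12-byte
             * distance between them, so only one relocation is emitted for that VBO.
             */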
    691 
    692    /* Now that we've set up all of the buffers, we walk through and reference
    693     * each of them.  We do this late so that we get the right size in each
    694     * buffer and don't reference too little data.
    695     */
    696    for (i = 0; i < j; i++) {
    697       struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
    698       if (buffer->bo)
    699          continue;
    700 
    701       const uint32_t start = buffer_range_start[i];
    702       const uint32_t range = buffer_range_end[i] - buffer_range_start[i];
    703 
    704       buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
    705       drm_intel_bo_reference(buffer->bo);
    706    }
    707 
    708    /* If we need to upload all the arrays, then we can trim those arrays to
    709     * only the used elements [min_index, max_index] so long as we adjust all
    710     * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    711     */
    712    brw->vb.start_vertex_bias = 0;
    713    delta = min_index;
    714    if (nr_uploads == brw->vb.nr_enabled) {
    715       brw->vb.start_vertex_bias = -delta;
    716       delta = 0;
    717    }
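
            /*
             * Example (illustrative): a draw whose referenced vertices span
             * [100, 163] with every enabled array coming from user memory uploads
             * only those 64 vertices; start_vertex_bias becomes -100 and delta
             * becomes 0, so element 100 of the source arrays lands at vertex 0 of
             * the uploaded VBs and the 3DPRIMITIVE vertex numbering is rebased to
             * match.
             */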
    718 
    719    /* Handle any arrays to be uploaded. */
    720    if (nr_uploads > 1) {
    721       if (interleaved) {
    722 	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
    723 	 /* All uploads are interleaved, so upload the arrays together as
    724 	  * interleaved.  First, upload the contents and set up upload[0].
    725 	  */
    726 	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
    727 				 buffer, interleaved);
    728 	 buffer->offset -= delta * interleaved;
    729          buffer->size += delta * interleaved;
    730 
    731 	 for (i = 0; i < nr_uploads; i++) {
    732 	    /* Then, just point upload[i] at upload[0]'s buffer. */
    733 	    upload[i]->offset =
    734 	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
    735 	    upload[i]->buffer = j;
    736 	 }
    737 	 j++;
    738 
    739 	 nr_uploads = 0;
    740       }
    741    }
    742    /* Upload non-interleaved arrays */
    743    for (i = 0; i < nr_uploads; i++) {
    744       struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
    745       if (upload[i]->glarray->InstanceDivisor == 0) {
    746          copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
    747                                  buffer, upload[i]->glarray->_ElementSize);
    748       } else {
    749          /* This is an instanced attribute, since its InstanceDivisor
    750           * is not zero. Therefore, its data will be stepped after the
    751           * instanced draw has been run InstanceDivisor times.
    752           */
    753          uint32_t instanced_attr_max_index =
    754             (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
    755          copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
    756                                  buffer, upload[i]->glarray->_ElementSize);
    757       }
    758       buffer->offset -= delta * buffer->stride;
    759       buffer->size += delta * buffer->stride;
    760       buffer->step_rate = upload[i]->glarray->InstanceDivisor;
    761       upload[i]->buffer = j++;
    762       upload[i]->offset = 0;
    763    }
    764 
    765    brw->vb.nr_buffers = j;
    766 }
    767 
    768 void
    769 brw_prepare_shader_draw_parameters(struct brw_context *brw)
    770 {
    771    const struct brw_vs_prog_data *vs_prog_data =
    772       brw_vs_prog_data(brw->vs.base.prog_data);
    773 
     774    /* For non-indirect draws, upload gl_BaseVertex / gl_BaseInstance. */
    775    if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
    776        brw->draw.draw_params_bo == NULL) {
    777       intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
    778 			&brw->draw.draw_params_bo,
    779                         &brw->draw.draw_params_offset);
    780    }
    781 
    782    if (vs_prog_data->uses_drawid) {
    783       intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
    784                         &brw->draw.draw_id_bo,
    785                         &brw->draw.draw_id_offset);
    786    }
    787 }
    788 
    789 /**
    790  * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
    791  */
    792 uint32_t *
    793 brw_emit_vertex_buffer_state(struct brw_context *brw,
    794                              unsigned buffer_nr,
    795                              drm_intel_bo *bo,
    796                              unsigned start_offset,
    797                              unsigned end_offset,
    798                              unsigned stride,
    799                              unsigned step_rate,
    800                              uint32_t *__map)
    801 {
    802    struct gl_context *ctx = &brw->ctx;
    803    uint32_t dw0;
    804 
    805    if (brw->gen >= 8) {
    806       dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT;
    807    } else if (brw->gen >= 6) {
    808       dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
    809             (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
    810                        : GEN6_VB0_ACCESS_VERTEXDATA);
    811    } else {
    812       dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
    813             (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
    814                        : BRW_VB0_ACCESS_VERTEXDATA);
    815    }
    816 
    817    if (brw->gen >= 7)
    818       dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
    819 
    820    switch (brw->gen) {
    821    case 7:
    822       dw0 |= GEN7_MOCS_L3 << 16;
    823       break;
    824    case 8:
    825       dw0 |= BDW_MOCS_WB << 16;
    826       break;
    827    case 9:
    828       dw0 |= SKL_MOCS_WB << 16;
    829       break;
    830    }
    831 
    832    WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
    833              "VBO stride %d too large, bad rendering may occur\n",
    834              stride);
    835    OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
    836    if (brw->gen >= 8) {
    837       OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
    838       /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
    839        *                 Vertex Fetch (VF) Stage - State
    840        *
    841        * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
    842        * VBState.BufferPitch", the address of the byte immediately beyond the
    843        * last valid byte of the buffer is determined by
    844        * "VBState.StartingBufferAddress + VBState.BufferSize".
    845        */
    846       OUT_BATCH(end_offset - start_offset);
    847    } else if (brw->gen >= 5) {
    848       OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
    849       /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
    850        *                 Vertex Fetch (VF) Stage - State
    851        *
    852        *  Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
    853        *  VBState.BufferPitch", the address of the byte immediately beyond the
    854        *  last valid byte of the buffer is determined by
    855        *  "VBState.EndAddress + 1".
    856        */
    857       OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1);
    858       OUT_BATCH(step_rate);
    859    } else {
    860       OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
    861       OUT_BATCH(0);
    862       OUT_BATCH(step_rate);
    863    }
    864 
    865    return __map;
    866 }
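
         /*
          * Illustrative call: EMIT_VERTEX_BUFFER_STATE(brw, 0, bo, 0, bo->size, 16, 0)
          * programs buffer 0 as per-vertex data with a 16-byte pitch.  A non-zero
          * step_rate selects instance-data access on gen < 8 (where the step rate
          * itself is also written into the packet above); it mirrors the
          * glVertexAttribDivisor value the caller stored in buffer->step_rate.
          */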
    867 
    868 static void
    869 brw_emit_vertices(struct brw_context *brw)
    870 {
    871    GLuint i;
    872 
    873    brw_prepare_vertices(brw);
    874    brw_prepare_shader_draw_parameters(brw);
    875 
    876    brw_emit_query_begin(brw);
    877 
    878    const struct brw_vs_prog_data *vs_prog_data =
    879       brw_vs_prog_data(brw->vs.base.prog_data);
    880 
    881    unsigned nr_elements = brw->vb.nr_enabled;
    882    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
    883        vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
    884       ++nr_elements;
    885    if (vs_prog_data->uses_drawid)
    886       nr_elements++;
    887 
     888    /* If any of the formats of vb.enabled needs more than one upload, we need
     889     * to account for it in nr_elements. */
    890    unsigned extra_uploads = 0;
    891    for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
    892       struct brw_vertex_element *input = brw->vb.enabled[i];
    893       uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
    894 
    895       if (uploads_needed(format) > 1)
    896          extra_uploads++;
    897    }
    898    nr_elements += extra_uploads;
    899 
    900    /* If the VS doesn't read any inputs (calculating vertex position from
    901     * a state variable for some reason, for example), emit a single pad
    902     * VERTEX_ELEMENT struct and bail.
    903     *
     904     * The stale VB state stays in place, but it doesn't do anything unless
     905     * a VE loads from it.
    906     */
    907    if (nr_elements == 0) {
    908       BEGIN_BATCH(3);
    909       OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
    910       if (brw->gen >= 6) {
    911 	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
    912 		   GEN6_VE0_VALID |
    913 		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
    914 		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
    915       } else {
    916 	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
    917 		   BRW_VE0_VALID |
    918 		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
    919 		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
    920       }
    921       OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
    922 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
    923 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
    924 		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
    925       ADVANCE_BATCH();
    926       return;
    927    }
    928 
    929    /* Now emit VB and VEP state packets.
    930     */
    931 
    932    const bool uses_draw_params =
    933       vs_prog_data->uses_basevertex ||
    934       vs_prog_data->uses_baseinstance;
    935    const unsigned nr_buffers = brw->vb.nr_buffers +
    936       uses_draw_params + vs_prog_data->uses_drawid;
    937 
    938    if (nr_buffers) {
    939       if (brw->gen >= 6) {
    940 	 assert(nr_buffers <= 33);
    941       } else {
    942 	 assert(nr_buffers <= 17);
    943       }
    944 
    945       BEGIN_BATCH(1 + 4 * nr_buffers);
    946       OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
    947       for (i = 0; i < brw->vb.nr_buffers; i++) {
    948 	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
    949          /* Prior to Haswell and Bay Trail we have to use 4-component formats
    950           * to fake 3-component ones.  In particular, we do this for
    951           * half-float and 8 and 16-bit integer formats.  This means that the
    952           * vertex element may poke over the end of the buffer by 2 bytes.
    953           */
    954          unsigned padding =
    955             (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
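                  /*
                   * E.g. (illustrative): a 3-component GL_HALF_FLOAT attribute that ends
                   * exactly at the end of its VBO is fetched as a 4-component format on
                   * these platforms, so the VF reads 8 bytes where the attribute only
                   * owns 6; the 2 bytes of padding keep that read inside the bounds
                   * programmed below.
                   */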
    956          EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset,
    957                                   buffer->offset + buffer->size + padding,
    958                                   buffer->stride, buffer->step_rate);
    959 
    960       }
    961 
    962       if (uses_draw_params) {
    963          EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
    964                                   brw->draw.draw_params_bo,
    965                                   brw->draw.draw_params_offset,
    966                                   brw->draw.draw_params_bo->size,
    967                                   0,  /* stride */
    968                                   0); /* step rate */
    969       }
    970 
    971       if (vs_prog_data->uses_drawid) {
    972          EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
    973                                   brw->draw.draw_id_bo,
    974                                   brw->draw.draw_id_offset,
    975                                   brw->draw.draw_id_bo->size,
    976                                   0,  /* stride */
    977                                   0); /* step rate */
    978       }
    979 
    980       ADVANCE_BATCH();
    981    }
    982 
     983    /* The hardware allows one more VERTEX_ELEMENT than VERTEX_BUFFERS, presumably
    984     * for VertexID/InstanceID.
    985     */
    986    if (brw->gen >= 6) {
    987       assert(nr_elements <= 34);
    988    } else {
    989       assert(nr_elements <= 18);
    990    }
    991 
    992    struct brw_vertex_element *gen6_edgeflag_input = NULL;
    993 
    994    BEGIN_BATCH(1 + nr_elements * 2);
    995    OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
    996    for (i = 0; i < brw->vb.nr_enabled; i++) {
    997       struct brw_vertex_element *input = brw->vb.enabled[i];
    998       uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
    999       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
   1000       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
   1001       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
   1002       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
   1003       unsigned num_uploads = 1;
   1004       unsigned c;
   1005 
   1006       num_uploads = uploads_needed(format);
   1007 
   1008       if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
   1009          /* Gen6+ passes edgeflag as sideband along with the vertex, instead
   1010           * of in the VUE.  We have to upload it sideband as the last vertex
   1011           * element according to the B-Spec.
   1012           */
   1013          if (brw->gen >= 6) {
   1014             gen6_edgeflag_input = input;
   1015             continue;
   1016          }
   1017       }
   1018 
   1019       for (c = 0; c < num_uploads; c++) {
   1020          uint32_t upload_format = downsize_format_if_needed(format, c);
    1021          /* If we need more than one upload, the offset advances by 128 bits
    1022           * (16 bytes) per upload, since each previous upload consumed a full
    1023           * entry. */
   1024          unsigned int offset = input->offset + c * 16;
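                  /*
                   * E.g. (illustrative) for R64G64B64A64_PASSTHRU with input->offset == 0:
                   * upload 0 reads bytes 0..15 as R32G32B32A32_FLOAT and upload 1 reads
                   * bytes 16..31, so c * 16 is exactly the distance between the two
                   * halves of the dvec4.
                   */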
   1025          int size = input->glarray->Size;
   1026 
   1027          if (is_passthru_format(format))
   1028             size = upload_format_size(upload_format);
   1029 
   1030          switch (size) {
    1031          case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
    1032          case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
    1033          case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
   1034          case 3: comp3 = input->glarray->Integer
   1035                          ? BRW_VE1_COMPONENT_STORE_1_INT
   1036                          : BRW_VE1_COMPONENT_STORE_1_FLT;
   1037             break;
   1038          }
   1039 
   1040          if (brw->gen >= 6) {
   1041             OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
   1042                       GEN6_VE0_VALID |
   1043                       (upload_format << BRW_VE0_FORMAT_SHIFT) |
   1044                       (offset << BRW_VE0_SRC_OFFSET_SHIFT));
   1045          } else {
   1046             OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
   1047                       BRW_VE0_VALID |
   1048                       (upload_format << BRW_VE0_FORMAT_SHIFT) |
   1049                       (offset << BRW_VE0_SRC_OFFSET_SHIFT));
   1050          }
   1051 
   1052          if (brw->gen >= 5)
   1053             OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
   1054                       (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
   1055                       (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
   1056                       (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   1057          else
   1058             OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
   1059                       (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
   1060                       (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
   1061                       (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
   1062                       ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   1063       }
   1064    }
   1065 
   1066    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
   1067        vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
   1068       uint32_t dw0 = 0, dw1 = 0;
   1069       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
   1070       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
   1071       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
   1072       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
   1073 
   1074       if (vs_prog_data->uses_basevertex)
   1075          comp0 = BRW_VE1_COMPONENT_STORE_SRC;
   1076 
   1077       if (vs_prog_data->uses_baseinstance)
   1078          comp1 = BRW_VE1_COMPONENT_STORE_SRC;
   1079 
   1080       if (vs_prog_data->uses_vertexid)
   1081          comp2 = BRW_VE1_COMPONENT_STORE_VID;
   1082 
   1083       if (vs_prog_data->uses_instanceid)
   1084          comp3 = BRW_VE1_COMPONENT_STORE_IID;
   1085 
   1086       dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
   1087             (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
   1088             (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
   1089             (comp3 << BRW_VE1_COMPONENT_3_SHIFT);
   1090 
   1091       if (brw->gen >= 6) {
   1092          dw0 |= GEN6_VE0_VALID |
   1093                 brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
   1094                 BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
   1095       } else {
   1096          dw0 |= BRW_VE0_VALID |
   1097                 brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
   1098                 BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
   1099 	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
   1100       }
   1101 
   1102       /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
   1103        * the format is ignored and the value is always int.
   1104        */
   1105 
   1106       OUT_BATCH(dw0);
   1107       OUT_BATCH(dw1);
   1108    }
   1109 
   1110    if (vs_prog_data->uses_drawid) {
   1111       uint32_t dw0 = 0, dw1 = 0;
   1112 
   1113       dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
   1114             (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
   1115             (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_2_SHIFT) |
   1116             (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_3_SHIFT);
   1117 
   1118       if (brw->gen >= 6) {
   1119          dw0 |= GEN6_VE0_VALID |
   1120                 ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
   1121                 (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
   1122       } else {
   1123          dw0 |= BRW_VE0_VALID |
   1124                 ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
   1125                 (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
   1126 
   1127 	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
   1128       }
   1129 
   1130       OUT_BATCH(dw0);
   1131       OUT_BATCH(dw1);
   1132    }
   1133 
   1134    if (brw->gen >= 6 && gen6_edgeflag_input) {
   1135       uint32_t format =
   1136          brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
   1137 
   1138       OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
   1139                 GEN6_VE0_VALID |
   1140                 GEN6_VE0_EDGE_FLAG_ENABLE |
   1141                 (format << BRW_VE0_FORMAT_SHIFT) |
   1142                 (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
   1143       OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
   1144                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
   1145                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
   1146                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   1147    }
   1148 
   1149    ADVANCE_BATCH();
   1150 }
   1151 
   1152 const struct brw_tracked_state brw_vertices = {
   1153    .dirty = {
   1154       .mesa = _NEW_POLYGON,
   1155       .brw = BRW_NEW_BATCH |
   1156              BRW_NEW_BLORP |
   1157              BRW_NEW_VERTICES |
   1158              BRW_NEW_VS_PROG_DATA,
   1159    },
   1160    .emit = brw_emit_vertices,
   1161 };
   1162 
   1163 static void
   1164 brw_upload_indices(struct brw_context *brw)
   1165 {
   1166    struct gl_context *ctx = &brw->ctx;
   1167    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   1168    GLuint ib_size;
   1169    drm_intel_bo *old_bo = brw->ib.bo;
   1170    struct gl_buffer_object *bufferobj;
   1171    GLuint offset;
   1172    GLuint ib_type_size;
   1173 
   1174    if (index_buffer == NULL)
   1175       return;
   1176 
   1177    ib_type_size = _mesa_sizeof_type(index_buffer->type);
   1178    ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
   1179                                    index_buffer->obj->Size;
   1180    bufferobj = index_buffer->obj;
   1181 
   1182    /* Turn into a proper VBO:
   1183     */
   1184    if (!_mesa_is_bufferobj(bufferobj)) {
   1185       /* Get new bufferobj, offset:
   1186        */
   1187       intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
   1188 			&brw->ib.bo, &offset);
   1189       brw->ib.size = brw->ib.bo->size;
   1190    } else {
   1191       offset = (GLuint) (unsigned long) index_buffer->ptr;
   1192 
   1193       /* If the index buffer isn't aligned to its element size, we have to
   1194        * rebase it into a temporary.
   1195        */
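               /*
                * E.g. (illustrative): GL_UNSIGNED_SHORT indices bound at byte offset 3
                * hit this path ((2 - 1) & 3 != 0), so the range is mapped, copied into
                * a freshly aligned upload buffer, and the draw reads from that copy.
                */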
   1196       if ((ib_type_size - 1) & offset) {
   1197          perf_debug("copying index buffer to a temporary to work around "
   1198                     "misaligned offset %d\n", offset);
   1199 
   1200          GLubyte *map = ctx->Driver.MapBufferRange(ctx,
   1201                                                    offset,
   1202                                                    ib_size,
   1203                                                    GL_MAP_READ_BIT,
   1204                                                    bufferobj,
   1205                                                    MAP_INTERNAL);
   1206 
   1207          intel_upload_data(brw, map, ib_size, ib_type_size,
   1208                            &brw->ib.bo, &offset);
   1209          brw->ib.size = brw->ib.bo->size;
   1210 
   1211          ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
   1212       } else {
   1213          drm_intel_bo *bo =
   1214             intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
   1215                                    offset, ib_size);
   1216          if (bo != brw->ib.bo) {
   1217             drm_intel_bo_unreference(brw->ib.bo);
   1218             brw->ib.bo = bo;
   1219             brw->ib.size = bufferobj->Size;
   1220             drm_intel_bo_reference(bo);
   1221          }
   1222       }
   1223    }
   1224 
   1225    /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
   1226     * the index buffer state when we're just moving the start index
   1227     * of our drawing.
   1228     */
   1229    brw->ib.start_vertex_offset = offset / ib_type_size;
   1230 
   1231    if (brw->ib.bo != old_bo)
   1232       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   1233 
   1234    if (index_buffer->type != brw->ib.type) {
   1235       brw->ib.type = index_buffer->type;
   1236       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   1237    }
   1238 }
   1239 
   1240 const struct brw_tracked_state brw_indices = {
   1241    .dirty = {
   1242       .mesa = 0,
   1243       .brw = BRW_NEW_BLORP |
   1244              BRW_NEW_INDICES,
   1245    },
   1246    .emit = brw_upload_indices,
   1247 };
   1248 
   1249 static void
   1250 brw_emit_index_buffer(struct brw_context *brw)
   1251 {
   1252    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   1253    GLuint cut_index_setting;
   1254 
   1255    if (index_buffer == NULL)
   1256       return;
   1257 
   1258    if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
   1259       cut_index_setting = BRW_CUT_INDEX_ENABLE;
   1260    } else {
   1261       cut_index_setting = 0;
   1262    }
   1263 
   1264    BEGIN_BATCH(3);
   1265    OUT_BATCH(CMD_INDEX_BUFFER << 16 |
   1266              cut_index_setting |
   1267              brw_get_index_type(index_buffer->type) |
   1268              1);
   1269    OUT_RELOC(brw->ib.bo,
   1270              I915_GEM_DOMAIN_VERTEX, 0,
   1271              0);
   1272    OUT_RELOC(brw->ib.bo,
   1273              I915_GEM_DOMAIN_VERTEX, 0,
   1274 	     brw->ib.size - 1);
   1275    ADVANCE_BATCH();
   1276 }
   1277 
   1278 const struct brw_tracked_state brw_index_buffer = {
   1279    .dirty = {
   1280       .mesa = 0,
   1281       .brw = BRW_NEW_BATCH |
   1282              BRW_NEW_BLORP |
   1283              BRW_NEW_INDEX_BUFFER,
   1284    },
   1285    .emit = brw_emit_index_buffer,
   1286 };
   1287