/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates vertex buffers that
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags, to avoid looping over the list of buffers just
 * to check for a non-zero stride, a user buffer, an unsupported format, etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed with every draw command,
 * but only the subset of vertices needed for that draw command is uploaded
 * or translated. (The module never translates whole buffers.)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations, and for indexed
 * draw operations where the [min_index, max_index] range is not much larger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, the module forwards every draw command to
 * the driver.
 * The module also has its own CSO cache of vertex element states.
 */
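
/* A minimal usage sketch, for orientation only. In Mesa this wiring lives
 * in the cso_context module rather than in drivers; "aux_slot", "velems",
 * "vbs" and "draw_info" below are hypothetical:
 *
 *    struct u_vbuf_caps caps;
 *
 *    // Returns TRUE if some format/alignment/user-buffer fallback is needed.
 *    if (u_vbuf_get_caps(pipe->screen, &caps, 0)) {
 *       struct u_vbuf *mgr = u_vbuf_create(pipe, &caps, aux_slot);
 *
 *       // Route vertex state through the manager instead of the driver:
 *       u_vbuf_set_vertex_elements(mgr, num_velems, velems);
 *       u_vbuf_set_vertex_buffers(mgr, 0, num_vbs, vbs);
 *
 *       // Uploads/translates as needed, then calls pipe->draw_vbo() with
 *       // driver-compatible buffers:
 *       u_vbuf_draw_vbo(mgr, &draw_info);
 *
 *       u_vbuf_destroy(mgr);  // on context teardown
 *    }
 */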

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* An element is incompatible for either of two reasons:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers have at least one incompatible vertex element
    * referencing them. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers have only incompatible vertex elements referencing them. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers have at least one compatible vertex element
    * referencing them. */
   uint32_t compatible_vb_mask_any;
   /* Which buffers have only compatible vertex elements referencing them. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers have at least one non-instanced vertex element
    * referencing them. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

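/* A worked example of the masks above (hypothetical state): suppose
 * elements 0 and 1 read buffer 0, element 2 reads buffer 1, and only
 * element 1 has an unsupported format. Then:
 *
 *    used_vb_mask             = 0b11   (buffers 0 and 1 are referenced)
 *    incompatible_elem_mask   = 0b010  (element 1 only)
 *    incompatible_vb_mask_any = 0b01   (buffer 0 has an incompatible element)
 *    incompatible_vb_mask_all = 0b00   (no buffer is referenced only by
 *                                       incompatible elements)
 *    compatible_vb_mask_any   = 0b11   (both buffers have a compatible one)
 *    compatible_vb_mask_all   = 0b10   (only buffer 1 is fully compatible)
 */
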
enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   unsigned aux_vertex_buffer_slot;
   struct pipe_vertex_buffer aux_vertex_buffer_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* TRUE if the vertex element state of the translate fallback is bound
    * and therefore used for rendering too. */
   boolean using_translate;
   /* The vertex buffer slots where translated vertices are stored. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffers are user buffers. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers have a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};

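/* For example, if a screen reports no vertex buffer support for
 * PIPE_FORMAT_R16G16B16_FLOAT, u_vbuf_get_caps() below records
 *
 *    caps->format_translation[PIPE_FORMAT_R16G16B16_FLOAT] =
 *       PIPE_FORMAT_R32G32B32_FLOAT;
 *
 * and the translate fallback later expands such attribs on the CPU. */
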
boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
                        unsigned flags)
{
   unsigned i;
   boolean fallback = FALSE;

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format.  So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         fallback = TRUE;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned ||
       (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) {
      fallback = TRUE;
   }

   return fallback;
}

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe,
              struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->has_signed_vb_offset =
      pipe->screen->get_param(pipe->screen,
                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);

   return mgr;
}

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* We need to include the count in the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                const struct pipe_vertex_element *states)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                                    PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);

   pipe_vertex_buffer_unreference(&mgr->aux_vertex_buffer_saved);

   translate_cache_destroy(mgr->translate_cache);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past end of buffer.  This often happens when
             * we're translating an attribute that's at offset > 0 from the
             * start of the vertex.  If we'd subtract the attrib's offset from
             * the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices.  A common user error is to call
             * glDrawRangeElements() with an incorrect 'end' argument.  The
             * 'end' value should be the max index value, but people often
             * accidentally add one to this value.  This adjustment avoids
             * crashing (by reading past the end of a hardware buffer mapping)
             * when people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
                                     PIPE_TRANSFER_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, ~0);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = info->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * info->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     info->count * info->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, info->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}

static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set a bit for each buffer which is incompatible or isn't enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Find free slots for each type if needed. */
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            return FALSE;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                         mgr->ve->used_vb_mask;

   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set 'type' to the category we will translate:
       * vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fix up the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
         mgr->fallback_vbs[i] = ~0;

         /* This will cause the buffer to be unbound in the driver later. */
         mgr->dirty_real_vb_mask |= 1 << vb;
      }
   }
}

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   ve->driver_cso =
      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   const uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer.resource) {
         pipe_vertex_buffer_unreference(orig_vb);
         pipe_vertex_buffer_unreference(real_vb);
         continue;
      }

      pipe_vertex_buffer_reference(orig_vb, vb);

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
         user_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      pipe_vertex_buffer_reference(real_vb, vb);
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}

static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   const struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      const struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->is_user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].buffer.user;

      u_upload_data(mgr->pipe->stream_uploader,
                    mgr->has_signed_vb_offset ? 0 : start,
                    end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
      if (!real_vb->buffer.resource)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

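/* Example reading of u_vbuf_need_minmax_index() above (hypothetical masks):
 * with user_vb_mask = 0b01, nonzero_stride_vb_mask = 0b11, a vertex element
 * state where used_vb_mask = 0b11 and noninstance_vb_mask_any = 0b01, and no
 * incompatibilities, the expression evaluates to 0b01 != 0: buffer 0 is
 * a user buffer read per-vertex, so the [min_index, max_index] range has to
 * be computed. */
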
static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                                    const struct pipe_draw_info *info,
                                    int *out_min_index, int *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;
   unsigned i;

   if (info->has_user_indices) {
      indices = (uint8_t*)info->index.user +
                info->start * info->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, info->index.resource,
                                      info->start * info->index_size,
                                      info->count * info->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   switch (info->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max_ui = 0;
      unsigned min_ui = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] != info->restart_index) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
            if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
         }
      }
      *out_min_index = min_ui;
      *out_max_index = max_ui;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned max_us = 0;
      unsigned min_us = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] != info->restart_index) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *out_min_index = min_us;
      *out_max_index = max_us;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned max_ub = 0;
      unsigned min_ub = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] != info->restart_index) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *out_min_index = min_ub;
      *out_max_index = max_ub;
      break;
   }
   default:
      assert(0);
      *out_min_index = 0;
      *out_max_index = 0;
   }

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   pipe->set_vertex_buffers(pipe, start_slot, count,
                            mgr->real_vertex_buffer + start_slot);
   mgr->dirty_real_vb_mask = 0;
}

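/* Example for u_vbuf_set_driver_vertex_buffers() above (hypothetical mask):
 * with dirty_real_vb_mask = 0b0110, ffs() gives start_slot = 1 and
 * util_last_bit() gives count = 2, so only slots 1..2 are re-sent. */
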
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex, min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   const uint32_t incompatible_vb_mask =
      mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Fallback. We need to know all the parameters. */
   if (new_info.indirect) {
      struct pipe_transfer *transfer = NULL;
      int *data;

      if (new_info.index_size) {
         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
                                      new_info.indirect->offset, 20,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.index_bias = data[3];
         new_info.start_instance = data[4];
      }
      else {
         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
                                      new_info.indirect->offset, 16,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.start_instance = data[3];
      }

      new_info.count = data[0];
      new_info.instance_count = data[1];
      new_info.start = data[2];
      pipe_buffer_unmap(pipe, transfer);
      new_info.indirect = NULL;
   }

   if (new_info.index_size) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         int max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &new_info,
                                    &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!new_info.primitive_restart &&
             num_vertices > new_info.count*2 &&
             num_vertices - new_info.count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices,
                                  min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         new_info.index_size = 0;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(pipe->stream_uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}

void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   pipe_vertex_buffer_reference(&mgr->aux_vertex_buffer_saved,
                                &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot]);
}

void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1,
                             &mgr->aux_vertex_buffer_saved);
   pipe_vertex_buffer_unreference(&mgr->aux_vertex_buffer_saved);
}
   1319