/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, or user buffer, or unsupported format,
 * etc.
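 * For example, "does this draw read any user buffer?" is answered with a
 * single AND of the user-buffer mask and the used-buffer mask.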
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
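 *
 * For example, a position stream (stride 12, divisor 0) is per-vertex,
 * a per-instance color (stride 16, divisor 1) is instanced, and an attrib
 * fetched from a zero-stride buffer is constant.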
 *
 * All needed uploads and translations are performed every draw command, but
 * only the subset of vertices needed for that draw command is uploaded or
 * translated. (The module never translates whole buffers.)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations and for indexed
 * draw operations where the [min_index, max_index] range is not much bigger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, the module forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

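/* A minimal sketch of how a driver typically hooks this module up; the
 * driver-side context struct (ctx) and the aux slot constant are
 * hypothetical:
 *
 *    struct u_vbuf_caps caps;
 *
 *    if (u_vbuf_get_caps(screen, &caps)) {
 *       // Some cap is missing: route vertex state and draws through u_vbuf.
 *       ctx->vbuf = u_vbuf_create(pipe, &caps, AUX_VB_SLOT);
 *    }
 *
 *    // Then, in the driver's state setters and draw function:
 *    u_vbuf_set_vertex_elements(ctx->vbuf, count, states);
 *    u_vbuf_set_vertex_buffers(ctx->vbuf, start_slot, count, bufs);
 *    u_vbuf_set_index_buffer(ctx->vbuf, &ib);
 *    u_vbuf_draw_vbo(ctx->vbuf, &info);
 */
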
struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* A set bit means one of two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers have at least one incompatible vertex element
    * referencing them. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers have only incompatible vertex elements referencing them. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers have at least one compatible vertex element
    * referencing them. */
   uint32_t compatible_vb_mask_any;
   /* Which buffers have only compatible vertex elements referencing them. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers have at least one non-instanced vertex element
    * referencing them. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

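/* Vertex buffer categories for the translate fallback; these index mask[],
 * start[], num[] and fallback_vbs[] below. */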
enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;
   struct u_upload_mgr *uploader;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   unsigned aux_vertex_buffer_slot;
   struct pipe_vertex_buffer aux_vertex_buffer_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* The index buffer. */
   struct pipe_index_buffer index_buffer;

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* Whether the translate fallback vertex element state is currently bound
    * (and therefore used for rendering). */
   boolean using_translate;
   /* The vertex buffer slot indices where translated vertices are stored. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffers are user buffers. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers have a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};

boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
{
   unsigned i;
   boolean fallback = FALSE;

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format.  So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         fallback = TRUE;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned ||
       !caps->user_vertex_buffers) {
      fallback = TRUE;
   }

   return fallback;
}

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe,
              struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->uploader = u_upload_create(pipe, 1024 * 1024,
                                   PIPE_BIND_VERTEX_BUFFER,
                                   PIPE_USAGE_STREAM);

   return mgr;
}

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* We need to include the count in the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                const struct pipe_vertex_element *states)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                              PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_index_buffer(mgr->pipe, NULL);
   pipe_resource_reference(&mgr->index_buffer.buffer, NULL);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
      pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
   }
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
      pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
   }
   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);

   translate_cache_destroy(mgr->translate_cache);
   u_upload_destroy(mgr->uploader);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int start_index, unsigned num_indices, int min_index,
                         boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->user_buffer) {
         map = (uint8_t*)vb->user_buffer + offset;
      } else {
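         /* If stride == 0, only a single vertex is ever fetched;
          * sizeof(double)*4 covers the largest vertex format
          * (R64G64B64A64). */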
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (offset+size > vb->buffer->width0) {
            size = vb->buffer->width0 - offset;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
                                     PIPE_TRANSFER_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, ~0);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_index_buffer *ib = &mgr->index_buffer;
      struct pipe_transfer *transfer = NULL;
      unsigned offset = ib->offset + start_index * ib->index_size;
      uint8_t *map;

      assert((ib->buffer || ib->user_buffer) && ib->index_size);

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->uploader, 0,
                     key->output_stride * num_indices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (ib->user_buffer) {
         map = (uint8_t*)ib->user_buffer + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
                                     num_indices * ib->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (ib->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, num_indices, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, num_indices, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->uploader,
                     key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_resource_reference(
      &mgr->real_vertex_buffer[out_vb].buffer, NULL);
   mgr->real_vertex_buffer[out_vb].buffer = out_buffer;

   return PIPE_OK;
}

static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is either incompatible or not
    * enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Find free slots for each type if needed. */
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            return FALSE;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       int start_vertex, unsigned num_vertices,
                       int start_instance, unsigned num_instances,
                       int start_index, unsigned num_indices, int min_index,
                       boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                   mgr->ve->used_vb_mask;

   int start[VB_NUM] = {
      start_vertex,     /* VERTEX */
      start_instance,   /* INSTANCE */
      0                 /* CONST */
   };

   unsigned num[VB_NUM] = {
      num_vertices,     /* VERTEX */
      num_instances,    /* INSTANCE */
      1                 /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));
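   /* A value of ~0 in elem_index marks elements which won't be translated. */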

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to the category we will translate:
       * vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type],
                                        start_index, num_indices, min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL);
         mgr->fallback_vbs[i] = ~0;

         /* This will cause the buffer to be unbound in the driver later. */
         mgr->dirty_real_vb_mask |= 1 << vb;
      }
   }
}

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   ve->driver_cso =
      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_resource_reference(&mgr->vertex_buffer[dst_index].buffer, NULL);
         pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer,
                                 NULL);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer && !vb->user_buffer) {
         pipe_resource_reference(&orig_vb->buffer, NULL);
         pipe_resource_reference(&real_vb->buffer, NULL);
         real_vb->user_buffer = NULL;
         continue;
      }

      pipe_resource_reference(&orig_vb->buffer, vb->buffer);
      orig_vb->user_buffer = vb->user_buffer;

      real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset;
      real_vb->stride = orig_vb->stride = vb->stride;

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->user_buffer) {
         user_vb_mask |= 1 << dst_index;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      pipe_resource_reference(&real_vb->buffer, vb->buffer);
      real_vb->user_buffer = vb->user_buffer;
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}

void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
                             const struct pipe_index_buffer *ib)
{
   struct pipe_context *pipe = mgr->pipe;

   if (ib) {
      assert(ib->offset % ib->index_size == 0);
      pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
      memcpy(&mgr->index_buffer, ib, sizeof(*ib));
   } else {
      pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
   }

   pipe->set_index_buffer(pipe, ib);
}

static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

      u_upload_data(mgr->uploader, start, end - start, 4, ptr + start,
                    &real_vb->buffer_offset, &real_vb->buffer);
      if (!real_vb->buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

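      /* The vertex elements keep their original offsets, which already
       * include "start", so rebase buffer_offset to compensate for having
       * uploaded only the [start, end) range. */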
      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                                    struct pipe_index_buffer *ib,
                                    boolean primitive_restart,
                                    unsigned restart_index,
                                    unsigned start, unsigned count,
                                    int *out_min_index,
                                    int *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;
   unsigned i;

   if (ib->user_buffer) {
      indices = (uint8_t*)ib->user_buffer +
                ib->offset + start * ib->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, ib->buffer,
                                      ib->offset + start * ib->index_size,
                                      count * ib->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   switch (ib->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max_ui = 0;
      unsigned min_ui = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] != restart_index) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
            if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
         }
      }
      *out_min_index = min_ui;
      *out_max_index = max_ui;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned max_us = 0;
      unsigned min_us = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (us_indices[i] != restart_index) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *out_min_index = min_us;
      *out_max_index = max_us;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned max_ub = 0;
      unsigned min_ub = ~0U;
      if (primitive_restart) {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] != restart_index) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *out_min_index = min_ub;
      *out_max_index = max_ub;
      break;
   }
   default:
      assert(0);
      *out_min_index = 0;
      *out_max_index = 0;
   }

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

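   /* Bind the smallest contiguous slot range covering all dirty bits
    * with a single set_vertex_buffers call. */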
   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   pipe->set_vertex_buffers(pipe, start_slot, count,
                            mgr->real_vertex_buffer + start_slot);
   mgr->dirty_real_vb_mask = 0;
}

void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex, min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Fallback. We need to know all the parameters. */
   if (new_info.indirect) {
      struct pipe_transfer *transfer = NULL;
      int *data;

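      /* The indirect buffer layout matches the GL indirect draw commands:
       * { count, instance_count, start, index_bias, start_instance } for
       * indexed draws, { count, instance_count, start, start_instance }
       * for non-indexed ones. */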
      if (new_info.indexed) {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 20,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.index_bias = data[3];
         new_info.start_instance = data[4];
      }
      else {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 16,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.start_instance = data[3];
      }

      new_info.count = data[0];
      new_info.instance_count = data[1];
      new_info.start = data[2];
      pipe_buffer_unmap(pipe, transfer);
      new_info.indirect = NULL;
   }

   if (new_info.indexed) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         int max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer,
                                    new_info.primitive_restart,
                                    new_info.restart_index, new_info.start,
                                    new_info.count, &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!new_info.primitive_restart &&
             num_vertices > new_info.count*2 &&
             num_vertices - new_info.count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
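            /* The unroll path reads per-vertex user buffers directly in
             * translate, so they no longer need to be uploaded. */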
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
                                  new_info.start_instance,
                                  new_info.instance_count, new_info.start,
                                  new_info.count, min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         new_info.indexed = FALSE;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(mgr->uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}

void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   struct pipe_vertex_buffer *vb =
         &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot];

   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, vb->buffer);
   memcpy(&mgr->aux_vertex_buffer_saved, vb, sizeof(*vb));
}

void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr)
{
   u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1,
                             &mgr->aux_vertex_buffer_saved);
   pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
}
   1341