Home | History | Annotate | Download | only in vbo
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright 2003 VMware, Inc.
      5  * Copyright 2009 VMware, Inc.
      6  * All Rights Reserved.
      7  * Copyright (C) 2016 Advanced Micro Devices, Inc.
      8  *
      9  * Permission is hereby granted, free of charge, to any person obtaining a
     10  * copy of this software and associated documentation files (the "Software"),
     11  * to deal in the Software without restriction, including without limitation
     12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     13  * and/or sell copies of the Software, and to permit persons to whom the
     14  * Software is furnished to do so, subject to the following conditions:
     15  *
     16  * The above copyright notice and this permission notice (including the next
     17  * paragraph) shall be included in all copies or substantial portions of the
     18  * Software.
     19  *
     20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     22  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     23  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     24  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     25  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     26  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     27  */
     28 
     29 #include "main/glheader.h"
     30 #include "main/context.h"
     31 #include "main/varray.h"
     32 #include "main/macros.h"
     33 #include "main/sse_minmax.h"
     34 #include "x86/common_x86_asm.h"
     35 #include "util/hash_table.h"
     36 
     37 
/**
 * Lookup key for the per-buffer-object min/max index cache.
 *
 * The key is hashed as raw bytes (sizeof the whole struct) by
 * vbo_minmax_cache_hash(), so any padding the compiler inserts between or
 * after the members would take part in the hash.
 */
struct minmax_cache_key {
   GLintptr offset;   /* location of the first index within the buffer */
   GLuint count;      /* number of indices in the range */
   GLenum type;       /* index type (GL_UNSIGNED_BYTE/SHORT/INT) */
};
     43 
     44 
/**
 * One cached result: the smallest and largest index found in the range
 * described by \c key.  Entries are heap-allocated and stored as the data
 * pointer of the hash table entry; the table key points at the embedded
 * \c key member.
 */
struct minmax_cache_entry {
   struct minmax_cache_key key;
   GLuint min;   /* smallest index in the range */
   GLuint max;   /* largest index in the range */
};
     50 
     51 
     52 static uint32_t
     53 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
     54 {
     55    return _mesa_hash_data(key, sizeof(*key));
     56 }
     57 
     58 
     59 static bool
     60 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
     61                            const struct minmax_cache_key *b)
     62 {
     63    return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type);
     64 }
     65 
     66 
     67 static void
     68 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
     69 {
     70    free(entry->data);
     71 }
     72 
     73 
     74 static GLboolean
     75 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
     76 {
     77    if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
     78                                   USAGE_ATOMIC_COUNTER_BUFFER |
     79                                   USAGE_SHADER_STORAGE_BUFFER |
     80                                   USAGE_TRANSFORM_FEEDBACK_BUFFER |
     81                                   USAGE_PIXEL_PACK_BUFFER |
     82                                   USAGE_DISABLE_MINMAX_CACHE))
     83       return GL_FALSE;
     84 
     85    if ((bufferObj->Mappings[MAP_USER].AccessFlags &
     86         (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
     87        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
     88       return GL_FALSE;
     89 
     90    return GL_TRUE;
     91 }
     92 
     93 
     94 void
     95 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
     96 {
     97    _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
     98    bufferObj->MinMaxCache = NULL;
     99 }
    100 
    101 
    102 static GLboolean
    103 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
    104                       GLenum type, GLintptr offset, GLuint count,
    105                       GLuint *min_index, GLuint *max_index)
    106 {
    107    GLboolean found = GL_FALSE;
    108    struct minmax_cache_key key;
    109    uint32_t hash;
    110    struct hash_entry *result;
    111 
    112    if (!bufferObj->MinMaxCache)
    113       return GL_FALSE;
    114    if (!vbo_use_minmax_cache(bufferObj))
    115       return GL_FALSE;
    116 
    117    mtx_lock(&bufferObj->Mutex);
    118 
    119    if (bufferObj->MinMaxCacheDirty) {
    120       /* Disable the cache permanently for this BO if the number of hits
    121        * is asymptotically less than the number of misses. This happens when
    122        * applications use the BO for streaming.
    123        *
    124        * However, some initial optimism allows applications that interleave
    125        * draw calls with glBufferSubData during warmup.
    126        */
    127       unsigned optimism = bufferObj->Size;
    128       if (bufferObj->MinMaxCacheMissIndices > optimism &&
    129           bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
    130          bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
    131          vbo_delete_minmax_cache(bufferObj);
    132          goto out_disable;
    133       }
    134 
    135       _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
    136       bufferObj->MinMaxCacheDirty = false;
    137       goto out_invalidate;
    138    }
    139 
    140    key.type = type;
    141    key.offset = offset;
    142    key.count = count;
    143    hash = vbo_minmax_cache_hash(&key);
    144    result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
    145    if (result) {
    146       struct minmax_cache_entry *entry = result->data;
    147       *min_index = entry->min;
    148       *max_index = entry->max;
    149       found = GL_TRUE;
    150    }
    151 
    152 out_invalidate:
    153    if (found) {
    154       /* The hit counter saturates so that we don't accidently disable the
    155        * cache in a long-running program.
    156        */
    157       unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
    158 
    159       if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
    160          bufferObj->MinMaxCacheHitIndices = new_hit_count;
    161       else
    162          bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
    163    } else {
    164       bufferObj->MinMaxCacheMissIndices += count;
    165    }
    166 
    167 out_disable:
    168    mtx_unlock(&bufferObj->Mutex);
    169    return found;
    170 }
    171 
    172 
    173 static void
    174 vbo_minmax_cache_store(struct gl_context *ctx,
    175                        struct gl_buffer_object *bufferObj,
    176                        GLenum type, GLintptr offset, GLuint count,
    177                        GLuint min, GLuint max)
    178 {
    179    struct minmax_cache_entry *entry;
    180    struct hash_entry *table_entry;
    181    uint32_t hash;
    182 
    183    if (!vbo_use_minmax_cache(bufferObj))
    184       return;
    185 
    186    mtx_lock(&bufferObj->Mutex);
    187 
    188    if (!bufferObj->MinMaxCache) {
    189       bufferObj->MinMaxCache =
    190          _mesa_hash_table_create(NULL,
    191                                  (uint32_t (*)(const void *))vbo_minmax_cache_hash,
    192                                  (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
    193       if (!bufferObj->MinMaxCache)
    194          goto out;
    195    }
    196 
    197    entry = MALLOC_STRUCT(minmax_cache_entry);
    198    if (!entry)
    199       goto out;
    200 
    201    entry->key.offset = offset;
    202    entry->key.count = count;
    203    entry->key.type = type;
    204    entry->min = min;
    205    entry->max = max;
    206    hash = vbo_minmax_cache_hash(&entry->key);
    207 
    208    table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
    209                                                     hash, &entry->key);
    210    if (table_entry) {
    211       /* It seems like this could happen when two contexts are rendering using
    212        * the same buffer object from multiple threads.
    213        */
    214       _mesa_debug(ctx, "duplicate entry in minmax cache\n");
    215       free(entry);
    216       goto out;
    217    }
    218 
    219    table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
    220                                                     hash, &entry->key, entry);
    221    if (!table_entry)
    222       free(entry);
    223 
    224 out:
    225    mtx_unlock(&bufferObj->Mutex);
    226 }
    227 
    228 
    229 /**
    230  * Compute min and max elements by scanning the index buffer for
    231  * glDraw[Range]Elements() calls.
    232  * If primitive restart is enabled, we need to ignore restart
    233  * indexes when computing min/max.
    234  */
    235 static void
    236 vbo_get_minmax_index(struct gl_context *ctx,
    237                      const struct _mesa_prim *prim,
    238                      const struct _mesa_index_buffer *ib,
    239                      GLuint *min_index, GLuint *max_index,
    240                      const GLuint count)
    241 {
    242    const GLboolean restart = ctx->Array._PrimitiveRestart;
    243    const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
    244    const int index_size = vbo_sizeof_ib_type(ib->type);
    245    const char *indices;
    246    GLuint i;
    247 
    248    indices = (char *) ib->ptr + prim->start * index_size;
    249    if (_mesa_is_bufferobj(ib->obj)) {
    250       GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
    251 
    252       if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,
    253                                 min_index, max_index))
    254          return;
    255 
    256       indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
    257                                            GL_MAP_READ_BIT, ib->obj,
    258                                            MAP_INTERNAL);
    259    }
    260 
    261    switch (ib->type) {
    262    case GL_UNSIGNED_INT: {
    263       const GLuint *ui_indices = (const GLuint *)indices;
    264       GLuint max_ui = 0;
    265       GLuint min_ui = ~0U;
    266       if (restart) {
    267          for (i = 0; i < count; i++) {
    268             if (ui_indices[i] != restartIndex) {
    269                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
    270                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
    271             }
    272          }
    273       }
    274       else {
    275 #if defined(USE_SSE41)
    276          if (cpu_has_sse4_1) {
    277             _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
    278          }
    279          else
    280 #endif
    281             for (i = 0; i < count; i++) {
    282                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
    283                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
    284             }
    285       }
    286       *min_index = min_ui;
    287       *max_index = max_ui;
    288       break;
    289    }
    290    case GL_UNSIGNED_SHORT: {
    291       const GLushort *us_indices = (const GLushort *)indices;
    292       GLuint max_us = 0;
    293       GLuint min_us = ~0U;
    294       if (restart) {
    295          for (i = 0; i < count; i++) {
    296             if (us_indices[i] != restartIndex) {
    297                if (us_indices[i] > max_us) max_us = us_indices[i];
    298                if (us_indices[i] < min_us) min_us = us_indices[i];
    299             }
    300          }
    301       }
    302       else {
    303          for (i = 0; i < count; i++) {
    304             if (us_indices[i] > max_us) max_us = us_indices[i];
    305             if (us_indices[i] < min_us) min_us = us_indices[i];
    306          }
    307       }
    308       *min_index = min_us;
    309       *max_index = max_us;
    310       break;
    311    }
    312    case GL_UNSIGNED_BYTE: {
    313       const GLubyte *ub_indices = (const GLubyte *)indices;
    314       GLuint max_ub = 0;
    315       GLuint min_ub = ~0U;
    316       if (restart) {
    317          for (i = 0; i < count; i++) {
    318             if (ub_indices[i] != restartIndex) {
    319                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
    320                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
    321             }
    322          }
    323       }
    324       else {
    325          for (i = 0; i < count; i++) {
    326             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
    327             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
    328          }
    329       }
    330       *min_index = min_ub;
    331       *max_index = max_ub;
    332       break;
    333    }
    334    default:
    335       unreachable("not reached");
    336    }
    337 
    338    if (_mesa_is_bufferobj(ib->obj)) {
    339       vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
    340                              *min_index, *max_index);
    341       ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
    342    }
    343 }
    344 
    345 /**
    346  * Compute min and max elements for nr_prims
    347  */
    348 void
    349 vbo_get_minmax_indices(struct gl_context *ctx,
    350                        const struct _mesa_prim *prims,
    351                        const struct _mesa_index_buffer *ib,
    352                        GLuint *min_index,
    353                        GLuint *max_index,
    354                        GLuint nr_prims)
    355 {
    356    GLuint tmp_min, tmp_max;
    357    GLuint i;
    358    GLuint count;
    359 
    360    *min_index = ~0;
    361    *max_index = 0;
    362 
    363    for (i = 0; i < nr_prims; i++) {
    364       const struct _mesa_prim *start_prim;
    365 
    366       start_prim = &prims[i];
    367       count = start_prim->count;
    368       /* Do combination if possible to reduce map/unmap count */
    369       while ((i + 1 < nr_prims) &&
    370              (prims[i].start + prims[i].count == prims[i+1].start)) {
    371          count += prims[i+1].count;
    372          i++;
    373       }
    374       vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
    375       *min_index = MIN2(*min_index, tmp_min);
    376       *max_index = MAX2(*max_index, tmp_max);
    377    }
    378 }
    379