1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright 2003 VMware, Inc. 5 * Copyright 2009 VMware, Inc. 6 * All Rights Reserved. 7 * Copyright (C) 2016 Advanced Micro Devices, Inc. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a 10 * copy of this software and associated documentation files (the "Software"), 11 * to deal in the Software without restriction, including without limitation 12 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13 * and/or sell copies of the Software, and to permit persons to whom the 14 * Software is furnished to do so, subject to the following conditions: 15 * 16 * The above copyright notice and this permission notice (including the next 17 * paragraph) shall be included in all copies or substantial portions of the 18 * Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 26 * USE OR OTHER DEALINGS IN THE SOFTWARE. 27 */ 28 29 #include "main/glheader.h" 30 #include "main/context.h" 31 #include "main/varray.h" 32 #include "main/macros.h" 33 #include "main/sse_minmax.h" 34 #include "x86/common_x86_asm.h" 35 #include "util/hash_table.h" 36 37 38 struct minmax_cache_key { 39 GLintptr offset; 40 GLuint count; 41 GLenum type; 42 }; 43 44 45 struct minmax_cache_entry { 46 struct minmax_cache_key key; 47 GLuint min; 48 GLuint max; 49 }; 50 51 52 static uint32_t 53 vbo_minmax_cache_hash(const struct minmax_cache_key *key) 54 { 55 return _mesa_hash_data(key, sizeof(*key)); 56 } 57 58 59 static bool 60 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a, 61 const struct minmax_cache_key *b) 62 { 63 return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type); 64 } 65 66 67 static void 68 vbo_minmax_cache_delete_entry(struct hash_entry *entry) 69 { 70 free(entry->data); 71 } 72 73 74 static GLboolean 75 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj) 76 { 77 if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER | 78 USAGE_ATOMIC_COUNTER_BUFFER | 79 USAGE_SHADER_STORAGE_BUFFER | 80 USAGE_TRANSFORM_FEEDBACK_BUFFER | 81 USAGE_PIXEL_PACK_BUFFER | 82 USAGE_DISABLE_MINMAX_CACHE)) 83 return GL_FALSE; 84 85 if ((bufferObj->Mappings[MAP_USER].AccessFlags & 86 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) == 87 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) 88 return GL_FALSE; 89 90 return GL_TRUE; 91 } 92 93 94 void 95 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj) 96 { 97 _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); 98 bufferObj->MinMaxCache = NULL; 99 } 100 101 102 static GLboolean 103 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj, 104 GLenum type, GLintptr offset, GLuint count, 105 GLuint *min_index, GLuint *max_index) 106 { 107 GLboolean found = GL_FALSE; 108 struct minmax_cache_key key; 109 uint32_t hash; 110 struct hash_entry *result; 111 112 if (!bufferObj->MinMaxCache) 113 return GL_FALSE; 114 if (!vbo_use_minmax_cache(bufferObj)) 115 return GL_FALSE; 116 117 mtx_lock(&bufferObj->Mutex); 118 119 if (bufferObj->MinMaxCacheDirty) { 120 /* Disable the cache permanently for this BO if the number of hits 121 * is asymptotically less than the number of misses. This happens when 122 * applications use the BO for streaming. 123 * 124 * However, some initial optimism allows applications that interleave 125 * draw calls with glBufferSubData during warmup. 126 */ 127 unsigned optimism = bufferObj->Size; 128 if (bufferObj->MinMaxCacheMissIndices > optimism && 129 bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) { 130 bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE; 131 vbo_delete_minmax_cache(bufferObj); 132 goto out_disable; 133 } 134 135 _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); 136 bufferObj->MinMaxCacheDirty = false; 137 goto out_invalidate; 138 } 139 140 key.type = type; 141 key.offset = offset; 142 key.count = count; 143 hash = vbo_minmax_cache_hash(&key); 144 result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key); 145 if (result) { 146 struct minmax_cache_entry *entry = result->data; 147 *min_index = entry->min; 148 *max_index = entry->max; 149 found = GL_TRUE; 150 } 151 152 out_invalidate: 153 if (found) { 154 /* The hit counter saturates so that we don't accidently disable the 155 * cache in a long-running program. 156 */ 157 unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count; 158 159 if (new_hit_count >= bufferObj->MinMaxCacheHitIndices) 160 bufferObj->MinMaxCacheHitIndices = new_hit_count; 161 else 162 bufferObj->MinMaxCacheHitIndices = ~(unsigned)0; 163 } else { 164 bufferObj->MinMaxCacheMissIndices += count; 165 } 166 167 out_disable: 168 mtx_unlock(&bufferObj->Mutex); 169 return found; 170 } 171 172 173 static void 174 vbo_minmax_cache_store(struct gl_context *ctx, 175 struct gl_buffer_object *bufferObj, 176 GLenum type, GLintptr offset, GLuint count, 177 GLuint min, GLuint max) 178 { 179 struct minmax_cache_entry *entry; 180 struct hash_entry *table_entry; 181 uint32_t hash; 182 183 if (!vbo_use_minmax_cache(bufferObj)) 184 return; 185 186 mtx_lock(&bufferObj->Mutex); 187 188 if (!bufferObj->MinMaxCache) { 189 bufferObj->MinMaxCache = 190 _mesa_hash_table_create(NULL, 191 (uint32_t (*)(const void *))vbo_minmax_cache_hash, 192 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal); 193 if (!bufferObj->MinMaxCache) 194 goto out; 195 } 196 197 entry = MALLOC_STRUCT(minmax_cache_entry); 198 if (!entry) 199 goto out; 200 201 entry->key.offset = offset; 202 entry->key.count = count; 203 entry->key.type = type; 204 entry->min = min; 205 entry->max = max; 206 hash = vbo_minmax_cache_hash(&entry->key); 207 208 table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, 209 hash, &entry->key); 210 if (table_entry) { 211 /* It seems like this could happen when two contexts are rendering using 212 * the same buffer object from multiple threads. 213 */ 214 _mesa_debug(ctx, "duplicate entry in minmax cache\n"); 215 free(entry); 216 goto out; 217 } 218 219 table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache, 220 hash, &entry->key, entry); 221 if (!table_entry) 222 free(entry); 223 224 out: 225 mtx_unlock(&bufferObj->Mutex); 226 } 227 228 229 /** 230 * Compute min and max elements by scanning the index buffer for 231 * glDraw[Range]Elements() calls. 232 * If primitive restart is enabled, we need to ignore restart 233 * indexes when computing min/max. 234 */ 235 static void 236 vbo_get_minmax_index(struct gl_context *ctx, 237 const struct _mesa_prim *prim, 238 const struct _mesa_index_buffer *ib, 239 GLuint *min_index, GLuint *max_index, 240 const GLuint count) 241 { 242 const GLboolean restart = ctx->Array._PrimitiveRestart; 243 const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type); 244 const int index_size = vbo_sizeof_ib_type(ib->type); 245 const char *indices; 246 GLuint i; 247 248 indices = (char *) ib->ptr + prim->start * index_size; 249 if (_mesa_is_bufferobj(ib->obj)) { 250 GLsizeiptr size = MIN2(count * index_size, ib->obj->Size); 251 252 if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count, 253 min_index, max_index)) 254 return; 255 256 indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size, 257 GL_MAP_READ_BIT, ib->obj, 258 MAP_INTERNAL); 259 } 260 261 switch (ib->type) { 262 case GL_UNSIGNED_INT: { 263 const GLuint *ui_indices = (const GLuint *)indices; 264 GLuint max_ui = 0; 265 GLuint min_ui = ~0U; 266 if (restart) { 267 for (i = 0; i < count; i++) { 268 if (ui_indices[i] != restartIndex) { 269 if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 270 if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 271 } 272 } 273 } 274 else { 275 #if defined(USE_SSE41) 276 if (cpu_has_sse4_1) { 277 _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count); 278 } 279 else 280 #endif 281 for (i = 0; i < count; i++) { 282 if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 283 if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 284 } 285 } 286 *min_index = min_ui; 287 *max_index = max_ui; 288 break; 289 } 290 case GL_UNSIGNED_SHORT: { 291 const GLushort *us_indices = (const GLushort *)indices; 292 GLuint max_us = 0; 293 GLuint min_us = ~0U; 294 if (restart) { 295 for (i = 0; i < count; i++) { 296 if (us_indices[i] != restartIndex) { 297 if (us_indices[i] > max_us) max_us = us_indices[i]; 298 if (us_indices[i] < min_us) min_us = us_indices[i]; 299 } 300 } 301 } 302 else { 303 for (i = 0; i < count; i++) { 304 if (us_indices[i] > max_us) max_us = us_indices[i]; 305 if (us_indices[i] < min_us) min_us = us_indices[i]; 306 } 307 } 308 *min_index = min_us; 309 *max_index = max_us; 310 break; 311 } 312 case GL_UNSIGNED_BYTE: { 313 const GLubyte *ub_indices = (const GLubyte *)indices; 314 GLuint max_ub = 0; 315 GLuint min_ub = ~0U; 316 if (restart) { 317 for (i = 0; i < count; i++) { 318 if (ub_indices[i] != restartIndex) { 319 if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 320 if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 321 } 322 } 323 } 324 else { 325 for (i = 0; i < count; i++) { 326 if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 327 if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 328 } 329 } 330 *min_index = min_ub; 331 *max_index = max_ub; 332 break; 333 } 334 default: 335 unreachable("not reached"); 336 } 337 338 if (_mesa_is_bufferobj(ib->obj)) { 339 vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count, 340 *min_index, *max_index); 341 ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); 342 } 343 } 344 345 /** 346 * Compute min and max elements for nr_prims 347 */ 348 void 349 vbo_get_minmax_indices(struct gl_context *ctx, 350 const struct _mesa_prim *prims, 351 const struct _mesa_index_buffer *ib, 352 GLuint *min_index, 353 GLuint *max_index, 354 GLuint nr_prims) 355 { 356 GLuint tmp_min, tmp_max; 357 GLuint i; 358 GLuint count; 359 360 *min_index = ~0; 361 *max_index = 0; 362 363 for (i = 0; i < nr_prims; i++) { 364 const struct _mesa_prim *start_prim; 365 366 start_prim = &prims[i]; 367 count = start_prim->count; 368 /* Do combination if possible to reduce map/unmap count */ 369 while ((i + 1 < nr_prims) && 370 (prims[i].start + prims[i].count == prims[i+1].start)) { 371 count += prims[i+1].count; 372 i++; 373 } 374 vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count); 375 *min_index = MIN2(*min_index, tmp_min); 376 *max_index = MAX2(*max_index, tmp_max); 377 } 378 } 379