1 /************************************************************************** 2 * 3 * Copyright 2011 Marek Olk <maraeo (at) gmail.com> 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * This module uploads user buffers and translates the vertex buffers which 30 * contain incompatible vertices (i.e. not supported by the driver/hardware) 31 * into compatible ones, based on the Gallium CAPs. 32 * 33 * It does not upload index buffers. 34 * 35 * The module heavily uses bitmasks to represent per-buffer and 36 * per-vertex-element flags to avoid looping over the list of buffers just 37 * to see if there's a non-zero stride, or user buffer, or unsupported format, 38 * etc. 39 * 40 * There are 3 categories of vertex elements, which are processed separately: 41 * - per-vertex attribs (stride != 0, instance_divisor == 0) 42 * - instanced attribs (stride != 0, instance_divisor > 0) 43 * - constant attribs (stride == 0) 44 * 45 * All needed uploads and translations are performed every draw command, but 46 * only the subset of vertices needed for that draw command is uploaded or 47 * translated. (the module never translates whole buffers) 48 * 49 * 50 * The module consists of two main parts: 51 * 52 * 53 * 1) Translate (u_vbuf_translate_begin/end) 54 * 55 * This is pretty much a vertex fetch fallback. It translates vertices from 56 * one vertex buffer to another in an unused vertex buffer slot. It does 57 * whatever is needed to make the vertices readable by the hardware (changes 58 * vertex formats and aligns offsets and strides). The translate module is 59 * used here. 60 * 61 * Each of the 3 categories is translated to a separate buffer. 62 * Only the [min_index, max_index] range is translated. For instanced attribs, 63 * the range is [start_instance, start_instance+instance_count]. For constant 64 * attribs, the range is [0, 1]. 65 * 66 * 67 * 2) User buffer uploading (u_vbuf_upload_buffers) 68 * 69 * Only the [min_index, max_index] range is uploaded (just like Translate) 70 * with a single memcpy. 71 * 72 * This method works best for non-indexed draw operations or indexed draw 73 * operations where the [min_index, max_index] range is not being way bigger 74 * than the vertex count. 75 * 76 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}), 77 * the per-vertex attribs are uploaded via the translate module, all packed 78 * into one vertex buffer, and the indexed draw call is turned into 79 * a non-indexed one in the process. This adds additional complexity 80 * to the translate part, but it prevents bad apps from bringing your frame 81 * rate down. 82 * 83 * 84 * If there is nothing to do, it forwards every command to the driver. 85 * The module also has its own CSO cache of vertex element states. 86 */ 87 88 #include "util/u_vbuf.h" 89 90 #include "util/u_dump.h" 91 #include "util/u_format.h" 92 #include "util/u_inlines.h" 93 #include "util/u_memory.h" 94 #include "util/u_upload_mgr.h" 95 #include "translate/translate.h" 96 #include "translate/translate_cache.h" 97 #include "cso_cache/cso_cache.h" 98 #include "cso_cache/cso_hash.h" 99 100 struct u_vbuf_elements { 101 unsigned count; 102 struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; 103 104 unsigned src_format_size[PIPE_MAX_ATTRIBS]; 105 106 /* If (velem[i].src_format != native_format[i]), the vertex buffer 107 * referenced by the vertex element cannot be used for rendering and 108 * its vertex data must be translated to native_format[i]. */ 109 enum pipe_format native_format[PIPE_MAX_ATTRIBS]; 110 unsigned native_format_size[PIPE_MAX_ATTRIBS]; 111 112 /* Which buffers are used by the vertex element state. */ 113 uint32_t used_vb_mask; 114 /* This might mean two things: 115 * - src_format != native_format, as discussed above. 116 * - src_offset % 4 != 0 (if the caps don't allow such an offset). */ 117 uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */ 118 /* Which buffer has at least one vertex element referencing it 119 * incompatible. */ 120 uint32_t incompatible_vb_mask_any; 121 /* Which buffer has all vertex elements referencing it incompatible. */ 122 uint32_t incompatible_vb_mask_all; 123 /* Which buffer has at least one vertex element referencing it 124 * compatible. */ 125 uint32_t compatible_vb_mask_any; 126 /* Which buffer has all vertex elements referencing it compatible. */ 127 uint32_t compatible_vb_mask_all; 128 129 /* Which buffer has at least one vertex element referencing it 130 * non-instanced. */ 131 uint32_t noninstance_vb_mask_any; 132 133 void *driver_cso; 134 }; 135 136 enum { 137 VB_VERTEX = 0, 138 VB_INSTANCE = 1, 139 VB_CONST = 2, 140 VB_NUM = 3 141 }; 142 143 struct u_vbuf { 144 struct u_vbuf_caps caps; 145 bool has_signed_vb_offset; 146 147 struct pipe_context *pipe; 148 struct translate_cache *translate_cache; 149 struct cso_cache *cso_cache; 150 151 /* This is what was set in set_vertex_buffers. 152 * May contain user buffers. */ 153 struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; 154 uint32_t enabled_vb_mask; 155 156 /* Saved vertex buffer. */ 157 unsigned aux_vertex_buffer_slot; 158 struct pipe_vertex_buffer aux_vertex_buffer_saved; 159 160 /* Vertex buffers for the driver. 161 * There are usually no user buffers. */ 162 struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; 163 uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last 164 call of set_vertex_buffers */ 165 166 /* Vertex elements. */ 167 struct u_vbuf_elements *ve, *ve_saved; 168 169 /* Vertex elements used for the translate fallback. */ 170 struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS]; 171 /* If non-NULL, this is a vertex element state used for the translate 172 * fallback and therefore used for rendering too. */ 173 boolean using_translate; 174 /* The vertex buffer slot index where translated vertices have been 175 * stored in. */ 176 unsigned fallback_vbs[VB_NUM]; 177 178 /* Which buffer is a user buffer. */ 179 uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ 180 /* Which buffer is incompatible (unaligned). */ 181 uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ 182 /* Which buffer has a non-zero stride. */ 183 uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ 184 }; 185 186 static void * 187 u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 188 const struct pipe_vertex_element *attribs); 189 static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso); 190 191 static const struct { 192 enum pipe_format from, to; 193 } vbuf_format_fallbacks[] = { 194 { PIPE_FORMAT_R32_FIXED, PIPE_FORMAT_R32_FLOAT }, 195 { PIPE_FORMAT_R32G32_FIXED, PIPE_FORMAT_R32G32_FLOAT }, 196 { PIPE_FORMAT_R32G32B32_FIXED, PIPE_FORMAT_R32G32B32_FLOAT }, 197 { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 198 { PIPE_FORMAT_R16_FLOAT, PIPE_FORMAT_R32_FLOAT }, 199 { PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 200 { PIPE_FORMAT_R16G16B16_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 201 { PIPE_FORMAT_R16G16B16A16_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 202 { PIPE_FORMAT_R64_FLOAT, PIPE_FORMAT_R32_FLOAT }, 203 { PIPE_FORMAT_R64G64_FLOAT, PIPE_FORMAT_R32G32_FLOAT }, 204 { PIPE_FORMAT_R64G64B64_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT }, 205 { PIPE_FORMAT_R64G64B64A64_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT }, 206 { PIPE_FORMAT_R32_UNORM, PIPE_FORMAT_R32_FLOAT }, 207 { PIPE_FORMAT_R32G32_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 208 { PIPE_FORMAT_R32G32B32_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 209 { PIPE_FORMAT_R32G32B32A32_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 210 { PIPE_FORMAT_R32_SNORM, PIPE_FORMAT_R32_FLOAT }, 211 { PIPE_FORMAT_R32G32_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 212 { PIPE_FORMAT_R32G32B32_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 213 { PIPE_FORMAT_R32G32B32A32_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 214 { PIPE_FORMAT_R32_USCALED, PIPE_FORMAT_R32_FLOAT }, 215 { PIPE_FORMAT_R32G32_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 216 { PIPE_FORMAT_R32G32B32_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 217 { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 218 { PIPE_FORMAT_R32_SSCALED, PIPE_FORMAT_R32_FLOAT }, 219 { PIPE_FORMAT_R32G32_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 220 { PIPE_FORMAT_R32G32B32_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 221 { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 222 { PIPE_FORMAT_R16_UNORM, PIPE_FORMAT_R32_FLOAT }, 223 { PIPE_FORMAT_R16G16_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 224 { PIPE_FORMAT_R16G16B16_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 225 { PIPE_FORMAT_R16G16B16A16_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 226 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R32_FLOAT }, 227 { PIPE_FORMAT_R16G16_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 228 { PIPE_FORMAT_R16G16B16_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 229 { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 230 { PIPE_FORMAT_R16_USCALED, PIPE_FORMAT_R32_FLOAT }, 231 { PIPE_FORMAT_R16G16_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 232 { PIPE_FORMAT_R16G16B16_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 233 { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 234 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R32_FLOAT }, 235 { PIPE_FORMAT_R16G16_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 236 { PIPE_FORMAT_R16G16B16_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 237 { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 238 { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R32_FLOAT }, 239 { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R32G32_FLOAT }, 240 { PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 241 { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 242 { PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R32_FLOAT }, 243 { PIPE_FORMAT_R8G8_SNORM, PIPE_FORMAT_R32G32_FLOAT }, 244 { PIPE_FORMAT_R8G8B8_SNORM, PIPE_FORMAT_R32G32B32_FLOAT }, 245 { PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_R32G32B32A32_FLOAT }, 246 { PIPE_FORMAT_R8_USCALED, PIPE_FORMAT_R32_FLOAT }, 247 { PIPE_FORMAT_R8G8_USCALED, PIPE_FORMAT_R32G32_FLOAT }, 248 { PIPE_FORMAT_R8G8B8_USCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 249 { PIPE_FORMAT_R8G8B8A8_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 250 { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R32_FLOAT }, 251 { PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R32G32_FLOAT }, 252 { PIPE_FORMAT_R8G8B8_SSCALED, PIPE_FORMAT_R32G32B32_FLOAT }, 253 { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, 254 }; 255 256 boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, 257 unsigned flags) 258 { 259 unsigned i; 260 boolean fallback = FALSE; 261 262 /* I'd rather have a bitfield of which formats are supported and a static 263 * table of the translations indexed by format, but since we don't have C99 264 * we can't easily make a sparsely-populated table indexed by format. So, 265 * we construct the sparse table here. 266 */ 267 for (i = 0; i < PIPE_FORMAT_COUNT; i++) 268 caps->format_translation[i] = i; 269 270 for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) { 271 enum pipe_format format = vbuf_format_fallbacks[i].from; 272 273 if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 274 PIPE_BIND_VERTEX_BUFFER)) { 275 caps->format_translation[format] = vbuf_format_fallbacks[i].to; 276 fallback = TRUE; 277 } 278 } 279 280 caps->buffer_offset_unaligned = 281 !screen->get_param(screen, 282 PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY); 283 caps->buffer_stride_unaligned = 284 !screen->get_param(screen, 285 PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY); 286 caps->velem_src_offset_unaligned = 287 !screen->get_param(screen, 288 PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY); 289 caps->user_vertex_buffers = 290 screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); 291 292 if (!caps->buffer_offset_unaligned || 293 !caps->buffer_stride_unaligned || 294 !caps->velem_src_offset_unaligned || 295 (!(flags & U_VBUF_FLAG_NO_USER_VBOS) && !caps->user_vertex_buffers)) { 296 fallback = TRUE; 297 } 298 299 return fallback; 300 } 301 302 struct u_vbuf * 303 u_vbuf_create(struct pipe_context *pipe, 304 struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index) 305 { 306 struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf); 307 308 mgr->caps = *caps; 309 mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index; 310 mgr->pipe = pipe; 311 mgr->cso_cache = cso_cache_create(); 312 mgr->translate_cache = translate_cache_create(); 313 memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs)); 314 315 mgr->has_signed_vb_offset = 316 pipe->screen->get_param(pipe->screen, 317 PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET); 318 319 return mgr; 320 } 321 322 /* u_vbuf uses its own caching for vertex elements, because it needs to keep 323 * its own preprocessed state per vertex element CSO. */ 324 static struct u_vbuf_elements * 325 u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count, 326 const struct pipe_vertex_element *states) 327 { 328 struct pipe_context *pipe = mgr->pipe; 329 unsigned key_size, hash_key; 330 struct cso_hash_iter iter; 331 struct u_vbuf_elements *ve; 332 struct cso_velems_state velems_state; 333 334 /* need to include the count into the stored state data too. */ 335 key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); 336 velems_state.count = count; 337 memcpy(velems_state.velems, states, 338 sizeof(struct pipe_vertex_element) * count); 339 hash_key = cso_construct_key((void*)&velems_state, key_size); 340 iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS, 341 (void*)&velems_state, key_size); 342 343 if (cso_hash_iter_is_null(iter)) { 344 struct cso_velements *cso = MALLOC_STRUCT(cso_velements); 345 memcpy(&cso->state, &velems_state, key_size); 346 cso->data = u_vbuf_create_vertex_elements(mgr, count, states); 347 cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements; 348 cso->context = (void*)mgr; 349 350 iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso); 351 ve = cso->data; 352 } else { 353 ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data; 354 } 355 356 assert(ve); 357 358 if (ve != mgr->ve) 359 pipe->bind_vertex_elements_state(pipe, ve->driver_cso); 360 361 return ve; 362 } 363 364 void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, 365 const struct pipe_vertex_element *states) 366 { 367 mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states); 368 } 369 370 void u_vbuf_destroy(struct u_vbuf *mgr) 371 { 372 struct pipe_screen *screen = mgr->pipe->screen; 373 unsigned i; 374 const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, 375 PIPE_SHADER_CAP_MAX_INPUTS); 376 377 mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL); 378 379 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 380 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); 381 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) 382 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); 383 384 pipe_vertex_buffer_unreference(&mgr->aux_vertex_buffer_saved); 385 386 translate_cache_destroy(mgr->translate_cache); 387 cso_cache_delete(mgr->cso_cache); 388 FREE(mgr); 389 } 390 391 static enum pipe_error 392 u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, 393 const struct pipe_draw_info *info, 394 unsigned vb_mask, unsigned out_vb, 395 int start_vertex, unsigned num_vertices, 396 int min_index, boolean unroll_indices) 397 { 398 struct translate *tr; 399 struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; 400 struct pipe_resource *out_buffer = NULL; 401 uint8_t *out_map; 402 unsigned out_offset, mask; 403 404 /* Get a translate object. */ 405 tr = translate_cache_find(mgr->translate_cache, key); 406 407 /* Map buffers we want to translate. */ 408 mask = vb_mask; 409 while (mask) { 410 struct pipe_vertex_buffer *vb; 411 unsigned offset; 412 uint8_t *map; 413 unsigned i = u_bit_scan(&mask); 414 415 vb = &mgr->vertex_buffer[i]; 416 offset = vb->buffer_offset + vb->stride * start_vertex; 417 418 if (vb->is_user_buffer) { 419 map = (uint8_t*)vb->buffer.user + offset; 420 } else { 421 unsigned size = vb->stride ? num_vertices * vb->stride 422 : sizeof(double)*4; 423 424 if (offset + size > vb->buffer.resource->width0) { 425 /* Don't try to map past end of buffer. This often happens when 426 * we're translating an attribute that's at offset > 0 from the 427 * start of the vertex. If we'd subtract attrib's offset from 428 * the size, this probably wouldn't happen. 429 */ 430 size = vb->buffer.resource->width0 - offset; 431 432 /* Also adjust num_vertices. A common user error is to call 433 * glDrawRangeElements() with incorrect 'end' argument. The 'end 434 * value should be the max index value, but people often 435 * accidentally add one to this value. This adjustment avoids 436 * crashing (by reading past the end of a hardware buffer mapping) 437 * when people do that. 438 */ 439 num_vertices = (size + vb->stride - 1) / vb->stride; 440 } 441 442 map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, 443 PIPE_TRANSFER_READ, &vb_transfer[i]); 444 } 445 446 /* Subtract min_index so that indexing with the index buffer works. */ 447 if (unroll_indices) { 448 map -= (ptrdiff_t)vb->stride * min_index; 449 } 450 451 tr->set_buffer(tr, i, map, vb->stride, ~0); 452 } 453 454 /* Translate. */ 455 if (unroll_indices) { 456 struct pipe_transfer *transfer = NULL; 457 const unsigned offset = info->start * info->index_size; 458 uint8_t *map; 459 460 /* Create and map the output buffer. */ 461 u_upload_alloc(mgr->pipe->stream_uploader, 0, 462 key->output_stride * info->count, 4, 463 &out_offset, &out_buffer, 464 (void**)&out_map); 465 if (!out_buffer) 466 return PIPE_ERROR_OUT_OF_MEMORY; 467 468 if (info->has_user_indices) { 469 map = (uint8_t*)info->index.user + offset; 470 } else { 471 map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset, 472 info->count * info->index_size, 473 PIPE_TRANSFER_READ, &transfer); 474 } 475 476 switch (info->index_size) { 477 case 4: 478 tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map); 479 break; 480 case 2: 481 tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map); 482 break; 483 case 1: 484 tr->run_elts8(tr, map, info->count, 0, 0, out_map); 485 break; 486 } 487 488 if (transfer) { 489 pipe_buffer_unmap(mgr->pipe, transfer); 490 } 491 } else { 492 /* Create and map the output buffer. */ 493 u_upload_alloc(mgr->pipe->stream_uploader, 494 mgr->has_signed_vb_offset ? 495 0 : key->output_stride * start_vertex, 496 key->output_stride * num_vertices, 4, 497 &out_offset, &out_buffer, 498 (void**)&out_map); 499 if (!out_buffer) 500 return PIPE_ERROR_OUT_OF_MEMORY; 501 502 out_offset -= key->output_stride * start_vertex; 503 504 tr->run(tr, 0, num_vertices, 0, 0, out_map); 505 } 506 507 /* Unmap all buffers. */ 508 mask = vb_mask; 509 while (mask) { 510 unsigned i = u_bit_scan(&mask); 511 512 if (vb_transfer[i]) { 513 pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); 514 } 515 } 516 517 /* Setup the new vertex buffer. */ 518 mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset; 519 mgr->real_vertex_buffer[out_vb].stride = key->output_stride; 520 521 /* Move the buffer reference. */ 522 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]); 523 mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer; 524 mgr->real_vertex_buffer[out_vb].is_user_buffer = false; 525 526 return PIPE_OK; 527 } 528 529 static boolean 530 u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, 531 unsigned mask[VB_NUM]) 532 { 533 unsigned type; 534 unsigned fallback_vbs[VB_NUM]; 535 /* Set the bit for each buffer which is incompatible, or isn't set. */ 536 uint32_t unused_vb_mask = 537 mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | 538 ~mgr->enabled_vb_mask; 539 540 memset(fallback_vbs, ~0, sizeof(fallback_vbs)); 541 542 /* Find free slots for each type if needed. */ 543 for (type = 0; type < VB_NUM; type++) { 544 if (mask[type]) { 545 uint32_t index; 546 547 if (!unused_vb_mask) { 548 return FALSE; 549 } 550 551 index = ffs(unused_vb_mask) - 1; 552 fallback_vbs[type] = index; 553 unused_vb_mask &= ~(1 << index); 554 /*printf("found slot=%i for type=%i\n", index, type);*/ 555 } 556 } 557 558 for (type = 0; type < VB_NUM; type++) { 559 if (mask[type]) { 560 mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type]; 561 } 562 } 563 564 memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs)); 565 return TRUE; 566 } 567 568 static boolean 569 u_vbuf_translate_begin(struct u_vbuf *mgr, 570 const struct pipe_draw_info *info, 571 int start_vertex, unsigned num_vertices, 572 int min_index, boolean unroll_indices) 573 { 574 unsigned mask[VB_NUM] = {0}; 575 struct translate_key key[VB_NUM]; 576 unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ 577 unsigned i, type; 578 const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & 579 mgr->ve->used_vb_mask; 580 581 const int start[VB_NUM] = { 582 start_vertex, /* VERTEX */ 583 info->start_instance, /* INSTANCE */ 584 0 /* CONST */ 585 }; 586 587 const unsigned num[VB_NUM] = { 588 num_vertices, /* VERTEX */ 589 info->instance_count, /* INSTANCE */ 590 1 /* CONST */ 591 }; 592 593 memset(key, 0, sizeof(key)); 594 memset(elem_index, ~0, sizeof(elem_index)); 595 596 /* See if there are vertex attribs of each type to translate and 597 * which ones. */ 598 for (i = 0; i < mgr->ve->count; i++) { 599 unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index; 600 601 if (!mgr->vertex_buffer[vb_index].stride) { 602 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 603 !(incompatible_vb_mask & (1 << vb_index))) { 604 continue; 605 } 606 mask[VB_CONST] |= 1 << vb_index; 607 } else if (mgr->ve->ve[i].instance_divisor) { 608 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 609 !(incompatible_vb_mask & (1 << vb_index))) { 610 continue; 611 } 612 mask[VB_INSTANCE] |= 1 << vb_index; 613 } else { 614 if (!unroll_indices && 615 !(mgr->ve->incompatible_elem_mask & (1 << i)) && 616 !(incompatible_vb_mask & (1 << vb_index))) { 617 continue; 618 } 619 mask[VB_VERTEX] |= 1 << vb_index; 620 } 621 } 622 623 assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]); 624 625 /* Find free vertex buffer slots. */ 626 if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) { 627 return FALSE; 628 } 629 630 /* Initialize the translate keys. */ 631 for (i = 0; i < mgr->ve->count; i++) { 632 struct translate_key *k; 633 struct translate_element *te; 634 enum pipe_format output_format = mgr->ve->native_format[i]; 635 unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index; 636 bit = 1 << vb_index; 637 638 if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && 639 !(incompatible_vb_mask & (1 << vb_index)) && 640 (!unroll_indices || !(mask[VB_VERTEX] & bit))) { 641 continue; 642 } 643 644 /* Set type to what we will translate. 645 * Whether vertex, instance, or constant attribs. */ 646 for (type = 0; type < VB_NUM; type++) { 647 if (mask[type] & bit) { 648 break; 649 } 650 } 651 assert(type < VB_NUM); 652 if (mgr->ve->ve[i].src_format != output_format) 653 assert(translate_is_output_format_supported(output_format)); 654 /*printf("velem=%i type=%i\n", i, type);*/ 655 656 /* Add the vertex element. */ 657 k = &key[type]; 658 elem_index[type][i] = k->nr_elements; 659 660 te = &k->element[k->nr_elements]; 661 te->type = TRANSLATE_ELEMENT_NORMAL; 662 te->instance_divisor = 0; 663 te->input_buffer = vb_index; 664 te->input_format = mgr->ve->ve[i].src_format; 665 te->input_offset = mgr->ve->ve[i].src_offset; 666 te->output_format = output_format; 667 te->output_offset = k->output_stride; 668 669 k->output_stride += mgr->ve->native_format_size[i]; 670 k->nr_elements++; 671 } 672 673 /* Translate buffers. */ 674 for (type = 0; type < VB_NUM; type++) { 675 if (key[type].nr_elements) { 676 enum pipe_error err; 677 err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type], 678 mgr->fallback_vbs[type], 679 start[type], num[type], min_index, 680 unroll_indices && type == VB_VERTEX); 681 if (err != PIPE_OK) 682 return FALSE; 683 684 /* Fixup the stride for constant attribs. */ 685 if (type == VB_CONST) { 686 mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0; 687 } 688 } 689 } 690 691 /* Setup new vertex elements. */ 692 for (i = 0; i < mgr->ve->count; i++) { 693 for (type = 0; type < VB_NUM; type++) { 694 if (elem_index[type][i] < key[type].nr_elements) { 695 struct translate_element *te = &key[type].element[elem_index[type][i]]; 696 mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; 697 mgr->fallback_velems[i].src_format = te->output_format; 698 mgr->fallback_velems[i].src_offset = te->output_offset; 699 mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; 700 701 /* elem_index[type][i] can only be set for one type. */ 702 assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); 703 assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u); 704 break; 705 } 706 } 707 /* No translating, just copy the original vertex element over. */ 708 if (type == VB_NUM) { 709 memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i], 710 sizeof(struct pipe_vertex_element)); 711 } 712 } 713 714 u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count, 715 mgr->fallback_velems); 716 mgr->using_translate = TRUE; 717 return TRUE; 718 } 719 720 static void u_vbuf_translate_end(struct u_vbuf *mgr) 721 { 722 unsigned i; 723 724 /* Restore vertex elements. */ 725 mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso); 726 mgr->using_translate = FALSE; 727 728 /* Unreference the now-unused VBOs. */ 729 for (i = 0; i < VB_NUM; i++) { 730 unsigned vb = mgr->fallback_vbs[i]; 731 if (vb != ~0u) { 732 pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL); 733 mgr->fallback_vbs[i] = ~0; 734 735 /* This will cause the buffer to be unbound in the driver later. */ 736 mgr->dirty_real_vb_mask |= 1 << vb; 737 } 738 } 739 } 740 741 static void * 742 u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, 743 const struct pipe_vertex_element *attribs) 744 { 745 struct pipe_context *pipe = mgr->pipe; 746 unsigned i; 747 struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS]; 748 struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements); 749 uint32_t used_buffers = 0; 750 751 ve->count = count; 752 753 memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count); 754 memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count); 755 756 /* Set the best native format in case the original format is not 757 * supported. */ 758 for (i = 0; i < count; i++) { 759 enum pipe_format format = ve->ve[i].src_format; 760 761 ve->src_format_size[i] = util_format_get_blocksize(format); 762 763 used_buffers |= 1 << ve->ve[i].vertex_buffer_index; 764 765 if (!ve->ve[i].instance_divisor) { 766 ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 767 } 768 769 format = mgr->caps.format_translation[format]; 770 771 driver_attribs[i].src_format = format; 772 ve->native_format[i] = format; 773 ve->native_format_size[i] = 774 util_format_get_blocksize(ve->native_format[i]); 775 776 if (ve->ve[i].src_format != format || 777 (!mgr->caps.velem_src_offset_unaligned && 778 ve->ve[i].src_offset % 4 != 0)) { 779 ve->incompatible_elem_mask |= 1 << i; 780 ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 781 } else { 782 ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; 783 } 784 } 785 786 ve->used_vb_mask = used_buffers; 787 ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; 788 ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; 789 790 /* Align the formats and offsets to the size of DWORD if needed. */ 791 if (!mgr->caps.velem_src_offset_unaligned) { 792 for (i = 0; i < count; i++) { 793 ve->native_format_size[i] = align(ve->native_format_size[i], 4); 794 driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4); 795 } 796 } 797 798 ve->driver_cso = 799 pipe->create_vertex_elements_state(pipe, count, driver_attribs); 800 return ve; 801 } 802 803 static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso) 804 { 805 struct pipe_context *pipe = mgr->pipe; 806 struct u_vbuf_elements *ve = cso; 807 808 pipe->delete_vertex_elements_state(pipe, ve->driver_cso); 809 FREE(ve); 810 } 811 812 void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, 813 unsigned start_slot, unsigned count, 814 const struct pipe_vertex_buffer *bufs) 815 { 816 unsigned i; 817 /* which buffers are enabled */ 818 uint32_t enabled_vb_mask = 0; 819 /* which buffers are in user memory */ 820 uint32_t user_vb_mask = 0; 821 /* which buffers are incompatible with the driver */ 822 uint32_t incompatible_vb_mask = 0; 823 /* which buffers have a non-zero stride */ 824 uint32_t nonzero_stride_vb_mask = 0; 825 const uint32_t mask = ~(((1ull << count) - 1) << start_slot); 826 827 /* Zero out the bits we are going to rewrite completely. */ 828 mgr->user_vb_mask &= mask; 829 mgr->incompatible_vb_mask &= mask; 830 mgr->nonzero_stride_vb_mask &= mask; 831 mgr->enabled_vb_mask &= mask; 832 833 if (!bufs) { 834 struct pipe_context *pipe = mgr->pipe; 835 /* Unbind. */ 836 mgr->dirty_real_vb_mask &= mask; 837 838 for (i = 0; i < count; i++) { 839 unsigned dst_index = start_slot + i; 840 841 pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); 842 pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); 843 } 844 845 pipe->set_vertex_buffers(pipe, start_slot, count, NULL); 846 return; 847 } 848 849 for (i = 0; i < count; i++) { 850 unsigned dst_index = start_slot + i; 851 const struct pipe_vertex_buffer *vb = &bufs[i]; 852 struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index]; 853 struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index]; 854 855 if (!vb->buffer.resource) { 856 pipe_vertex_buffer_unreference(orig_vb); 857 pipe_vertex_buffer_unreference(real_vb); 858 continue; 859 } 860 861 pipe_vertex_buffer_reference(orig_vb, vb); 862 863 if (vb->stride) { 864 nonzero_stride_vb_mask |= 1 << dst_index; 865 } 866 enabled_vb_mask |= 1 << dst_index; 867 868 if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || 869 (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { 870 incompatible_vb_mask |= 1 << dst_index; 871 real_vb->buffer_offset = vb->buffer_offset; 872 real_vb->stride = vb->stride; 873 pipe_vertex_buffer_unreference(real_vb); 874 real_vb->is_user_buffer = false; 875 continue; 876 } 877 878 if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { 879 user_vb_mask |= 1 << dst_index; 880 real_vb->buffer_offset = vb->buffer_offset; 881 real_vb->stride = vb->stride; 882 pipe_vertex_buffer_unreference(real_vb); 883 real_vb->is_user_buffer = false; 884 continue; 885 } 886 887 pipe_vertex_buffer_reference(real_vb, vb); 888 } 889 890 mgr->user_vb_mask |= user_vb_mask; 891 mgr->incompatible_vb_mask |= incompatible_vb_mask; 892 mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; 893 mgr->enabled_vb_mask |= enabled_vb_mask; 894 895 /* All changed buffers are marked as dirty, even the NULL ones, 896 * which will cause the NULL buffers to be unbound in the driver later. */ 897 mgr->dirty_real_vb_mask |= ~mask; 898 } 899 900 static enum pipe_error 901 u_vbuf_upload_buffers(struct u_vbuf *mgr, 902 int start_vertex, unsigned num_vertices, 903 int start_instance, unsigned num_instances) 904 { 905 unsigned i; 906 unsigned nr_velems = mgr->ve->count; 907 const struct pipe_vertex_element *velems = 908 mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; 909 unsigned start_offset[PIPE_MAX_ATTRIBS]; 910 unsigned end_offset[PIPE_MAX_ATTRIBS]; 911 uint32_t buffer_mask = 0; 912 913 /* Determine how much data needs to be uploaded. */ 914 for (i = 0; i < nr_velems; i++) { 915 const struct pipe_vertex_element *velem = &velems[i]; 916 unsigned index = velem->vertex_buffer_index; 917 struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; 918 unsigned instance_div, first, size, index_bit; 919 920 /* Skip the buffers generated by translate. */ 921 if (index == mgr->fallback_vbs[VB_VERTEX] || 922 index == mgr->fallback_vbs[VB_INSTANCE] || 923 index == mgr->fallback_vbs[VB_CONST]) { 924 continue; 925 } 926 927 if (!vb->is_user_buffer) { 928 continue; 929 } 930 931 instance_div = velem->instance_divisor; 932 first = vb->buffer_offset + velem->src_offset; 933 934 if (!vb->stride) { 935 /* Constant attrib. */ 936 size = mgr->ve->src_format_size[i]; 937 } else if (instance_div) { 938 /* Per-instance attrib. */ 939 unsigned count = (num_instances + instance_div - 1) / instance_div; 940 first += vb->stride * start_instance; 941 size = vb->stride * (count - 1) + mgr->ve->src_format_size[i]; 942 } else { 943 /* Per-vertex attrib. */ 944 first += vb->stride * start_vertex; 945 size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i]; 946 } 947 948 index_bit = 1 << index; 949 950 /* Update offsets. */ 951 if (!(buffer_mask & index_bit)) { 952 start_offset[index] = first; 953 end_offset[index] = first + size; 954 } else { 955 if (first < start_offset[index]) 956 start_offset[index] = first; 957 if (first + size > end_offset[index]) 958 end_offset[index] = first + size; 959 } 960 961 buffer_mask |= index_bit; 962 } 963 964 /* Upload buffers. */ 965 while (buffer_mask) { 966 unsigned start, end; 967 struct pipe_vertex_buffer *real_vb; 968 const uint8_t *ptr; 969 970 i = u_bit_scan(&buffer_mask); 971 972 start = start_offset[i]; 973 end = end_offset[i]; 974 assert(start < end); 975 976 real_vb = &mgr->real_vertex_buffer[i]; 977 ptr = mgr->vertex_buffer[i].buffer.user; 978 979 u_upload_data(mgr->pipe->stream_uploader, 980 mgr->has_signed_vb_offset ? 0 : start, 981 end - start, 4, 982 ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource); 983 if (!real_vb->buffer.resource) 984 return PIPE_ERROR_OUT_OF_MEMORY; 985 986 real_vb->buffer_offset -= start; 987 } 988 989 return PIPE_OK; 990 } 991 992 static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) 993 { 994 /* See if there are any per-vertex attribs which will be uploaded or 995 * translated. Use bitmasks to get the info instead of looping over vertex 996 * elements. */ 997 return (mgr->ve->used_vb_mask & 998 ((mgr->user_vb_mask | 999 mgr->incompatible_vb_mask | 1000 mgr->ve->incompatible_vb_mask_any) & 1001 mgr->ve->noninstance_vb_mask_any & 1002 mgr->nonzero_stride_vb_mask)) != 0; 1003 } 1004 1005 static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) 1006 { 1007 /* Return true if there are hw buffers which don't need to be translated. 1008 * 1009 * We could query whether each buffer is busy, but that would 1010 * be way more costly than this. */ 1011 return (mgr->ve->used_vb_mask & 1012 (~mgr->user_vb_mask & 1013 ~mgr->incompatible_vb_mask & 1014 mgr->ve->compatible_vb_mask_all & 1015 mgr->ve->noninstance_vb_mask_any & 1016 mgr->nonzero_stride_vb_mask)) != 0; 1017 } 1018 1019 static void u_vbuf_get_minmax_index(struct pipe_context *pipe, 1020 const struct pipe_draw_info *info, 1021 int *out_min_index, int *out_max_index) 1022 { 1023 struct pipe_transfer *transfer = NULL; 1024 const void *indices; 1025 unsigned i; 1026 1027 if (info->has_user_indices) { 1028 indices = (uint8_t*)info->index.user + 1029 info->start * info->index_size; 1030 } else { 1031 indices = pipe_buffer_map_range(pipe, info->index.resource, 1032 info->start * info->index_size, 1033 info->count * info->index_size, 1034 PIPE_TRANSFER_READ, &transfer); 1035 } 1036 1037 switch (info->index_size) { 1038 case 4: { 1039 const unsigned *ui_indices = (const unsigned*)indices; 1040 unsigned max_ui = 0; 1041 unsigned min_ui = ~0U; 1042 if (info->primitive_restart) { 1043 for (i = 0; i < info->count; i++) { 1044 if (ui_indices[i] != info->restart_index) { 1045 if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 1046 if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 1047 } 1048 } 1049 } 1050 else { 1051 for (i = 0; i < info->count; i++) { 1052 if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 1053 if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 1054 } 1055 } 1056 *out_min_index = min_ui; 1057 *out_max_index = max_ui; 1058 break; 1059 } 1060 case 2: { 1061 const unsigned short *us_indices = (const unsigned short*)indices; 1062 unsigned max_us = 0; 1063 unsigned min_us = ~0U; 1064 if (info->primitive_restart) { 1065 for (i = 0; i < info->count; i++) { 1066 if (us_indices[i] != info->restart_index) { 1067 if (us_indices[i] > max_us) max_us = us_indices[i]; 1068 if (us_indices[i] < min_us) min_us = us_indices[i]; 1069 } 1070 } 1071 } 1072 else { 1073 for (i = 0; i < info->count; i++) { 1074 if (us_indices[i] > max_us) max_us = us_indices[i]; 1075 if (us_indices[i] < min_us) min_us = us_indices[i]; 1076 } 1077 } 1078 *out_min_index = min_us; 1079 *out_max_index = max_us; 1080 break; 1081 } 1082 case 1: { 1083 const unsigned char *ub_indices = (const unsigned char*)indices; 1084 unsigned max_ub = 0; 1085 unsigned min_ub = ~0U; 1086 if (info->primitive_restart) { 1087 for (i = 0; i < info->count; i++) { 1088 if (ub_indices[i] != info->restart_index) { 1089 if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 1090 if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 1091 } 1092 } 1093 } 1094 else { 1095 for (i = 0; i < info->count; i++) { 1096 if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 1097 if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 1098 } 1099 } 1100 *out_min_index = min_ub; 1101 *out_max_index = max_ub; 1102 break; 1103 } 1104 default: 1105 assert(0); 1106 *out_min_index = 0; 1107 *out_max_index = 0; 1108 } 1109 1110 if (transfer) { 1111 pipe_buffer_unmap(pipe, transfer); 1112 } 1113 } 1114 1115 static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) 1116 { 1117 struct pipe_context *pipe = mgr->pipe; 1118 unsigned start_slot, count; 1119 1120 start_slot = ffs(mgr->dirty_real_vb_mask) - 1; 1121 count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); 1122 1123 pipe->set_vertex_buffers(pipe, start_slot, count, 1124 mgr->real_vertex_buffer + start_slot); 1125 mgr->dirty_real_vb_mask = 0; 1126 } 1127 1128 void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) 1129 { 1130 struct pipe_context *pipe = mgr->pipe; 1131 int start_vertex, min_index; 1132 unsigned num_vertices; 1133 boolean unroll_indices = FALSE; 1134 const uint32_t used_vb_mask = mgr->ve->used_vb_mask; 1135 uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; 1136 const uint32_t incompatible_vb_mask = 1137 mgr->incompatible_vb_mask & used_vb_mask; 1138 struct pipe_draw_info new_info; 1139 1140 /* Normal draw. No fallback and no user buffers. */ 1141 if (!incompatible_vb_mask && 1142 !mgr->ve->incompatible_elem_mask && 1143 !user_vb_mask) { 1144 1145 /* Set vertex buffers if needed. */ 1146 if (mgr->dirty_real_vb_mask & used_vb_mask) { 1147 u_vbuf_set_driver_vertex_buffers(mgr); 1148 } 1149 1150 pipe->draw_vbo(pipe, info); 1151 return; 1152 } 1153 1154 new_info = *info; 1155 1156 /* Fallback. We need to know all the parameters. */ 1157 if (new_info.indirect) { 1158 struct pipe_transfer *transfer = NULL; 1159 int *data; 1160 1161 if (new_info.index_size) { 1162 data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, 1163 new_info.indirect->offset, 20, 1164 PIPE_TRANSFER_READ, &transfer); 1165 new_info.index_bias = data[3]; 1166 new_info.start_instance = data[4]; 1167 } 1168 else { 1169 data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, 1170 new_info.indirect->offset, 16, 1171 PIPE_TRANSFER_READ, &transfer); 1172 new_info.start_instance = data[3]; 1173 } 1174 1175 new_info.count = data[0]; 1176 new_info.instance_count = data[1]; 1177 new_info.start = data[2]; 1178 pipe_buffer_unmap(pipe, transfer); 1179 new_info.indirect = NULL; 1180 } 1181 1182 if (new_info.index_size) { 1183 /* See if anything needs to be done for per-vertex attribs. */ 1184 if (u_vbuf_need_minmax_index(mgr)) { 1185 int max_index; 1186 1187 if (new_info.max_index != ~0u) { 1188 min_index = new_info.min_index; 1189 max_index = new_info.max_index; 1190 } else { 1191 u_vbuf_get_minmax_index(mgr->pipe, &new_info, 1192 &min_index, &max_index); 1193 } 1194 1195 assert(min_index <= max_index); 1196 1197 start_vertex = min_index + new_info.index_bias; 1198 num_vertices = max_index + 1 - min_index; 1199 1200 /* Primitive restart doesn't work when unrolling indices. 1201 * We would have to break this drawing operation into several ones. */ 1202 /* Use some heuristic to see if unrolling indices improves 1203 * performance. */ 1204 if (!new_info.primitive_restart && 1205 num_vertices > new_info.count*2 && 1206 num_vertices - new_info.count > 32 && 1207 !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { 1208 unroll_indices = TRUE; 1209 user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & 1210 mgr->ve->noninstance_vb_mask_any); 1211 } 1212 } else { 1213 /* Nothing to do for per-vertex attribs. */ 1214 start_vertex = 0; 1215 num_vertices = 0; 1216 min_index = 0; 1217 } 1218 } else { 1219 start_vertex = new_info.start; 1220 num_vertices = new_info.count; 1221 min_index = 0; 1222 } 1223 1224 /* Translate vertices with non-native layouts or formats. */ 1225 if (unroll_indices || 1226 incompatible_vb_mask || 1227 mgr->ve->incompatible_elem_mask) { 1228 if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices, 1229 min_index, unroll_indices)) { 1230 debug_warn_once("u_vbuf_translate_begin() failed"); 1231 return; 1232 } 1233 1234 if (unroll_indices) { 1235 new_info.index_size = 0; 1236 new_info.index_bias = 0; 1237 new_info.min_index = 0; 1238 new_info.max_index = new_info.count - 1; 1239 new_info.start = 0; 1240 } 1241 1242 user_vb_mask &= ~(incompatible_vb_mask | 1243 mgr->ve->incompatible_vb_mask_all); 1244 } 1245 1246 /* Upload user buffers. */ 1247 if (user_vb_mask) { 1248 if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, 1249 new_info.start_instance, 1250 new_info.instance_count) != PIPE_OK) { 1251 debug_warn_once("u_vbuf_upload_buffers() failed"); 1252 return; 1253 } 1254 1255 mgr->dirty_real_vb_mask |= user_vb_mask; 1256 } 1257 1258 /* 1259 if (unroll_indices) { 1260 printf("unrolling indices: start_vertex = %i, num_vertices = %i\n", 1261 start_vertex, num_vertices); 1262 util_dump_draw_info(stdout, info); 1263 printf("\n"); 1264 } 1265 1266 unsigned i; 1267 for (i = 0; i < mgr->nr_vertex_buffers; i++) { 1268 printf("input %i: ", i); 1269 util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i); 1270 printf("\n"); 1271 } 1272 for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { 1273 printf("real %i: ", i); 1274 util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i); 1275 printf("\n"); 1276 } 1277 */ 1278 1279 u_upload_unmap(pipe->stream_uploader); 1280 u_vbuf_set_driver_vertex_buffers(mgr); 1281 1282 pipe->draw_vbo(pipe, &new_info); 1283 1284 if (mgr->using_translate) { 1285 u_vbuf_translate_end(mgr); 1286 } 1287 } 1288 1289 void u_vbuf_save_vertex_elements(struct u_vbuf *mgr) 1290 { 1291 assert(!mgr->ve_saved); 1292 mgr->ve_saved = mgr->ve; 1293 } 1294 1295 void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr) 1296 { 1297 if (mgr->ve != mgr->ve_saved) { 1298 struct pipe_context *pipe = mgr->pipe; 1299 1300 mgr->ve = mgr->ve_saved; 1301 pipe->bind_vertex_elements_state(pipe, 1302 mgr->ve ? mgr->ve->driver_cso : NULL); 1303 } 1304 mgr->ve_saved = NULL; 1305 } 1306 1307 void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr) 1308 { 1309 pipe_vertex_buffer_reference(&mgr->aux_vertex_buffer_saved, 1310 &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot]); 1311 } 1312 1313 void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr) 1314 { 1315 u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1, 1316 &mgr->aux_vertex_buffer_saved); 1317 pipe_vertex_buffer_unreference(&mgr->aux_vertex_buffer_saved); 1318 } 1319