1 /* 2 * Copyright 2003 VMware, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26 #include "main/bufferobj.h" 27 #include "main/context.h" 28 #include "main/enums.h" 29 #include "main/macros.h" 30 #include "main/glformats.h" 31 32 #include "brw_draw.h" 33 #include "brw_defines.h" 34 #include "brw_context.h" 35 #include "brw_state.h" 36 37 #include "intel_batchbuffer.h" 38 #include "intel_buffer_objects.h" 39 40 static const GLuint double_types_float[5] = { 41 0, 42 ISL_FORMAT_R64_FLOAT, 43 ISL_FORMAT_R64G64_FLOAT, 44 ISL_FORMAT_R64G64B64_FLOAT, 45 ISL_FORMAT_R64G64B64A64_FLOAT 46 }; 47 48 static const GLuint double_types_passthru[5] = { 49 0, 50 ISL_FORMAT_R64_PASSTHRU, 51 ISL_FORMAT_R64G64_PASSTHRU, 52 ISL_FORMAT_R64G64B64_PASSTHRU, 53 ISL_FORMAT_R64G64B64A64_PASSTHRU 54 }; 55 56 static const GLuint float_types[5] = { 57 0, 58 ISL_FORMAT_R32_FLOAT, 59 ISL_FORMAT_R32G32_FLOAT, 60 ISL_FORMAT_R32G32B32_FLOAT, 61 ISL_FORMAT_R32G32B32A32_FLOAT 62 }; 63 64 static const GLuint half_float_types[5] = { 65 0, 66 ISL_FORMAT_R16_FLOAT, 67 ISL_FORMAT_R16G16_FLOAT, 68 ISL_FORMAT_R16G16B16_FLOAT, 69 ISL_FORMAT_R16G16B16A16_FLOAT 70 }; 71 72 static const GLuint fixed_point_types[5] = { 73 0, 74 ISL_FORMAT_R32_SFIXED, 75 ISL_FORMAT_R32G32_SFIXED, 76 ISL_FORMAT_R32G32B32_SFIXED, 77 ISL_FORMAT_R32G32B32A32_SFIXED, 78 }; 79 80 static const GLuint uint_types_direct[5] = { 81 0, 82 ISL_FORMAT_R32_UINT, 83 ISL_FORMAT_R32G32_UINT, 84 ISL_FORMAT_R32G32B32_UINT, 85 ISL_FORMAT_R32G32B32A32_UINT 86 }; 87 88 static const GLuint uint_types_norm[5] = { 89 0, 90 ISL_FORMAT_R32_UNORM, 91 ISL_FORMAT_R32G32_UNORM, 92 ISL_FORMAT_R32G32B32_UNORM, 93 ISL_FORMAT_R32G32B32A32_UNORM 94 }; 95 96 static const GLuint uint_types_scale[5] = { 97 0, 98 ISL_FORMAT_R32_USCALED, 99 ISL_FORMAT_R32G32_USCALED, 100 ISL_FORMAT_R32G32B32_USCALED, 101 ISL_FORMAT_R32G32B32A32_USCALED 102 }; 103 104 static const GLuint int_types_direct[5] = { 105 0, 106 ISL_FORMAT_R32_SINT, 107 ISL_FORMAT_R32G32_SINT, 108 ISL_FORMAT_R32G32B32_SINT, 109 ISL_FORMAT_R32G32B32A32_SINT 110 }; 111 112 static const GLuint int_types_norm[5] = { 113 0, 114 ISL_FORMAT_R32_SNORM, 115 ISL_FORMAT_R32G32_SNORM, 116 ISL_FORMAT_R32G32B32_SNORM, 117 ISL_FORMAT_R32G32B32A32_SNORM 118 }; 119 120 static const GLuint int_types_scale[5] = { 121 0, 122 ISL_FORMAT_R32_SSCALED, 123 ISL_FORMAT_R32G32_SSCALED, 124 ISL_FORMAT_R32G32B32_SSCALED, 125 ISL_FORMAT_R32G32B32A32_SSCALED 126 }; 127 128 static const GLuint ushort_types_direct[5] = { 129 0, 130 ISL_FORMAT_R16_UINT, 131 ISL_FORMAT_R16G16_UINT, 132 ISL_FORMAT_R16G16B16_UINT, 133 ISL_FORMAT_R16G16B16A16_UINT 134 }; 135 136 static const GLuint ushort_types_norm[5] = { 137 0, 138 ISL_FORMAT_R16_UNORM, 139 ISL_FORMAT_R16G16_UNORM, 140 ISL_FORMAT_R16G16B16_UNORM, 141 ISL_FORMAT_R16G16B16A16_UNORM 142 }; 143 144 static const GLuint ushort_types_scale[5] = { 145 0, 146 ISL_FORMAT_R16_USCALED, 147 ISL_FORMAT_R16G16_USCALED, 148 ISL_FORMAT_R16G16B16_USCALED, 149 ISL_FORMAT_R16G16B16A16_USCALED 150 }; 151 152 static const GLuint short_types_direct[5] = { 153 0, 154 ISL_FORMAT_R16_SINT, 155 ISL_FORMAT_R16G16_SINT, 156 ISL_FORMAT_R16G16B16_SINT, 157 ISL_FORMAT_R16G16B16A16_SINT 158 }; 159 160 static const GLuint short_types_norm[5] = { 161 0, 162 ISL_FORMAT_R16_SNORM, 163 ISL_FORMAT_R16G16_SNORM, 164 ISL_FORMAT_R16G16B16_SNORM, 165 ISL_FORMAT_R16G16B16A16_SNORM 166 }; 167 168 static const GLuint short_types_scale[5] = { 169 0, 170 ISL_FORMAT_R16_SSCALED, 171 ISL_FORMAT_R16G16_SSCALED, 172 ISL_FORMAT_R16G16B16_SSCALED, 173 ISL_FORMAT_R16G16B16A16_SSCALED 174 }; 175 176 static const GLuint ubyte_types_direct[5] = { 177 0, 178 ISL_FORMAT_R8_UINT, 179 ISL_FORMAT_R8G8_UINT, 180 ISL_FORMAT_R8G8B8_UINT, 181 ISL_FORMAT_R8G8B8A8_UINT 182 }; 183 184 static const GLuint ubyte_types_norm[5] = { 185 0, 186 ISL_FORMAT_R8_UNORM, 187 ISL_FORMAT_R8G8_UNORM, 188 ISL_FORMAT_R8G8B8_UNORM, 189 ISL_FORMAT_R8G8B8A8_UNORM 190 }; 191 192 static const GLuint ubyte_types_scale[5] = { 193 0, 194 ISL_FORMAT_R8_USCALED, 195 ISL_FORMAT_R8G8_USCALED, 196 ISL_FORMAT_R8G8B8_USCALED, 197 ISL_FORMAT_R8G8B8A8_USCALED 198 }; 199 200 static const GLuint byte_types_direct[5] = { 201 0, 202 ISL_FORMAT_R8_SINT, 203 ISL_FORMAT_R8G8_SINT, 204 ISL_FORMAT_R8G8B8_SINT, 205 ISL_FORMAT_R8G8B8A8_SINT 206 }; 207 208 static const GLuint byte_types_norm[5] = { 209 0, 210 ISL_FORMAT_R8_SNORM, 211 ISL_FORMAT_R8G8_SNORM, 212 ISL_FORMAT_R8G8B8_SNORM, 213 ISL_FORMAT_R8G8B8A8_SNORM 214 }; 215 216 static const GLuint byte_types_scale[5] = { 217 0, 218 ISL_FORMAT_R8_SSCALED, 219 ISL_FORMAT_R8G8_SSCALED, 220 ISL_FORMAT_R8G8B8_SSCALED, 221 ISL_FORMAT_R8G8B8A8_SSCALED 222 }; 223 224 static GLuint 225 double_types(struct brw_context *brw, 226 int size, 227 GLboolean doubles) 228 { 229 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): 230 * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, 231 * 64-bit components are stored in the URB without any conversion." 232 * Also included on BDW PRM, Volume 7, page 470, table "Source Element 233 * Formats Supported in VF Unit" 234 * 235 * Previous PRMs don't include those references, so for gen7 we can't use 236 * PASSTHRU formats directly. But in any case, we prefer to return passthru 237 * even in that case, because that reflects what we want to achieve, even 238 * if we would need to workaround on gen < 8. 239 */ 240 return (doubles 241 ? double_types_passthru[size] 242 : double_types_float[size]); 243 } 244 245 /** 246 * Given vertex array type/size/format/normalized info, return 247 * the appopriate hardware surface type. 248 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 249 */ 250 unsigned 251 brw_get_vertex_surface_type(struct brw_context *brw, 252 const struct gl_vertex_array *glarray) 253 { 254 int size = glarray->Size; 255 const struct gen_device_info *devinfo = &brw->screen->devinfo; 256 const bool is_ivybridge_or_older = 257 devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell; 258 259 if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) 260 fprintf(stderr, "type %s size %d normalized %d\n", 261 _mesa_enum_to_string(glarray->Type), 262 glarray->Size, glarray->Normalized); 263 264 if (glarray->Integer) { 265 assert(glarray->Format == GL_RGBA); /* sanity check */ 266 switch (glarray->Type) { 267 case GL_INT: return int_types_direct[size]; 268 case GL_SHORT: 269 if (is_ivybridge_or_older && size == 3) 270 return short_types_direct[4]; 271 else 272 return short_types_direct[size]; 273 case GL_BYTE: 274 if (is_ivybridge_or_older && size == 3) 275 return byte_types_direct[4]; 276 else 277 return byte_types_direct[size]; 278 case GL_UNSIGNED_INT: return uint_types_direct[size]; 279 case GL_UNSIGNED_SHORT: 280 if (is_ivybridge_or_older && size == 3) 281 return ushort_types_direct[4]; 282 else 283 return ushort_types_direct[size]; 284 case GL_UNSIGNED_BYTE: 285 if (is_ivybridge_or_older && size == 3) 286 return ubyte_types_direct[4]; 287 else 288 return ubyte_types_direct[size]; 289 default: unreachable("not reached"); 290 } 291 } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { 292 return ISL_FORMAT_R11G11B10_FLOAT; 293 } else if (glarray->Normalized) { 294 switch (glarray->Type) { 295 case GL_DOUBLE: return double_types(brw, size, glarray->Doubles); 296 case GL_FLOAT: return float_types[size]; 297 case GL_HALF_FLOAT: 298 case GL_HALF_FLOAT_OES: 299 if (devinfo->gen < 6 && size == 3) 300 return half_float_types[4]; 301 else 302 return half_float_types[size]; 303 case GL_INT: return int_types_norm[size]; 304 case GL_SHORT: return short_types_norm[size]; 305 case GL_BYTE: return byte_types_norm[size]; 306 case GL_UNSIGNED_INT: return uint_types_norm[size]; 307 case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; 308 case GL_UNSIGNED_BYTE: 309 if (glarray->Format == GL_BGRA) { 310 /* See GL_EXT_vertex_array_bgra */ 311 assert(size == 4); 312 return ISL_FORMAT_B8G8R8A8_UNORM; 313 } 314 else { 315 return ubyte_types_norm[size]; 316 } 317 case GL_FIXED: 318 if (devinfo->gen >= 8 || devinfo->is_haswell) 319 return fixed_point_types[size]; 320 321 /* This produces GL_FIXED inputs as values between INT32_MIN and 322 * INT32_MAX, which will be scaled down by 1/65536 by the VS. 323 */ 324 return int_types_scale[size]; 325 /* See GL_ARB_vertex_type_2_10_10_10_rev. 326 * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd 327 * like to use here, so upload everything as UINT and fix 328 * it in the shader 329 */ 330 case GL_INT_2_10_10_10_REV: 331 assert(size == 4); 332 if (devinfo->gen >= 8 || devinfo->is_haswell) { 333 return glarray->Format == GL_BGRA 334 ? ISL_FORMAT_B10G10R10A2_SNORM 335 : ISL_FORMAT_R10G10B10A2_SNORM; 336 } 337 return ISL_FORMAT_R10G10B10A2_UINT; 338 case GL_UNSIGNED_INT_2_10_10_10_REV: 339 assert(size == 4); 340 if (devinfo->gen >= 8 || devinfo->is_haswell) { 341 return glarray->Format == GL_BGRA 342 ? ISL_FORMAT_B10G10R10A2_UNORM 343 : ISL_FORMAT_R10G10B10A2_UNORM; 344 } 345 return ISL_FORMAT_R10G10B10A2_UINT; 346 default: unreachable("not reached"); 347 } 348 } 349 else { 350 /* See GL_ARB_vertex_type_2_10_10_10_rev. 351 * W/A: the hardware doesn't really support the formats we'd 352 * like to use here, so upload everything as UINT and fix 353 * it in the shader 354 */ 355 if (glarray->Type == GL_INT_2_10_10_10_REV) { 356 assert(size == 4); 357 if (devinfo->gen >= 8 || devinfo->is_haswell) { 358 return glarray->Format == GL_BGRA 359 ? ISL_FORMAT_B10G10R10A2_SSCALED 360 : ISL_FORMAT_R10G10B10A2_SSCALED; 361 } 362 return ISL_FORMAT_R10G10B10A2_UINT; 363 } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { 364 assert(size == 4); 365 if (devinfo->gen >= 8 || devinfo->is_haswell) { 366 return glarray->Format == GL_BGRA 367 ? ISL_FORMAT_B10G10R10A2_USCALED 368 : ISL_FORMAT_R10G10B10A2_USCALED; 369 } 370 return ISL_FORMAT_R10G10B10A2_UINT; 371 } 372 assert(glarray->Format == GL_RGBA); /* sanity check */ 373 switch (glarray->Type) { 374 case GL_DOUBLE: return double_types(brw, size, glarray->Doubles); 375 case GL_FLOAT: return float_types[size]; 376 case GL_HALF_FLOAT: 377 case GL_HALF_FLOAT_OES: 378 if (devinfo->gen < 6 && size == 3) 379 return half_float_types[4]; 380 else 381 return half_float_types[size]; 382 case GL_INT: return int_types_scale[size]; 383 case GL_SHORT: return short_types_scale[size]; 384 case GL_BYTE: return byte_types_scale[size]; 385 case GL_UNSIGNED_INT: return uint_types_scale[size]; 386 case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; 387 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; 388 case GL_FIXED: 389 if (devinfo->gen >= 8 || devinfo->is_haswell) 390 return fixed_point_types[size]; 391 392 /* This produces GL_FIXED inputs as values between INT32_MIN and 393 * INT32_MAX, which will be scaled down by 1/65536 by the VS. 394 */ 395 return int_types_scale[size]; 396 default: unreachable("not reached"); 397 } 398 } 399 } 400 401 static void 402 copy_array_to_vbo_array(struct brw_context *brw, 403 struct brw_vertex_element *element, 404 int min, int max, 405 struct brw_vertex_buffer *buffer, 406 GLuint dst_stride) 407 { 408 const int src_stride = element->glarray->StrideB; 409 410 /* If the source stride is zero, we just want to upload the current 411 * attribute once and set the buffer's stride to 0. There's no need 412 * to replicate it out. 413 */ 414 if (src_stride == 0) { 415 intel_upload_data(brw, element->glarray->Ptr, 416 element->glarray->_ElementSize, 417 element->glarray->_ElementSize, 418 &buffer->bo, &buffer->offset); 419 420 buffer->stride = 0; 421 buffer->size = element->glarray->_ElementSize; 422 return; 423 } 424 425 const unsigned char *src = element->glarray->Ptr + min * src_stride; 426 int count = max - min + 1; 427 GLuint size = count * dst_stride; 428 uint8_t *dst = intel_upload_space(brw, size, dst_stride, 429 &buffer->bo, &buffer->offset); 430 431 /* The GL 4.5 spec says: 432 * "If any enabled arrays buffer binding is zero when DrawArrays or 433 * one of the other drawing commands defined in section 10.4 is called, 434 * the result is undefined." 435 * 436 * In this case, let's the dst with undefined values 437 */ 438 if (src != NULL) { 439 if (dst_stride == src_stride) { 440 memcpy(dst, src, size); 441 } else { 442 while (count--) { 443 memcpy(dst, src, dst_stride); 444 src += src_stride; 445 dst += dst_stride; 446 } 447 } 448 } 449 buffer->stride = dst_stride; 450 buffer->size = size; 451 } 452 453 void 454 brw_prepare_vertices(struct brw_context *brw) 455 { 456 const struct gen_device_info *devinfo = &brw->screen->devinfo; 457 struct gl_context *ctx = &brw->ctx; 458 /* BRW_NEW_VS_PROG_DATA */ 459 const struct brw_vs_prog_data *vs_prog_data = 460 brw_vs_prog_data(brw->vs.base.prog_data); 461 GLbitfield64 vs_inputs = vs_prog_data->inputs_read; 462 const unsigned char *ptr = NULL; 463 GLuint interleaved = 0; 464 unsigned int min_index = brw->vb.min_index + brw->basevertex; 465 unsigned int max_index = brw->vb.max_index + brw->basevertex; 466 unsigned i; 467 int delta, j; 468 469 struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; 470 GLuint nr_uploads = 0; 471 472 /* _NEW_POLYGON 473 * 474 * On gen6+, edge flags don't end up in the VUE (either in or out of the 475 * VS). Instead, they're uploaded as the last vertex element, and the data 476 * is passed sideband through the fixed function units. So, we need to 477 * prepare the vertex buffer for it, but it's not present in inputs_read. 478 */ 479 if (devinfo->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || 480 ctx->Polygon.BackMode != GL_FILL)) { 481 vs_inputs |= VERT_BIT_EDGEFLAG; 482 } 483 484 if (0) 485 fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); 486 487 /* Accumulate the list of enabled arrays. */ 488 brw->vb.nr_enabled = 0; 489 while (vs_inputs) { 490 GLuint first = ffsll(vs_inputs) - 1; 491 assert (first < 64); 492 GLuint index = 493 first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read & 494 BITFIELD64_MASK(first)), 2); 495 struct brw_vertex_element *input = &brw->vb.inputs[index]; 496 input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0; 497 vs_inputs &= ~BITFIELD64_BIT(first); 498 if (input->is_dual_slot) 499 vs_inputs &= ~BITFIELD64_BIT(first + 1); 500 brw->vb.enabled[brw->vb.nr_enabled++] = input; 501 } 502 503 if (brw->vb.nr_enabled == 0) 504 return; 505 506 if (brw->vb.nr_buffers) 507 return; 508 509 /* The range of data in a given buffer represented as [min, max) */ 510 struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; 511 uint32_t buffer_range_start[VERT_ATTRIB_MAX]; 512 uint32_t buffer_range_end[VERT_ATTRIB_MAX]; 513 514 for (i = j = 0; i < brw->vb.nr_enabled; i++) { 515 struct brw_vertex_element *input = brw->vb.enabled[i]; 516 const struct gl_vertex_array *glarray = input->glarray; 517 518 if (_mesa_is_bufferobj(glarray->BufferObj)) { 519 struct intel_buffer_object *intel_buffer = 520 intel_buffer_object(glarray->BufferObj); 521 522 const uint32_t offset = (uintptr_t)glarray->Ptr; 523 524 /* Start with the worst case */ 525 uint32_t start = 0; 526 uint32_t range = intel_buffer->Base.Size; 527 if (glarray->InstanceDivisor) { 528 if (brw->num_instances) { 529 start = offset + glarray->StrideB * brw->baseinstance; 530 range = (glarray->StrideB * ((brw->num_instances - 1) / 531 glarray->InstanceDivisor) + 532 glarray->_ElementSize); 533 } 534 } else { 535 if (brw->vb.index_bounds_valid) { 536 start = offset + min_index * glarray->StrideB; 537 range = (glarray->StrideB * (max_index - min_index) + 538 glarray->_ElementSize); 539 } 540 } 541 542 /* If we have a VB set to be uploaded for this buffer object 543 * already, reuse that VB state so that we emit fewer 544 * relocations. 545 */ 546 unsigned k; 547 for (k = 0; k < i; k++) { 548 const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray; 549 if (glarray->BufferObj == other->BufferObj && 550 glarray->StrideB == other->StrideB && 551 glarray->InstanceDivisor == other->InstanceDivisor && 552 (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) 553 { 554 input->buffer = brw->vb.enabled[k]->buffer; 555 input->offset = glarray->Ptr - other->Ptr; 556 557 buffer_range_start[input->buffer] = 558 MIN2(buffer_range_start[input->buffer], start); 559 buffer_range_end[input->buffer] = 560 MAX2(buffer_range_end[input->buffer], start + range); 561 break; 562 } 563 } 564 if (k == i) { 565 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 566 567 /* Named buffer object: Just reference its contents directly. */ 568 buffer->offset = offset; 569 buffer->stride = glarray->StrideB; 570 buffer->step_rate = glarray->InstanceDivisor; 571 buffer->size = glarray->BufferObj->Size - offset; 572 573 enabled_buffer[j] = intel_buffer; 574 buffer_range_start[j] = start; 575 buffer_range_end[j] = start + range; 576 577 input->buffer = j++; 578 input->offset = 0; 579 } 580 } else { 581 /* Queue the buffer object up to be uploaded in the next pass, 582 * when we've decided if we're doing interleaved or not. 583 */ 584 if (nr_uploads == 0) { 585 interleaved = glarray->StrideB; 586 ptr = glarray->Ptr; 587 } 588 else if (interleaved != glarray->StrideB || 589 glarray->InstanceDivisor != 0 || 590 glarray->Ptr < ptr || 591 (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved) 592 { 593 /* If our stride is different from the first attribute's stride, 594 * or if we are using an instance divisor or if the first 595 * attribute's stride didn't cover our element, disable the 596 * interleaved upload optimization. The second case can most 597 * commonly occur in cases where there is a single vertex and, for 598 * example, the data is stored on the application's stack. 599 * 600 * NOTE: This will also disable the optimization in cases where 601 * the data is in a different order than the array indices. 602 * Something like: 603 * 604 * float data[...]; 605 * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); 606 * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); 607 */ 608 interleaved = 0; 609 } 610 611 upload[nr_uploads++] = input; 612 } 613 } 614 615 /* Now that we've set up all of the buffers, we walk through and reference 616 * each of them. We do this late so that we get the right size in each 617 * buffer and don't reference too little data. 618 */ 619 for (i = 0; i < j; i++) { 620 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; 621 if (buffer->bo) 622 continue; 623 624 const uint32_t start = buffer_range_start[i]; 625 const uint32_t range = buffer_range_end[i] - buffer_range_start[i]; 626 627 buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, 628 range, false); 629 brw_bo_reference(buffer->bo); 630 } 631 632 /* If we need to upload all the arrays, then we can trim those arrays to 633 * only the used elements [min_index, max_index] so long as we adjust all 634 * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. 635 */ 636 brw->vb.start_vertex_bias = 0; 637 delta = min_index; 638 if (nr_uploads == brw->vb.nr_enabled) { 639 brw->vb.start_vertex_bias = -delta; 640 delta = 0; 641 } 642 643 /* Handle any arrays to be uploaded. */ 644 if (nr_uploads > 1) { 645 if (interleaved) { 646 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 647 /* All uploads are interleaved, so upload the arrays together as 648 * interleaved. First, upload the contents and set up upload[0]. 649 */ 650 copy_array_to_vbo_array(brw, upload[0], min_index, max_index, 651 buffer, interleaved); 652 buffer->offset -= delta * interleaved; 653 buffer->size += delta * interleaved; 654 buffer->step_rate = 0; 655 656 for (i = 0; i < nr_uploads; i++) { 657 /* Then, just point upload[i] at upload[0]'s buffer. */ 658 upload[i]->offset = 659 ((const unsigned char *)upload[i]->glarray->Ptr - ptr); 660 upload[i]->buffer = j; 661 } 662 j++; 663 664 nr_uploads = 0; 665 } 666 } 667 /* Upload non-interleaved arrays */ 668 for (i = 0; i < nr_uploads; i++) { 669 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; 670 if (upload[i]->glarray->InstanceDivisor == 0) { 671 copy_array_to_vbo_array(brw, upload[i], min_index, max_index, 672 buffer, upload[i]->glarray->_ElementSize); 673 } else { 674 /* This is an instanced attribute, since its InstanceDivisor 675 * is not zero. Therefore, its data will be stepped after the 676 * instanced draw has been run InstanceDivisor times. 677 */ 678 uint32_t instanced_attr_max_index = 679 (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor; 680 copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, 681 buffer, upload[i]->glarray->_ElementSize); 682 } 683 buffer->offset -= delta * buffer->stride; 684 buffer->size += delta * buffer->stride; 685 buffer->step_rate = upload[i]->glarray->InstanceDivisor; 686 upload[i]->buffer = j++; 687 upload[i]->offset = 0; 688 } 689 690 brw->vb.nr_buffers = j; 691 } 692 693 void 694 brw_prepare_shader_draw_parameters(struct brw_context *brw) 695 { 696 const struct brw_vs_prog_data *vs_prog_data = 697 brw_vs_prog_data(brw->vs.base.prog_data); 698 699 /* For non-indirect draws, upload gl_BaseVertex. */ 700 if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) && 701 brw->draw.draw_params_bo == NULL) { 702 intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4, 703 &brw->draw.draw_params_bo, 704 &brw->draw.draw_params_offset); 705 } 706 707 if (vs_prog_data->uses_drawid) { 708 intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4, 709 &brw->draw.draw_id_bo, 710 &brw->draw.draw_id_offset); 711 } 712 } 713 714 static void 715 brw_upload_indices(struct brw_context *brw) 716 { 717 const struct _mesa_index_buffer *index_buffer = brw->ib.ib; 718 GLuint ib_size; 719 struct brw_bo *old_bo = brw->ib.bo; 720 struct gl_buffer_object *bufferobj; 721 GLuint offset; 722 GLuint ib_type_size; 723 724 if (index_buffer == NULL) 725 return; 726 727 ib_type_size = index_buffer->index_size; 728 ib_size = index_buffer->count ? ib_type_size * index_buffer->count : 729 index_buffer->obj->Size; 730 bufferobj = index_buffer->obj; 731 732 /* Turn into a proper VBO: 733 */ 734 if (!_mesa_is_bufferobj(bufferobj)) { 735 /* Get new bufferobj, offset: 736 */ 737 intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size, 738 &brw->ib.bo, &offset); 739 brw->ib.size = brw->ib.bo->size; 740 } else { 741 offset = (GLuint) (unsigned long) index_buffer->ptr; 742 743 struct brw_bo *bo = 744 intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj), 745 offset, ib_size, false); 746 if (bo != brw->ib.bo) { 747 brw_bo_unreference(brw->ib.bo); 748 brw->ib.bo = bo; 749 brw->ib.size = bufferobj->Size; 750 brw_bo_reference(bo); 751 } 752 } 753 754 /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading 755 * the index buffer state when we're just moving the start index 756 * of our drawing. 757 */ 758 brw->ib.start_vertex_offset = offset / ib_type_size; 759 760 if (brw->ib.bo != old_bo) 761 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; 762 763 if (index_buffer->index_size != brw->ib.index_size) { 764 brw->ib.index_size = index_buffer->index_size; 765 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; 766 } 767 } 768 769 const struct brw_tracked_state brw_indices = { 770 .dirty = { 771 .mesa = 0, 772 .brw = BRW_NEW_BLORP | 773 BRW_NEW_INDICES, 774 }, 775 .emit = brw_upload_indices, 776 }; 777