/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"

static const GLuint double_types_float[5] = {
   0,
   BRW_SURFACEFORMAT_R64_FLOAT,
   BRW_SURFACEFORMAT_R64G64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};

static const GLuint double_types_passthru[5] = {
   0,
   BRW_SURFACEFORMAT_R64_PASSTHRU,
   BRW_SURFACEFORMAT_R64G64_PASSTHRU,
   BRW_SURFACEFORMAT_R64G64B64_PASSTHRU,
   BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU
};

static const GLuint float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_FLOAT,
   BRW_SURFACEFORMAT_R32G32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};

static const GLuint half_float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R16_FLOAT,
   BRW_SURFACEFORMAT_R16G16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};

static const GLuint fixed_point_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SFIXED,
   BRW_SURFACEFORMAT_R32G32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
};

static const GLuint uint_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UINT,
   BRW_SURFACEFORMAT_R32G32_UINT,
   BRW_SURFACEFORMAT_R32G32B32_UINT,
   BRW_SURFACEFORMAT_R32G32B32A32_UINT
};

static const GLuint uint_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UNORM,
   BRW_SURFACEFORMAT_R32G32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};

static const GLuint uint_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_USCALED,
   BRW_SURFACEFORMAT_R32G32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};

static const GLuint int_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SINT,
   BRW_SURFACEFORMAT_R32G32_SINT,
   BRW_SURFACEFORMAT_R32G32B32_SINT,
   BRW_SURFACEFORMAT_R32G32B32A32_SINT
};

static const GLuint int_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SNORM,
   BRW_SURFACEFORMAT_R32G32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};

static const GLuint int_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SSCALED,
   BRW_SURFACEFORMAT_R32G32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};

static const GLuint ushort_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UINT,
   BRW_SURFACEFORMAT_R16G16_UINT,
   BRW_SURFACEFORMAT_R16G16B16_UINT,
   BRW_SURFACEFORMAT_R16G16B16A16_UINT
};

static const GLuint ushort_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UNORM,
   BRW_SURFACEFORMAT_R16G16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};

static const GLuint ushort_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_USCALED,
   BRW_SURFACEFORMAT_R16G16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};

static const GLuint short_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SINT,
   BRW_SURFACEFORMAT_R16G16_SINT,
   BRW_SURFACEFORMAT_R16G16B16_SINT,
   BRW_SURFACEFORMAT_R16G16B16A16_SINT
};

static const GLuint short_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SNORM,
   BRW_SURFACEFORMAT_R16G16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};

static const GLuint short_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SSCALED,
   BRW_SURFACEFORMAT_R16G16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};

static const GLuint ubyte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UINT,
   BRW_SURFACEFORMAT_R8G8_UINT,
   BRW_SURFACEFORMAT_R8G8B8_UINT,
   BRW_SURFACEFORMAT_R8G8B8A8_UINT
};

static const GLuint ubyte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UNORM,
   BRW_SURFACEFORMAT_R8G8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};

static const GLuint ubyte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_USCALED,
   BRW_SURFACEFORMAT_R8G8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};

static const GLuint byte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SINT,
   BRW_SURFACEFORMAT_R8G8_SINT,
   BRW_SURFACEFORMAT_R8G8B8_SINT,
   BRW_SURFACEFORMAT_R8G8B8A8_SINT
};

static const GLuint byte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SNORM,
   BRW_SURFACEFORMAT_R8G8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};

static const GLuint byte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SSCALED,
   BRW_SURFACEFORMAT_R8G8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
};

static GLuint
double_types(struct brw_context *brw,
             int size,
             GLboolean doubles)
{
   /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
    * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    * 64-bit components are stored in the URB without any conversion."
    * Also included on BDW PRM, Volume 7, page 470, table "Source Element
    * Formats Supported in VF Unit"
    *
    * Previous PRMs don't include those references, so for gen7 we can't use
    * the PASSTHRU formats directly.  We still prefer to return passthru
    * here, because it reflects what we ultimately want to achieve, even
    * though gen < 8 needs a workaround to consume it.
    */
   return (doubles
           ? double_types_passthru[size]
           : double_types_float[size]);
}

static bool
is_passthru_format(uint32_t format)
{
   switch (format) {
   case BRW_SURFACEFORMAT_R64_PASSTHRU:
   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
      return true;
   default:
      return false;
   }
}

static int
uploads_needed(uint32_t format)
{
   if (!is_passthru_format(format))
      return 1;

   switch (format) {
   case BRW_SURFACEFORMAT_R64_PASSTHRU:
   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
      return 1;
   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
      return 2;
   default:
      unreachable("not reached");
   }
}

/*
 * Returns the number of components associated with a format that is used on
 * a 64-bit to 32-bit format split.  See downsize_format_if_needed().
 */
static int
upload_format_size(uint32_t upload_format)
{
   switch (upload_format) {
   case BRW_SURFACEFORMAT_R32G32_FLOAT:
      return 2;
   case BRW_SURFACEFORMAT_R32G32B32A32_FLOAT:
      return 4;
   default:
      unreachable("not reached");
   }
}

/*
 * Returns the format we will finally use when uploading a vertex element.
 * It only changes for *64*_PASSTHRU formats, which on gen < 8 need to be
 * split into two *32*_FLOAT formats.
 *
 * @upload indicates which upload this is.  Valid values are [0,1].
 */
static uint32_t
downsize_format_if_needed(uint32_t format,
                          int upload)
{
   assert(upload == 0 || upload == 1);

   if (!is_passthru_format(format))
      return format;

   switch (format) {
   case BRW_SURFACEFORMAT_R64_PASSTHRU:
      return BRW_SURFACEFORMAT_R32G32_FLOAT;
   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
      return !upload ? BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
                     : BRW_SURFACEFORMAT_R32G32_FLOAT;
   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
   default:
      unreachable("not reached");
   }
}
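
#if 0
/* Illustrative sketch only (kept out of the build): how the helpers above
 * cooperate when a dvec3 attribute is split on gen < 8.
 * R64G64B64_PASSTHRU needs two uploads: upload 0 fetches the first 16 bytes
 * (the X/Y doubles) as R32G32B32A32_FLOAT, and upload 1 fetches the
 * remaining 8 bytes (Z) as R32G32_FLOAT at a 16-byte source offset (see
 * brw_emit_vertices()).
 */
static void
example_dvec3_split(void)
{
   const uint32_t fmt = BRW_SURFACEFORMAT_R64G64B64_PASSTHRU;

   assert(uploads_needed(fmt) == 2);
   assert(downsize_format_if_needed(fmt, 0) ==
          BRW_SURFACEFORMAT_R32G32B32A32_FLOAT);
   assert(downsize_format_if_needed(fmt, 1) ==
          BRW_SURFACEFORMAT_R32G32_FLOAT);

   /* 4 + 2 32-bit components hold the 3 64-bit components. */
   assert(upload_format_size(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT) +
          upload_format_size(BRW_SURFACEFORMAT_R32G32_FLOAT) == 6);
}
#endif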

/**
 * Given vertex array type/size/format/normalized info, return
 * the appropriate hardware surface type.
 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
 */
unsigned
brw_get_vertex_surface_type(struct brw_context *brw,
                            const struct gl_vertex_array *glarray)
{
   int size = glarray->Size;
   const bool is_ivybridge_or_older =
      brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell;

   if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
      fprintf(stderr, "type %s size %d normalized %d\n",
              _mesa_enum_to_string(glarray->Type),
              glarray->Size, glarray->Normalized);

   if (glarray->Integer) {
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_INT: return int_types_direct[size];
      case GL_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return short_types_direct[4];
         else
            return short_types_direct[size];
      case GL_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return byte_types_direct[4];
         else
            return byte_types_direct[size];
      case GL_UNSIGNED_INT: return uint_types_direct[size];
      case GL_UNSIGNED_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return ushort_types_direct[4];
         else
            return ushort_types_direct[size];
      case GL_UNSIGNED_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return ubyte_types_direct[4];
         else
            return ubyte_types_direct[size];
      default: unreachable("not reached");
      }
   } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
      return BRW_SURFACEFORMAT_R11G11B10_FLOAT;
   } else if (glarray->Normalized) {
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
         if (brw->gen < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_norm[size];
      case GL_SHORT: return short_types_norm[size];
      case GL_BYTE: return byte_types_norm[size];
      case GL_UNSIGNED_INT: return uint_types_norm[size];
      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
      case GL_UNSIGNED_BYTE:
         if (glarray->Format == GL_BGRA) {
            /* See GL_EXT_vertex_array_bgra */
            assert(size == 4);
            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
         }
         else {
            return ubyte_types_norm[size];
         }
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats
       * we'd like to use here, so upload everything as UINT and fix it in
       * the shader.
       */
      case GL_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      case GL_UNSIGNED_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      default: unreachable("not reached");
      }
   }
   else {
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader.
       */
      if (glarray->Type == GL_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      }
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
         if (brw->gen < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_scale[size];
      case GL_SHORT: return short_types_scale[size];
      case GL_BYTE: return byte_types_scale[size];
      case GL_UNSIGNED_INT: return uint_types_scale[size];
      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      default: unreachable("not reached");
      }
   }
}
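
#if 0
/* Illustrative sketch only (kept out of the build): the pre-Haswell
 * GL_FIXED fallback above.  A 16.16 fixed-point 1.5 is the 32-bit integer
 * 0x00018000 (98304); fetched as a *_SSCALED format it reaches the VS as
 * 98304.0f, and the VS recovers the intended value by scaling by 1/65536.
 */
static float
example_fixed_to_float(int32_t fixed_16_16)
{
   return (float) fixed_16_16 * (1.0f / 65536.0f);
}
#endif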

static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const int src_stride = element->glarray->StrideB;

   /* If the source stride is zero, we just want to upload the current
    * attribute once and set the buffer's stride to 0.  There's no need
    * to replicate it out.
    */
   if (src_stride == 0) {
      intel_upload_data(brw, element->glarray->Ptr,
                        element->glarray->_ElementSize,
                        element->glarray->_ElementSize,
                        &buffer->bo, &buffer->offset);

      buffer->stride = 0;
      buffer->size = element->glarray->_ElementSize;
      return;
   }

   const unsigned char *src = element->glarray->Ptr + min * src_stride;
   int count = max - min + 1;
   GLuint size = count * dst_stride;
   uint8_t *dst = intel_upload_space(brw, size, dst_stride,
                                     &buffer->bo, &buffer->offset);

   /* The GL 4.5 spec says:
    *      "If any enabled array's buffer binding is zero when DrawArrays or
    *      one of the other drawing commands defined in section 10.4 is
    *      called, the result is undefined."
    *
    * In this case, leave dst filled with undefined values.
    */
   if (src != NULL) {
      if (dst_stride == src_stride) {
         memcpy(dst, src, size);
      } else {
         while (count--) {
            memcpy(dst, src, dst_stride);
            src += src_stride;
            dst += dst_stride;
         }
      }
   }
   buffer->stride = dst_stride;
   buffer->size = size;
}
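
/*
 * Example (illustrative): a GL_FLOAT vec3 attribute stored with an
 * application stride of 32 bytes is compacted with dst_stride = 12
 * (_ElementSize).  For min = 10 and max = 13 the loop above copies
 * 4 elements, reading from Ptr + 10 * 32 and writing 4 * 12 = 48 bytes
 * into the upload buffer.
 */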

void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint first = ffsll(vs_inputs) - 1;
      assert(first < 64);
      GLuint index =
         first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read &
                                                BITFIELD64_MASK(first)), 2);
      struct brw_vertex_element *input = &brw->vb.inputs[index];
      input->is_dual_slot =
         (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0;
      vs_inputs &= ~BITFIELD64_BIT(first);
      if (input->is_dual_slot)
         vs_inputs &= ~BITFIELD64_BIT(first + 1);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }
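
   /* Example (illustrative): if attribute 0 is a dual-slot dvec4, it sets
    * bits 0 and 1 of both inputs_read and double_inputs_read.  A vec4 that
    * follows at slot 2 then maps to index 2 - DIV_ROUND_UP(2, 2) = 1, i.e.
    * brw->vb.inputs[1], keeping the vertex element array tightly packed.
    */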

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_vertex_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single
             * vertex and, for example, the data is stored on the
             * application's stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *       float data[...];
             *       glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *       glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start,
                                          range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}

void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);

   /* For non-indirect draws, upload gl_BaseVertex. */
   if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
       brw->draw.draw_params_bo == NULL) {
      intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
                        &brw->draw.draw_params_bo,
                        &brw->draw.draw_params_offset);
   }

   if (vs_prog_data->uses_drawid) {
      intel_upload_data(brw, &brw->draw.gl_drawid,
                        sizeof(brw->draw.gl_drawid), 4,
                        &brw->draw.draw_id_bo,
                        &brw->draw.draw_id_offset);
   }
}

/**
 * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
 */
uint32_t *
brw_emit_vertex_buffer_state(struct brw_context *brw,
                             unsigned buffer_nr,
                             drm_intel_bo *bo,
                             unsigned start_offset,
                             unsigned end_offset,
                             unsigned stride,
                             unsigned step_rate,
                             uint32_t *__map)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t dw0;

   if (brw->gen >= 8) {
      dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT;
   } else if (brw->gen >= 6) {
      dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
            (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
                       : GEN6_VB0_ACCESS_VERTEXDATA);
   } else {
      dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
            (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
                       : BRW_VB0_ACCESS_VERTEXDATA);
   }

   if (brw->gen >= 7)
      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

   switch (brw->gen) {
   case 7:
      dw0 |= GEN7_MOCS_L3 << 16;
      break;
   case 8:
      dw0 |= BDW_MOCS_WB << 16;
      break;
   case 9:
      dw0 |= SKL_MOCS_WB << 16;
      break;
   }

   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
             "VBO stride %d too large, bad rendering may occur\n",
             stride);
   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
   if (brw->gen >= 8) {
      OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
      /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
       *                 Vertex Fetch (VF) Stage - State
       *
       * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
       * VBState.BufferPitch", the address of the byte immediately beyond
       * the last valid byte of the buffer is determined by
       * "VBState.StartingBufferAddress + VBState.BufferSize".
       */
      OUT_BATCH(end_offset - start_offset);
   } else if (brw->gen >= 5) {
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
      /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
       *                 Vertex Fetch (VF) Stage - State
       *
       * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
       * VBState.BufferPitch", the address of the byte immediately beyond
       * the last valid byte of the buffer is determined by
       * "VBState.EndAddress + 1".
       */
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1);
      OUT_BATCH(step_rate);
   } else {
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
      OUT_BATCH(0);
      OUT_BATCH(step_rate);
   }

   return __map;
}

static void
brw_emit_vertices(struct brw_context *brw)
{
   GLuint i;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

   brw_emit_query_begin(brw);

   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);

   unsigned nr_elements = brw->vb.nr_enabled;
   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
       vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
      ++nr_elements;
   if (vs_prog_data->uses_drawid)
      nr_elements++;

   /* If any enabled element's format needs more than one upload, we have to
    * account for the extra VERTEX_ELEMENTs in nr_elements.
    */
   unsigned extra_uploads = 0;
   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);

      if (uploads_needed(format) > 1)
         extra_uploads++;
   }
   nr_elements += extra_uploads;
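
   /* Example (illustrative): a VS reading two dvec3 attributes plus
    * gl_DrawID has nr_enabled = 2; each dvec3's R64G64B64_PASSTHRU format
    * needs two uploads (extra_uploads = 2) and gl_DrawID adds one more,
    * giving nr_elements = 5.
    */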

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but it doesn't do anything unless
    * a VE loads from it.
    */
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
         OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
                   GEN6_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
         OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
                   BRW_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      ADVANCE_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   const bool uses_draw_params =
      vs_prog_data->uses_basevertex ||
      vs_prog_data->uses_baseinstance;
   const unsigned nr_buffers = brw->vb.nr_buffers +
      uses_draw_params + vs_prog_data->uses_drawid;

   if (nr_buffers) {
      if (brw->gen >= 6) {
         assert(nr_buffers <= 33);
      } else {
         assert(nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4 * nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         /* Prior to Haswell and Bay Trail we have to use 4-component
          * formats to fake 3-component ones.  In particular, we do this
          * for half-float and 8 and 16-bit integer formats.  This means
          * that the vertex element may poke over the end of the buffer by
          * 2 bytes.
          */
         unsigned padding =
            (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
         EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset,
                                  buffer->offset + buffer->size + padding,
                                  buffer->stride, buffer->step_rate);
      }

      if (uses_draw_params) {
         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
                                  brw->draw.draw_params_bo,
                                  brw->draw.draw_params_offset,
                                  brw->draw.draw_params_bo->size,
                                  0,  /* stride */
                                  0); /* step rate */
      }

      if (vs_prog_data->uses_drawid) {
         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
                                  brw->draw.draw_id_bo,
                                  brw->draw.draw_id_offset,
                                  brw->draw.draw_id_bo->size,
                                  0,  /* stride */
                                  0); /* step rate */
      }

      ADVANCE_BATCH();
   }

   /* The hardware allows one more VERTEX_ELEMENT than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
      unsigned num_uploads = 1;
      unsigned c;

      num_uploads = uploads_needed(format);

      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }
      }

      for (c = 0; c < num_uploads; c++) {
         uint32_t upload_format = downsize_format_if_needed(format, c);
         /* If we need more than one upload, the source offset advances by
          * 128 bits (16 bytes) per upload, as each previous upload consumed
          * a full vertex entry.
          */
         unsigned int offset = input->offset + c * 16;
         int size = input->glarray->Size;

         if (is_passthru_format(format))
            size = upload_format_size(upload_format);

         /* Components beyond the array size read 0, except the W component,
          * which reads 1.  The case fall-through is intentional.
          */
         switch (size) {
         case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
         case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
         case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
         case 3: comp3 = input->glarray->Integer
                    ? BRW_VE1_COMPONENT_STORE_1_INT
                    : BRW_VE1_COMPONENT_STORE_1_FLT;
            break;
         }
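
         /* Example (illustrative): a float vec2 has size == 2, so the
          * fall-through above sets comp2 = STORE_0 and comp3 = STORE_1_FLT,
          * and the VS reads (x, y, 0.0f, 1.0f).
          */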

         if (brw->gen >= 6) {
            OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
                      GEN6_VE0_VALID |
                      (upload_format << BRW_VE0_FORMAT_SHIFT) |
                      (offset << BRW_VE0_SRC_OFFSET_SHIFT));
         } else {
            OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
                      BRW_VE0_VALID |
                      (upload_format << BRW_VE0_FORMAT_SHIFT) |
                      (offset << BRW_VE0_SRC_OFFSET_SHIFT));
         }

         if (brw->gen >= 5)
            OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                      (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                      (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                      (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
         else
            OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                      (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                      (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                      (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                      ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
      }
   }

   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
       vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;

      if (vs_prog_data->uses_basevertex)
         comp0 = BRW_VE1_COMPONENT_STORE_SRC;

      if (vs_prog_data->uses_baseinstance)
         comp1 = BRW_VE1_COMPONENT_STORE_SRC;

      if (vs_prog_data->uses_vertexid)
         comp2 = BRW_VE1_COMPONENT_STORE_VID;

      if (vs_prog_data->uses_instanceid)
         comp3 = BRW_VE1_COMPONENT_STORE_IID;

      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);

      if (brw->gen >= 6) {
         dw0 |= GEN6_VE0_VALID |
                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
      } else {
         dw0 |= BRW_VE0_VALID |
                brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   if (vs_prog_data->uses_drawid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
            (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
            (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
            (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);

      if (brw->gen >= 6) {
         dw0 |= GEN6_VE0_VALID |
                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
      } else {
         dw0 |= BRW_VE0_VALID |
                ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);

         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_vertices = {
   .dirty = {
      .mesa = _NEW_POLYGON,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_VERTICES |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_emit_vertices,
};

static void
brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = index_buffer->count ? ib_type_size * index_buffer->count
                                 : index_buffer->obj->Size;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
                        &brw->ib.bo, &offset);
      brw->ib.size = brw->ib.bo->size;
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
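      /* Example (illustrative): GL_UNSIGNED_SHORT indices at buffer offset 3
       * give (ib_type_size - 1) & offset == (2 - 1) & 3 == 1, so the range
       * is mapped and re-uploaded at an aligned offset below.
       */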
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx,
                                                   offset,
                                                   ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj,
                                                   MAP_INTERNAL);

         intel_upload_data(brw, map, ib_size, ib_type_size,
                           &brw->ib.bo, &offset);
         brw->ib.size = brw->ib.bo->size;

         ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
      } else {
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                   offset, ib_size);
         if (bo != brw->ib.bo) {
            drm_intel_bo_unreference(brw->ib.bo);
            brw->ib.bo = bo;
            brw->ib.size = bufferobj->Size;
            drm_intel_bo_reference(bo);
         }
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }
}

const struct brw_tracked_state brw_indices = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_INDICES,
   },
   .emit = brw_upload_indices,
};

static void
brw_emit_index_buffer(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint cut_index_setting;

   if (index_buffer == NULL)
      return;

   if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
      cut_index_setting = BRW_CUT_INDEX_ENABLE;
   } else {
      cut_index_setting = 0;
   }

   BEGIN_BATCH(3);
   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
             cut_index_setting |
             brw_get_index_type(index_buffer->type) |
             1);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             0);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             brw->ib.size - 1);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_index_buffer = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_INDEX_BUFFER,
   },
   .emit = brw_emit_index_buffer,
};