1 /* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw (at) vmware.com> 30 */ 31 32 33 #include "compiler/nir/nir.h" 34 #include "main/context.h" 35 #include "main/blend.h" 36 #include "main/mtypes.h" 37 #include "main/samplerobj.h" 38 #include "main/shaderimage.h" 39 #include "main/teximage.h" 40 #include "program/prog_parameter.h" 41 #include "program/prog_instruction.h" 42 #include "main/framebuffer.h" 43 #include "main/shaderapi.h" 44 45 #include "isl/isl.h" 46 47 #include "intel_mipmap_tree.h" 48 #include "intel_batchbuffer.h" 49 #include "intel_tex.h" 50 #include "intel_fbo.h" 51 #include "intel_buffer_objects.h" 52 53 #include "brw_context.h" 54 #include "brw_state.h" 55 #include "brw_defines.h" 56 #include "brw_wm.h" 57 58 uint32_t wb_mocs[] = { 59 [7] = GEN7_MOCS_L3, 60 [8] = BDW_MOCS_WB, 61 [9] = SKL_MOCS_WB, 62 [10] = CNL_MOCS_WB, 63 }; 64 65 uint32_t pte_mocs[] = { 66 [7] = GEN7_MOCS_L3, 67 [8] = BDW_MOCS_PTE, 68 [9] = SKL_MOCS_PTE, 69 [10] = CNL_MOCS_PTE, 70 }; 71 72 uint32_t 73 brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo) 74 { 75 return (bo && bo->external ? 
pte_mocs : wb_mocs)[devinfo->gen]; 76 } 77 78 static void 79 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, 80 GLenum target, struct isl_view *view, 81 uint32_t *tile_x, uint32_t *tile_y, 82 uint32_t *offset, struct isl_surf *surf) 83 { 84 *surf = mt->surf; 85 86 const struct gen_device_info *devinfo = &brw->screen->devinfo; 87 const enum isl_dim_layout dim_layout = 88 get_isl_dim_layout(devinfo, mt->surf.tiling, target); 89 90 surf->dim = get_isl_surf_dim(target); 91 92 if (surf->dim_layout == dim_layout) 93 return; 94 95 /* The layout of the specified texture target is not compatible with the 96 * actual layout of the miptree structure in memory -- You're entering 97 * dangerous territory, this can only possibly work if you only intended 98 * to access a single level and slice of the texture, and the hardware 99 * supports the tile offset feature in order to allow non-tile-aligned 100 * base offsets, since we'll have to point the hardware to the first 101 * texel of the level instead of relying on the usual base level/layer 102 * controls. 103 */ 104 assert(devinfo->has_surface_tile_offset); 105 assert(view->levels == 1 && view->array_len == 1); 106 assert(*tile_x == 0 && *tile_y == 0); 107 108 *offset += intel_miptree_get_tile_offsets(mt, view->base_level, 109 view->base_array_layer, 110 tile_x, tile_y); 111 112 /* Minify the logical dimensions of the texture. */ 113 const unsigned l = view->base_level - mt->first_level; 114 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l); 115 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 : 116 minify(surf->logical_level0_px.height, l); 117 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 : 118 minify(surf->logical_level0_px.depth, l); 119 120 /* Only the base level and layer can be addressed with the overridden 121 * layout. 
122 */ 123 surf->logical_level0_px.array_len = 1; 124 surf->levels = 1; 125 surf->dim_layout = dim_layout; 126 127 /* The requested slice of the texture is now at the base level and 128 * layer. 129 */ 130 view->base_level = 0; 131 view->base_array_layer = 0; 132 } 133 134 static void 135 brw_emit_surface_state(struct brw_context *brw, 136 struct intel_mipmap_tree *mt, 137 GLenum target, struct isl_view view, 138 enum isl_aux_usage aux_usage, 139 uint32_t *surf_offset, int surf_index, 140 unsigned reloc_flags) 141 { 142 const struct gen_device_info *devinfo = &brw->screen->devinfo; 143 uint32_t tile_x = mt->level[0].level_x; 144 uint32_t tile_y = mt->level[0].level_y; 145 uint32_t offset = mt->offset; 146 147 struct isl_surf surf; 148 149 get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf); 150 151 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; 152 153 struct brw_bo *aux_bo; 154 struct isl_surf *aux_surf = NULL; 155 uint64_t aux_offset = 0; 156 switch (aux_usage) { 157 case ISL_AUX_USAGE_MCS: 158 case ISL_AUX_USAGE_CCS_D: 159 case ISL_AUX_USAGE_CCS_E: 160 aux_surf = &mt->mcs_buf->surf; 161 aux_bo = mt->mcs_buf->bo; 162 aux_offset = mt->mcs_buf->offset; 163 break; 164 165 case ISL_AUX_USAGE_HIZ: 166 aux_surf = &mt->hiz_buf->surf; 167 aux_bo = mt->hiz_buf->bo; 168 aux_offset = 0; 169 break; 170 171 case ISL_AUX_USAGE_NONE: 172 break; 173 } 174 175 if (aux_usage != ISL_AUX_USAGE_NONE) { 176 /* We only really need a clear color if we also have an auxiliary 177 * surface. Without one, it does nothing. 
178 */ 179 clear_color = mt->fast_clear_color; 180 } 181 182 void *state = brw_state_batch(brw, 183 brw->isl_dev.ss.size, 184 brw->isl_dev.ss.align, 185 surf_offset); 186 187 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, 188 .address = brw_state_reloc(&brw->batch, 189 *surf_offset + brw->isl_dev.ss.addr_offset, 190 mt->bo, offset, reloc_flags), 191 .aux_surf = aux_surf, .aux_usage = aux_usage, 192 .aux_address = aux_offset, 193 .mocs = brw_get_bo_mocs(devinfo, mt->bo), 194 .clear_color = clear_color, 195 .x_offset_sa = tile_x, .y_offset_sa = tile_y); 196 if (aux_surf) { 197 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the 198 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits 199 * contain other control information. Since buffer addresses are always 200 * on 4k boundaries (and thus have their lower 12 bits zero), we can use 201 * an ordinary reloc to do the necessary address translation. 202 * 203 * FIXME: move to the point of assignment. 
204 */ 205 assert((aux_offset & 0xfff) == 0); 206 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; 207 *aux_addr = brw_state_reloc(&brw->batch, 208 *surf_offset + 209 brw->isl_dev.ss.aux_addr_offset, 210 aux_bo, *aux_addr, 211 reloc_flags); 212 } 213 } 214 215 static uint32_t 216 gen6_update_renderbuffer_surface(struct brw_context *brw, 217 struct gl_renderbuffer *rb, 218 unsigned unit, 219 uint32_t surf_index) 220 { 221 struct gl_context *ctx = &brw->ctx; 222 struct intel_renderbuffer *irb = intel_renderbuffer(rb); 223 struct intel_mipmap_tree *mt = irb->mt; 224 225 assert(brw_render_target_supported(brw, rb)); 226 227 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); 228 if (unlikely(!brw->mesa_format_supports_render[rb_format])) { 229 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", 230 __func__, _mesa_get_format_name(rb_format)); 231 } 232 enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format]; 233 234 struct isl_view view = { 235 .format = isl_format, 236 .base_level = irb->mt_level - irb->mt->first_level, 237 .levels = 1, 238 .base_array_layer = irb->mt_layer, 239 .array_len = MAX2(irb->layer_count, 1), 240 .swizzle = ISL_SWIZZLE_IDENTITY, 241 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, 242 }; 243 244 uint32_t offset; 245 brw_emit_surface_state(brw, mt, mt->target, view, 246 brw->draw_aux_usage[unit], 247 &offset, surf_index, 248 RELOC_WRITE); 249 return offset; 250 } 251 252 GLuint 253 translate_tex_target(GLenum target) 254 { 255 switch (target) { 256 case GL_TEXTURE_1D: 257 case GL_TEXTURE_1D_ARRAY_EXT: 258 return BRW_SURFACE_1D; 259 260 case GL_TEXTURE_RECTANGLE_NV: 261 return BRW_SURFACE_2D; 262 263 case GL_TEXTURE_2D: 264 case GL_TEXTURE_2D_ARRAY_EXT: 265 case GL_TEXTURE_EXTERNAL_OES: 266 case GL_TEXTURE_2D_MULTISAMPLE: 267 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: 268 return BRW_SURFACE_2D; 269 270 case GL_TEXTURE_3D: 271 return BRW_SURFACE_3D; 272 273 case GL_TEXTURE_CUBE_MAP: 274 case 
GL_TEXTURE_CUBE_MAP_ARRAY: 275 return BRW_SURFACE_CUBE; 276 277 default: 278 unreachable("not reached"); 279 } 280 } 281 282 uint32_t 283 brw_get_surface_tiling_bits(enum isl_tiling tiling) 284 { 285 switch (tiling) { 286 case ISL_TILING_X: 287 return BRW_SURFACE_TILED; 288 case ISL_TILING_Y0: 289 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; 290 default: 291 return 0; 292 } 293 } 294 295 296 uint32_t 297 brw_get_surface_num_multisamples(unsigned num_samples) 298 { 299 if (num_samples > 1) 300 return BRW_SURFACE_MULTISAMPLECOUNT_4; 301 else 302 return BRW_SURFACE_MULTISAMPLECOUNT_1; 303 } 304 305 /** 306 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle 307 * swizzling. 308 */ 309 int 310 brw_get_texture_swizzle(const struct gl_context *ctx, 311 const struct gl_texture_object *t) 312 { 313 const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; 314 315 int swizzles[SWIZZLE_NIL + 1] = { 316 SWIZZLE_X, 317 SWIZZLE_Y, 318 SWIZZLE_Z, 319 SWIZZLE_W, 320 SWIZZLE_ZERO, 321 SWIZZLE_ONE, 322 SWIZZLE_NIL 323 }; 324 325 if (img->_BaseFormat == GL_DEPTH_COMPONENT || 326 img->_BaseFormat == GL_DEPTH_STENCIL) { 327 GLenum depth_mode = t->DepthMode; 328 329 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures 330 * with depth component data specified with a sized internal format. 331 * Otherwise, it's left at the old default, GL_LUMINANCE. 
332 */ 333 if (_mesa_is_gles3(ctx) && 334 img->InternalFormat != GL_DEPTH_COMPONENT && 335 img->InternalFormat != GL_DEPTH_STENCIL) { 336 depth_mode = GL_RED; 337 } 338 339 switch (depth_mode) { 340 case GL_ALPHA: 341 swizzles[0] = SWIZZLE_ZERO; 342 swizzles[1] = SWIZZLE_ZERO; 343 swizzles[2] = SWIZZLE_ZERO; 344 swizzles[3] = SWIZZLE_X; 345 break; 346 case GL_LUMINANCE: 347 swizzles[0] = SWIZZLE_X; 348 swizzles[1] = SWIZZLE_X; 349 swizzles[2] = SWIZZLE_X; 350 swizzles[3] = SWIZZLE_ONE; 351 break; 352 case GL_INTENSITY: 353 swizzles[0] = SWIZZLE_X; 354 swizzles[1] = SWIZZLE_X; 355 swizzles[2] = SWIZZLE_X; 356 swizzles[3] = SWIZZLE_X; 357 break; 358 case GL_RED: 359 swizzles[0] = SWIZZLE_X; 360 swizzles[1] = SWIZZLE_ZERO; 361 swizzles[2] = SWIZZLE_ZERO; 362 swizzles[3] = SWIZZLE_ONE; 363 break; 364 } 365 } 366 367 GLenum datatype = _mesa_get_format_datatype(img->TexFormat); 368 369 /* If the texture's format is alpha-only, force R, G, and B to 370 * 0.0. Similarly, if the texture's format has no alpha channel, 371 * force the alpha value read to 1.0. This allows for the 372 * implementation to use an RGBA texture for any of these formats 373 * without leaking any unexpected values. 
374 */ 375 switch (img->_BaseFormat) { 376 case GL_ALPHA: 377 swizzles[0] = SWIZZLE_ZERO; 378 swizzles[1] = SWIZZLE_ZERO; 379 swizzles[2] = SWIZZLE_ZERO; 380 break; 381 case GL_LUMINANCE: 382 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) { 383 swizzles[0] = SWIZZLE_X; 384 swizzles[1] = SWIZZLE_X; 385 swizzles[2] = SWIZZLE_X; 386 swizzles[3] = SWIZZLE_ONE; 387 } 388 break; 389 case GL_LUMINANCE_ALPHA: 390 if (datatype == GL_SIGNED_NORMALIZED) { 391 swizzles[0] = SWIZZLE_X; 392 swizzles[1] = SWIZZLE_X; 393 swizzles[2] = SWIZZLE_X; 394 swizzles[3] = SWIZZLE_W; 395 } 396 break; 397 case GL_INTENSITY: 398 if (datatype == GL_SIGNED_NORMALIZED) { 399 swizzles[0] = SWIZZLE_X; 400 swizzles[1] = SWIZZLE_X; 401 swizzles[2] = SWIZZLE_X; 402 swizzles[3] = SWIZZLE_X; 403 } 404 break; 405 case GL_RED: 406 case GL_RG: 407 case GL_RGB: 408 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || 409 img->TexFormat == MESA_FORMAT_RGB_DXT1 || 410 img->TexFormat == MESA_FORMAT_SRGB_DXT1) 411 swizzles[3] = SWIZZLE_ONE; 412 break; 413 } 414 415 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], 416 swizzles[GET_SWZ(t->_Swizzle, 1)], 417 swizzles[GET_SWZ(t->_Swizzle, 2)], 418 swizzles[GET_SWZ(t->_Swizzle, 3)]); 419 } 420 421 /** 422 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+ 423 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are 424 * 425 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE 426 * 0 1 2 3 4 5 427 * 4 5 6 7 0 1 428 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE 429 * 430 * which is simply adding 4 then modding by 8 (or anding with 7). 431 * 432 * We then may need to apply workarounds for textureGather hardware bugs. 433 */ 434 static unsigned 435 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) 436 { 437 unsigned scs = (swizzle + 4) & 7; 438 439 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? 
HSW_SCS_BLUE : scs; 440 } 441 442 static void brw_update_texture_surface(struct gl_context *ctx, 443 unsigned unit, 444 uint32_t *surf_offset, 445 bool for_gather, 446 bool for_txf, 447 uint32_t plane) 448 { 449 struct brw_context *brw = brw_context(ctx); 450 const struct gen_device_info *devinfo = &brw->screen->devinfo; 451 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; 452 453 if (obj->Target == GL_TEXTURE_BUFFER) { 454 brw_update_buffer_texture_surface(ctx, unit, surf_offset); 455 456 } else { 457 struct intel_texture_object *intel_obj = intel_texture_object(obj); 458 struct intel_mipmap_tree *mt = intel_obj->mt; 459 460 if (plane > 0) { 461 if (mt->plane[plane - 1] == NULL) 462 return; 463 mt = mt->plane[plane - 1]; 464 } 465 466 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); 467 /* If this is a view with restricted NumLayers, then our effective depth 468 * is not just the miptree depth. 469 */ 470 unsigned view_num_layers; 471 if (obj->Immutable && obj->Target != GL_TEXTURE_3D) { 472 view_num_layers = obj->NumLayers; 473 } else { 474 view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ? 475 mt->surf.logical_level0_px.depth : 476 mt->surf.logical_level0_px.array_len; 477 } 478 479 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style 480 * texturing functions that return a float, as our code generation always 481 * selects the .x channel (which would always be 0). 482 */ 483 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel]; 484 const bool alpha_depth = obj->DepthMode == GL_ALPHA && 485 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || 486 firstImage->_BaseFormat == GL_DEPTH_STENCIL); 487 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW : 488 brw_get_texture_swizzle(&brw->ctx, obj)); 489 490 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format; 491 enum isl_format format = translate_tex_format(brw, mesa_fmt, 492 for_txf ? 
GL_DECODE_EXT : 493 sampler->sRGBDecode); 494 495 /* Implement gen6 and gen7 gather work-around */ 496 bool need_green_to_blue = false; 497 if (for_gather) { 498 if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT || 499 format == ISL_FORMAT_R32G32_SINT || 500 format == ISL_FORMAT_R32G32_UINT)) { 501 format = ISL_FORMAT_R32G32_FLOAT_LD; 502 need_green_to_blue = devinfo->is_haswell; 503 } else if (devinfo->gen == 6) { 504 /* Sandybridge's gather4 message is broken for integer formats. 505 * To work around this, we pretend the surface is UNORM for 506 * 8 or 16-bit formats, and emit shader instructions to recover 507 * the real INT/UINT value. For 32-bit formats, we pretend 508 * the surface is FLOAT, and simply reinterpret the resulting 509 * bits. 510 */ 511 switch (format) { 512 case ISL_FORMAT_R8_SINT: 513 case ISL_FORMAT_R8_UINT: 514 format = ISL_FORMAT_R8_UNORM; 515 break; 516 517 case ISL_FORMAT_R16_SINT: 518 case ISL_FORMAT_R16_UINT: 519 format = ISL_FORMAT_R16_UNORM; 520 break; 521 522 case ISL_FORMAT_R32_SINT: 523 case ISL_FORMAT_R32_UINT: 524 format = ISL_FORMAT_R32_FLOAT; 525 break; 526 527 default: 528 break; 529 } 530 } 531 } 532 533 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { 534 if (devinfo->gen <= 7) { 535 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update); 536 mt = mt->r8stencil_mt; 537 } else { 538 mt = mt->stencil_mt; 539 } 540 format = ISL_FORMAT_R8_UINT; 541 } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) { 542 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update); 543 mt = mt->r8stencil_mt; 544 format = ISL_FORMAT_R8_UINT; 545 } 546 547 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; 548 549 struct isl_view view = { 550 .format = format, 551 .base_level = obj->MinLevel + obj->BaseLevel, 552 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1, 553 .base_array_layer = obj->MinLayer, 554 .array_len = view_num_layers, 555 .swizzle = { 556 .r = 
swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue), 557 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue), 558 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue), 559 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue), 560 }, 561 .usage = ISL_SURF_USAGE_TEXTURE_BIT, 562 }; 563 564 if (obj->Target == GL_TEXTURE_CUBE_MAP || 565 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) 566 view.usage |= ISL_SURF_USAGE_CUBE_BIT; 567 568 enum isl_aux_usage aux_usage = 569 intel_miptree_texture_aux_usage(brw, mt, format); 570 571 brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, 572 surf_offset, surf_index, 573 0); 574 } 575 } 576 577 void 578 brw_emit_buffer_surface_state(struct brw_context *brw, 579 uint32_t *out_offset, 580 struct brw_bo *bo, 581 unsigned buffer_offset, 582 unsigned surface_format, 583 unsigned buffer_size, 584 unsigned pitch, 585 unsigned reloc_flags) 586 { 587 const struct gen_device_info *devinfo = &brw->screen->devinfo; 588 uint32_t *dw = brw_state_batch(brw, 589 brw->isl_dev.ss.size, 590 brw->isl_dev.ss.align, 591 out_offset); 592 593 isl_buffer_fill_state(&brw->isl_dev, dw, 594 .address = !bo ? 
buffer_offset : 595 brw_state_reloc(&brw->batch, 596 *out_offset + brw->isl_dev.ss.addr_offset, 597 bo, buffer_offset, 598 reloc_flags), 599 .size = buffer_size, 600 .format = surface_format, 601 .stride = pitch, 602 .mocs = brw_get_bo_mocs(devinfo, bo)); 603 } 604 605 void 606 brw_update_buffer_texture_surface(struct gl_context *ctx, 607 unsigned unit, 608 uint32_t *surf_offset) 609 { 610 struct brw_context *brw = brw_context(ctx); 611 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; 612 struct intel_buffer_object *intel_obj = 613 intel_buffer_object(tObj->BufferObject); 614 uint32_t size = tObj->BufferSize; 615 struct brw_bo *bo = NULL; 616 mesa_format format = tObj->_BufferObjectFormat; 617 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); 618 int texel_size = _mesa_get_format_bytes(format); 619 620 if (intel_obj) { 621 size = MIN2(size, intel_obj->Base.Size); 622 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, 623 false); 624 } 625 626 /* The ARB_texture_buffer_specification says: 627 * 628 * "The number of texels in the buffer texture's texel array is given by 629 * 630 * floor(<buffer_size> / (<components> * sizeof(<base_type>)), 631 * 632 * where <buffer_size> is the size of the buffer object, in basic 633 * machine units and <components> and <base_type> are the element count 634 * and base data type for elements, as specified in Table X.1. The 635 * number of texels in the texel array is then clamped to the 636 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." 637 * 638 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, 639 * so that when ISL divides by stride to obtain the number of texels, that 640 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. 
641 */ 642 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size); 643 644 if (isl_format == ISL_FORMAT_UNSUPPORTED) { 645 _mesa_problem(NULL, "bad format %s for texture buffer\n", 646 _mesa_get_format_name(format)); 647 } 648 649 brw_emit_buffer_surface_state(brw, surf_offset, bo, 650 tObj->BufferOffset, 651 isl_format, 652 size, 653 texel_size, 654 0); 655 } 656 657 /** 658 * Set up a binding table entry for use by stream output logic (transform 659 * feedback). 660 * 661 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES. 662 */ 663 void 664 brw_update_sol_surface(struct brw_context *brw, 665 struct gl_buffer_object *buffer_obj, 666 uint32_t *out_offset, unsigned num_vector_components, 667 unsigned stride_dwords, unsigned offset_dwords) 668 { 669 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj); 670 uint32_t offset_bytes = 4 * offset_dwords; 671 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo, 672 offset_bytes, 673 buffer_obj->Size - offset_bytes, 674 true); 675 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset); 676 uint32_t pitch_minus_1 = 4*stride_dwords - 1; 677 size_t size_dwords = buffer_obj->Size / 4; 678 uint32_t buffer_size_minus_1, width, height, depth, surface_format; 679 680 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't 681 * too big to map using a single binding table entry? 682 */ 683 assert((size_dwords - offset_dwords) / stride_dwords 684 <= BRW_MAX_NUM_BUFFER_ENTRIES); 685 686 if (size_dwords > offset_dwords + num_vector_components) { 687 /* There is room for at least 1 transform feedback output in the buffer. 688 * Compute the number of additional transform feedback outputs the 689 * buffer has room for. 690 */ 691 buffer_size_minus_1 = 692 (size_dwords - offset_dwords - num_vector_components) / stride_dwords; 693 } else { 694 /* There isn't even room for a single transform feedback output in the 695 * buffer. 
We can't configure the binding table entry to prevent output 696 * entirely; we'll have to rely on the geometry shader to detect 697 * overflow. But to minimize the damage in case of a bug, set up the 698 * binding table entry to just allow a single output. 699 */ 700 buffer_size_minus_1 = 0; 701 } 702 width = buffer_size_minus_1 & 0x7f; 703 height = (buffer_size_minus_1 & 0xfff80) >> 7; 704 depth = (buffer_size_minus_1 & 0x7f00000) >> 20; 705 706 switch (num_vector_components) { 707 case 1: 708 surface_format = ISL_FORMAT_R32_FLOAT; 709 break; 710 case 2: 711 surface_format = ISL_FORMAT_R32G32_FLOAT; 712 break; 713 case 3: 714 surface_format = ISL_FORMAT_R32G32B32_FLOAT; 715 break; 716 case 4: 717 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; 718 break; 719 default: 720 unreachable("Invalid vector size for transform feedback output"); 721 } 722 723 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | 724 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | 725 surface_format << BRW_SURFACE_FORMAT_SHIFT | 726 BRW_SURFACE_RC_READ_WRITE; 727 surf[1] = brw_state_reloc(&brw->batch, 728 *out_offset + 4, bo, offset_bytes, RELOC_WRITE); 729 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | 730 height << BRW_SURFACE_HEIGHT_SHIFT); 731 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | 732 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); 733 surf[4] = 0; 734 surf[5] = 0; 735 } 736 737 /* Creates a new WM constant buffer reflecting the current fragment program's 738 * constants, if needed by the fragment program. 739 * 740 * Otherwise, constants go through the CURBEs using the brw_constant_buffer 741 * state atom. 
742 */ 743 static void 744 brw_upload_wm_pull_constants(struct brw_context *brw) 745 { 746 struct brw_stage_state *stage_state = &brw->wm.base; 747 /* BRW_NEW_FRAGMENT_PROGRAM */ 748 struct brw_program *fp = 749 (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; 750 751 /* BRW_NEW_FS_PROG_DATA */ 752 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; 753 754 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT); 755 /* _NEW_PROGRAM_CONSTANTS */ 756 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program, 757 stage_state, prog_data); 758 } 759 760 const struct brw_tracked_state brw_wm_pull_constants = { 761 .dirty = { 762 .mesa = _NEW_PROGRAM_CONSTANTS, 763 .brw = BRW_NEW_BATCH | 764 BRW_NEW_FRAGMENT_PROGRAM | 765 BRW_NEW_FS_PROG_DATA, 766 }, 767 .emit = brw_upload_wm_pull_constants, 768 }; 769 770 /** 771 * Creates a null renderbuffer surface. 772 * 773 * This is used when the shader doesn't write to any color output. An FB 774 * write to target 0 will still be emitted, because that's how the thread is 775 * terminated (and computed depth is returned), so we need to have the 776 * hardware discard the target 0 color output.. 777 */ 778 static void 779 emit_null_surface_state(struct brw_context *brw, 780 const struct gl_framebuffer *fb, 781 uint32_t *out_offset) 782 { 783 const struct gen_device_info *devinfo = &brw->screen->devinfo; 784 uint32_t *surf = brw_state_batch(brw, 785 brw->isl_dev.ss.size, 786 brw->isl_dev.ss.align, 787 out_offset); 788 789 /* Use the fb dimensions or 1x1x1 */ 790 const unsigned width = fb ? _mesa_geometric_width(fb) : 1; 791 const unsigned height = fb ? _mesa_geometric_height(fb) : 1; 792 const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1; 793 794 if (devinfo->gen != 6 || samples <= 1) { 795 isl_null_fill_state(&brw->isl_dev, surf, 796 isl_extent3d(width, height, 1)); 797 return; 798 } 799 800 /* On Gen6, null render targets seem to cause GPU hangs when multisampling. 
801 * So work around this problem by rendering into dummy color buffer. 802 * 803 * To decrease the amount of memory needed by the workaround buffer, we 804 * set its pitch to 128 bytes (the width of a Y tile). This means that 805 * the amount of memory needed for the workaround buffer is 806 * (width_in_tiles + height_in_tiles - 1) tiles. 807 * 808 * Note that since the workaround buffer will be interpreted by the 809 * hardware as an interleaved multisampled buffer, we need to compute 810 * width_in_tiles and height_in_tiles by dividing the width and height 811 * by 16 rather than the normal Y-tile size of 32. 812 */ 813 unsigned width_in_tiles = ALIGN(width, 16) / 16; 814 unsigned height_in_tiles = ALIGN(height, 16) / 16; 815 unsigned pitch_minus_1 = 127; 816 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; 817 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, 818 size_needed); 819 820 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | 821 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); 822 surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4, 823 brw->wm.multisampled_null_render_target_bo, 824 0, RELOC_WRITE); 825 826 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | 827 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); 828 829 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming 830 * Notes): 831 * 832 * If Surface Type is SURFTYPE_NULL, this field must be TRUE 833 */ 834 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | 835 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); 836 surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4; 837 surf[5] = 0; 838 } 839 840 /** 841 * Sets up a surface state structure to point at the given region. 842 * While it is only used for the front/back buffer currently, it should be 843 * usable for further buffers when doing ARB_draw_buffer support. 
844 */ 845 static uint32_t 846 gen4_update_renderbuffer_surface(struct brw_context *brw, 847 struct gl_renderbuffer *rb, 848 unsigned unit, 849 uint32_t surf_index) 850 { 851 const struct gen_device_info *devinfo = &brw->screen->devinfo; 852 struct gl_context *ctx = &brw->ctx; 853 struct intel_renderbuffer *irb = intel_renderbuffer(rb); 854 struct intel_mipmap_tree *mt = irb->mt; 855 uint32_t *surf; 856 uint32_t tile_x, tile_y; 857 enum isl_format format; 858 uint32_t offset; 859 /* _NEW_BUFFERS */ 860 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); 861 /* BRW_NEW_FS_PROG_DATA */ 862 863 if (rb->TexImage && !devinfo->has_surface_tile_offset) { 864 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); 865 866 if (tile_x != 0 || tile_y != 0) { 867 /* Original gen4 hardware couldn't draw to a non-tile-aligned 868 * destination in a miptree unless you actually setup your renderbuffer 869 * as a miptree and used the fragile lod/array_index/etc. controls to 870 * select the image. So, instead, we just make a new single-level 871 * miptree and render into that. 
872 */ 873 intel_renderbuffer_move_to_temp(brw, irb, false); 874 assert(irb->align_wa_mt); 875 mt = irb->align_wa_mt; 876 } 877 } 878 879 surf = brw_state_batch(brw, 6 * 4, 32, &offset); 880 881 format = brw->mesa_to_isl_render_format[rb_format]; 882 if (unlikely(!brw->mesa_format_supports_render[rb_format])) { 883 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", 884 __func__, _mesa_get_format_name(rb_format)); 885 } 886 887 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | 888 format << BRW_SURFACE_FORMAT_SHIFT); 889 890 /* reloc */ 891 assert(mt->offset % mt->cpp == 0); 892 surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo, 893 mt->offset + 894 intel_renderbuffer_get_tile_offsets(irb, 895 &tile_x, 896 &tile_y), 897 RELOC_WRITE); 898 899 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | 900 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); 901 902 surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) | 903 (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT); 904 905 surf[4] = brw_get_surface_num_multisamples(mt->surf.samples); 906 907 assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); 908 /* Note that the low bits of these fields are missing, so 909 * there's the possibility of getting in trouble. 910 */ 911 assert(tile_x % 4 == 0); 912 assert(tile_y % 2 == 0); 913 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | 914 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | 915 (mt->surf.image_alignment_el.height == 4 ? 
916 BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); 917 918 if (devinfo->gen < 6) { 919 /* _NEW_COLOR */ 920 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode && 921 (ctx->Color.BlendEnabled & (1 << unit))) 922 surf[0] |= BRW_SURFACE_BLEND_ENABLED; 923 924 if (!ctx->Color.ColorMask[unit][0]) 925 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; 926 if (!ctx->Color.ColorMask[unit][1]) 927 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; 928 if (!ctx->Color.ColorMask[unit][2]) 929 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; 930 931 /* As mentioned above, disable writes to the alpha component when the 932 * renderbuffer is XRGB. 933 */ 934 if (ctx->DrawBuffer->Visual.alphaBits == 0 || 935 !ctx->Color.ColorMask[unit][3]) { 936 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; 937 } 938 } 939 940 return offset; 941 } 942 943 static void 944 update_renderbuffer_surfaces(struct brw_context *brw) 945 { 946 const struct gen_device_info *devinfo = &brw->screen->devinfo; 947 const struct gl_context *ctx = &brw->ctx; 948 949 /* _NEW_BUFFERS | _NEW_COLOR */ 950 const struct gl_framebuffer *fb = ctx->DrawBuffer; 951 952 /* Render targets always start at binding table index 0. */ 953 const unsigned rt_start = 0; 954 955 uint32_t *surf_offsets = brw->wm.base.surf_offset; 956 957 /* Update surfaces for drawing buffers */ 958 if (fb->_NumColorDrawBuffers >= 1) { 959 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { 960 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; 961 962 if (intel_renderbuffer(rb)) { 963 surf_offsets[rt_start + i] = devinfo->gen >= 6 ? 
964 gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) : 965 gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i); 966 } else { 967 emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]); 968 } 969 } 970 } else { 971 emit_null_surface_state(brw, fb, &surf_offsets[rt_start]); 972 } 973 974 brw->ctx.NewDriverState |= BRW_NEW_SURFACES; 975 } 976 977 const struct brw_tracked_state brw_renderbuffer_surfaces = { 978 .dirty = { 979 .mesa = _NEW_BUFFERS | 980 _NEW_COLOR, 981 .brw = BRW_NEW_BATCH, 982 }, 983 .emit = update_renderbuffer_surfaces, 984 }; 985 986 const struct brw_tracked_state gen6_renderbuffer_surfaces = { 987 .dirty = { 988 .mesa = _NEW_BUFFERS, 989 .brw = BRW_NEW_BATCH | 990 BRW_NEW_AUX_STATE, 991 }, 992 .emit = update_renderbuffer_surfaces, 993 }; 994 995 static void 996 update_renderbuffer_read_surfaces(struct brw_context *brw) 997 { 998 const struct gl_context *ctx = &brw->ctx; 999 1000 /* BRW_NEW_FS_PROG_DATA */ 1001 const struct brw_wm_prog_data *wm_prog_data = 1002 brw_wm_prog_data(brw->wm.base.prog_data); 1003 1004 if (wm_prog_data->has_render_target_reads && 1005 !ctx->Extensions.MESA_shader_framebuffer_fetch) { 1006 /* _NEW_BUFFERS */ 1007 const struct gl_framebuffer *fb = ctx->DrawBuffer; 1008 1009 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { 1010 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; 1011 const struct intel_renderbuffer *irb = intel_renderbuffer(rb); 1012 const unsigned surf_index = 1013 wm_prog_data->binding_table.render_target_read_start + i; 1014 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index]; 1015 1016 if (irb) { 1017 const enum isl_format format = brw->mesa_to_isl_render_format[ 1018 _mesa_get_render_format(ctx, intel_rb_format(irb))]; 1019 assert(isl_format_supports_sampling(&brw->screen->devinfo, 1020 format)); 1021 1022 /* Override the target of the texture if the render buffer is a 1023 * single slice of a 3D texture (since the minimum array element 1024 * field of the 
surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            /* Single-level view of the attached slice(s) for sampling. */
            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            /* Don't use aux if the current draw isn't using it for this RT. */
            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

/**
 * Whether the texture object will actually be sampled as depth: either a
 * pure depth format, or depth/stencil with depth (not stencil) sampling
 * selected.
 */
static bool
is_depth_texture(struct intel_texture_object *iobj)
{
   GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
   return base_format == GL_DEPTH_COMPONENT ||
          (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
}

/**
 * Emit texture SURFACE_STATEs for every sampler used by \p prog, into the
 * given stage's binding table.
 *
 * \param for_gather  use the gather-specific binding table section
 *                    (gather_texture_start) instead of plane_start[plane].
 * \param plane       binding-table plane section to fill (0 is the default).
 */
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      /* Unused slots get a zero offset. */
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);
         struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
         struct intel_texture_object *iobj = intel_texture_object(obj);

         /* _NEW_TEXTURE */
         if (!obj)
            continue;

         if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
            /* A programming note for the sample_c message says:
             *
             *    "The Surface Format of the associated surface must be
             *     indicated as supporting shadow mapping as indicated in the
             *     surface format table."
             *
             * Accessing non-depth textures via a sampler*Shadow type is
             * undefined.  GLSL 4.50 page 162 says:
             *
             *    "If a shadow texture call is made to a sampler that does not
             *     represent a depth texture, then results are undefined."
             *
             * We give them a null surface (zeros) for undefined.  We've seen
             * GPU hangs with color buffers and sample_c, so we try and avoid
             * those with this hack.
             */
            emit_null_surface_state(brw, NULL, surf_offset + s);
         } else {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE -- plane 0 (default) surfaces for every render stage. */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overriden for only the
    * gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (vs && vs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      /* Additional plane sections (1 and 2) are only filled for the
       * fragment stage.  NOTE(review): presumably for multi-planar
       * texture formats -- confirm against plane_start[] users.
       */
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

/**
 * Compute-shader counterpart of brw_update_texture_surfaces().
 */
static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overriden for only the
    * gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};

/**
 * Emit a buffer SURFACE_STATE for one indexed buffer binding point
 * (UBO, SSBO or atomic buffer), or a null surface if nothing is bound.
 *
 * \param reloc_flags  RELOC_WRITE when the shader may write the buffer.
 */
static void
upload_buffer_surface(struct brw_context *brw,
                      struct gl_buffer_binding *binding,
                      uint32_t *out_offset,
                      enum isl_format format,
                      unsigned reloc_flags)
{
   struct gl_context *ctx = &brw->ctx;

   if (binding->BufferObject == ctx->Shared->NullBufferObj) {
      emit_null_surface_state(brw, NULL, out_offset);
   } else {
      /* The binding covers [Offset, end of buffer); clamp to the explicit
       * range size when one was given (AutomaticSize == false).
       */
      ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
      if (!binding->AutomaticSize)
         size = MIN2(size, binding->Size);

      struct intel_buffer_object *iobj =
         intel_buffer_object(binding->BufferObject);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, iobj, binding->Offset, size,
                                (reloc_flags & RELOC_WRITE) != 0);

      brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
                                    format, size, 1, reloc_flags);
   }
}

/**
 * Emit binding-table surfaces for all UBOs, atomic buffers and SSBOs used
 * by \p prog into \p stage_state's surf_offset array.
 */
void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog || (prog->info.num_ubos == 0 &&
                 prog->info.num_ssbos == 0 &&
                 prog->info.num_abos == 0))
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
      upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
                            ISL_FORMAT_R32G32B32A32_FLOAT, 0);
   }

   /* Atomic buffers share the SSBO section of the binding table:
    * ABO surfaces first, then the SSBO surfaces.
    */
   uint32_t *abo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
   uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos;

   for (int i = 0; i < prog->info.num_abos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
      upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   stage_state->push_constants_dirty = true;
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

/* Fragment-stage wrapper for brw_upload_ubo_surfaces(). */
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/* Compute-stage wrapper for brw_upload_ubo_surfaces(). */
static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

/* Compute-stage wrapper for brw_upload_image_surfaces(). */
static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

/**
 * Choose the hardware surface format for a shader image with the given
 * mesa format and access qualifier.  Returns an isl_format value, or
 * ISL_FORMAT_RAW when only untyped access is possible.
 */
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      /* Write-only images can use the format as-is; no typed-read
       * restriction applies.
       */
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

/**
 * Initialize the common fields of a brw_image_param: zero everything,
 * record the surface index, and disable address swizzling.
 */
static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

/**
 * Fill out the brw_image_param for a buffer texture image: 1-D size in
 * texels and the texel stride in bytes.
 */
static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   /* Clamp the texture's declared range to the backing buffer's size. */
   const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

/**
 * Number of layers an image view of \p target covers at \p level:
 * 6 for cube maps, the minified depth for 3D, the array length otherwise.
 */
static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

/**
 * Emit the SURFACE_STATE and fill out the brw_image_param for one shader
 * image unit.  Invalid image units get a null surface and default params.
 */
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         /* RAW surfaces are addressed in bytes, so use a texel size of 1. */
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         /* Single-level storage view of the bound level/layer range. */
         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            /* Untyped fallback: expose the miptree's BO (past its offset)
             * as a raw buffer surface.
             */
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            /* NOTE(review): surf_index is computed relative to the WM
             * stage's surf_offset array even when this is called for other
             * stages -- confirm how brw_emit_surface_state uses it.
             */
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            /* The surface is emitted with ISL_AUX_USAGE_NONE, so any
             * unresolved color must have been resolved before this point.
             */
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE,
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

/**
 * Emit binding-table surfaces and image params for all image uniforms of
 * \p prog into \p stage_state.
 */
void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms, make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

/* Fragment-stage wrapper for brw_upload_image_surfaces(). */
static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

/**
 * Upload the buffer surface holding the dispatch's work-group counts,
 * when the compute program reads them (uses_num_work_groups).
 */
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         /* Counts are only in CPU memory: upload the three GLuints into a
          * BO so the surface can reference them.
          */
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         /* Counts already live in a BO (e.g. an indirect dispatch). */
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};