1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
*/

#include "main/context.h"
#include "main/teximage.h"
#include "main/blend.h"
#include "main/bufferobj.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/renderbuffer.h"
#include "main/glformats.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_meta_util.h"
#include "brw_state.h"
#include "intel_buffer_objects.h"
#include "intel_fbo.h"
#include "common/gen_debug.h"

#define FILE_DEBUG_FLAG DEBUG_BLORP

/* blorp_context::lookup_shader callback: look for a previously compiled
 * blorp kernel in the brw program cache.  Returns false when the cache has
 * no entry for (key, key_size).
 */
static bool
brw_blorp_lookup_shader(struct blorp_context *blorp,
                        const void *key, uint32_t key_size,
                        uint32_t *kernel_out, void *prog_data_out)
{
   struct brw_context *brw = blorp->driver_ctx;
   return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                           key, key_size, kernel_out, prog_data_out);
}

/* blorp_context::upload_shader callback: store a freshly compiled blorp
 * kernel and its prog_data in the brw program cache, returning the results
 * through kernel_out/prog_data_out.  Always succeeds.
 */
static bool
brw_blorp_upload_shader(struct blorp_context *blorp,
                        const void *key, uint32_t key_size,
                        const void *kernel, uint32_t kernel_size,
                        const struct brw_stage_prog_data *prog_data,
                        uint32_t prog_data_size,
                        uint32_t *kernel_out, void *prog_data_out)
{
   struct brw_context *brw = blorp->driver_ctx;
   brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size,
                    kernel, kernel_size, prog_data, prog_data_size,
                    kernel_out, prog_data_out);
   return true;
}

/* One-time blorp setup for this context: initialize the embedded blorp
 * context, hook up the compiler, pick the generation-specific batch
 * emission entrypoint, and install the shader-cache callbacks above.
 */
void
brw_blorp_init(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   blorp_init(&brw->blorp, brw, &brw->isl_dev);

   brw->blorp.compiler = brw->screen->compiler;

   switch (devinfo->gen) {
   case 4:
      if (devinfo->is_g4x) {
         brw->blorp.exec = gen45_blorp_exec;
      } else {
         brw->blorp.exec = gen4_blorp_exec;
      }
      break;
   case 5:
      brw->blorp.exec = gen5_blorp_exec;
      break;
   case 6:
      brw->blorp.exec = gen6_blorp_exec;
      break;
   case 7:
      if (devinfo->is_haswell) {
         brw->blorp.exec = gen75_blorp_exec;
      } else {
         brw->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      brw->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      brw->blorp.exec = gen9_blorp_exec;
      break;
   case 10:
      brw->blorp.exec = gen10_blorp_exec;
      break;
   default:
      unreachable("Invalid gen");
   }

   brw->blorp.lookup_shader = brw_blorp_lookup_shader;
   brw->blorp.upload_shader = brw_blorp_upload_shader;
}

/* Fill out a blorp_surf describing mt at (*level, start_layer..start_layer +
 * num_layers).  Validates the requested level/layer range, wires up the main
 * surface and any auxiliary (MCS or HiZ) buffer with write relocations when
 * is_render_target is set, and finally rebases *level so it is relative to
 * mt->first_level, as ISL expects.
 *
 * NOTE(review): tmp_surfs is not referenced by this function body; it looks
 * like scratch storage kept for the callers' stack lifetime — confirm.
 */
static void
blorp_surf_for_miptree(struct brw_context *brw,
                       struct blorp_surf *surf,
                       struct intel_mipmap_tree *mt,
                       enum isl_aux_usage aux_usage,
                       bool is_render_target,
                       unsigned *level,
                       unsigned start_layer, unsigned num_layers,
                       struct isl_surf tmp_surfs[1])
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
      /* Interleaved-array MSAA: each logical layer occupies num_samples
       * physical layers, so check every physical slice we will touch.
       */
      const unsigned num_samples = mt->surf.samples;
      for (unsigned i = 0; i < num_layers; i++) {
         for (unsigned s = 0; s < num_samples; s++) {
            const unsigned phys_layer = (start_layer + i) * num_samples + s;
            intel_miptree_check_level_layer(mt, *level, phys_layer);
         }
      }
   } else {
      for (unsigned i = 0; i < num_layers; i++)
         intel_miptree_check_level_layer(mt, *level, start_layer + i);
   }

   *surf = (struct blorp_surf) {
      .surf = &mt->surf,
      .addr = (struct blorp_address) {
         .buffer = mt->bo,
         .offset = mt->offset,
         .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
         .mocs = brw_get_bo_mocs(devinfo, mt->bo),
      },
      .aux_usage = aux_usage,
   };

   struct isl_surf *aux_surf = NULL;
   if (mt->mcs_buf)
      aux_surf = &mt->mcs_buf->surf;
   else if (mt->hiz_buf)
      aux_surf = &mt->hiz_buf->surf;

   /* Rendering to stencil on gen <= 7 invalidates the R8 shadow copy used
    * for stencil texturing; flag it for regeneration.
    */
   if (mt->format == MESA_FORMAT_S_UINT8 && is_render_target &&
       devinfo->gen <= 7)
      mt->r8stencil_needs_update = true;

   /* HiZ may not exist for every miplevel; drop it when absent. */
   if (surf->aux_usage == ISL_AUX_USAGE_HIZ &&
       !intel_miptree_level_has_hiz(mt, *level))
      surf->aux_usage = ISL_AUX_USAGE_NONE;

   if (surf->aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      surf->clear_color = mt->fast_clear_color;

      surf->aux_surf = aux_surf;
      surf->aux_addr = (struct blorp_address) {
         .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
         .mocs = surf->addr.mocs,
      };

      if (mt->mcs_buf) {
         surf->aux_addr.buffer = mt->mcs_buf->bo;
         surf->aux_addr.offset = mt->mcs_buf->offset;
      } else {
         assert(mt->hiz_buf);
         assert(surf->aux_usage == ISL_AUX_USAGE_HIZ);

         surf->aux_addr.buffer = mt->hiz_buf->bo;
         surf->aux_addr.offset = mt->hiz_buf->offset;
      }
   } else {
      surf->aux_addr = (struct blorp_address) {
         .buffer = NULL,
      };
      memset(&surf->clear_color, 0, sizeof(surf->clear_color));
   }
   assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
          (surf->aux_addr.buffer == NULL));

   /* ISL wants real levels, not offset ones. */
   *level -= mt->first_level;
}

/* Map a mesa_format to the isl_format blorp should use for it.  Depth and
 * stencil formats are re-interpreted as color formats with the same bit
 * layout; everything else goes through the context's render-format table
 * (for render targets) or the generic translation helper (for textures).
 */
static enum isl_format
brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
                        bool is_render_target)
{
   switch (format) {
   case MESA_FORMAT_NONE:
      return ISL_FORMAT_UNSUPPORTED;
   case MESA_FORMAT_S_UINT8:
      return ISL_FORMAT_R8_UINT;
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return ISL_FORMAT_R32_FLOAT;
   case MESA_FORMAT_Z_UNORM16:
      return ISL_FORMAT_R16_UNORM;
   default: {
      if (is_render_target) {
         assert(brw->mesa_format_supports_render[format]);
         return brw->mesa_to_isl_render_format[format];
      } else {
         return brw_isl_format_for_mesa_format(format);
      }
      break;
   }
   }
}

/**
 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static enum isl_channel_select
swizzle_to_scs(GLenum swizzle)
{
   return (enum isl_channel_select)((swizzle + 4) & 7);
}

/**
 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
 * the physical layer holding sample 0.  So, for example, if
 * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
 * 4*n.
256 */ 257 void 258 brw_blorp_blit_miptrees(struct brw_context *brw, 259 struct intel_mipmap_tree *src_mt, 260 unsigned src_level, unsigned src_layer, 261 mesa_format src_format, int src_swizzle, 262 struct intel_mipmap_tree *dst_mt, 263 unsigned dst_level, unsigned dst_layer, 264 mesa_format dst_format, 265 float src_x0, float src_y0, 266 float src_x1, float src_y1, 267 float dst_x0, float dst_y0, 268 float dst_x1, float dst_y1, 269 GLenum filter, bool mirror_x, bool mirror_y, 270 bool decode_srgb, bool encode_srgb) 271 { 272 const struct gen_device_info *devinfo = &brw->screen->devinfo; 273 274 DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)" 275 "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n", 276 __func__, 277 src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt, 278 src_level, src_layer, src_x0, src_y0, src_x1, src_y1, 279 dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt, 280 dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1, 281 mirror_x, mirror_y); 282 283 if (!decode_srgb && _mesa_get_format_color_encoding(src_format) == GL_SRGB) 284 src_format = _mesa_get_srgb_format_linear(src_format); 285 286 if (!encode_srgb && _mesa_get_format_color_encoding(dst_format) == GL_SRGB) 287 dst_format = _mesa_get_srgb_format_linear(dst_format); 288 289 /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F 290 * texture, the above code configures the source format for L32_FLOAT or 291 * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge, 292 * the SAMPLE message appears to handle multisampled L32_FLOAT and 293 * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work 294 * around the problem by using a source format of R32_FLOAT. This 295 * shouldn't affect rendering correctness, since the destination format is 296 * R32_FLOAT, so only the contents of the red channel matters. 
297 */ 298 if (devinfo->gen == 6 && 299 src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1 && 300 src_mt->format == dst_mt->format && 301 (dst_format == MESA_FORMAT_L_FLOAT32 || 302 dst_format == MESA_FORMAT_I_FLOAT32)) { 303 src_format = dst_format = MESA_FORMAT_R_FLOAT32; 304 } 305 306 enum isl_format src_isl_format = 307 brw_blorp_to_isl_format(brw, src_format, false); 308 enum isl_aux_usage src_aux_usage = 309 intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format); 310 /* We do format workarounds for some depth formats so we can't reliably 311 * sample with HiZ. One of these days, we should fix that. 312 */ 313 if (src_aux_usage == ISL_AUX_USAGE_HIZ) 314 src_aux_usage = ISL_AUX_USAGE_NONE; 315 const bool src_clear_supported = 316 src_aux_usage != ISL_AUX_USAGE_NONE && src_mt->format == src_format; 317 intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1, 318 src_aux_usage, src_clear_supported); 319 320 enum isl_format dst_isl_format = 321 brw_blorp_to_isl_format(brw, dst_format, true); 322 enum isl_aux_usage dst_aux_usage = 323 intel_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, 324 false, false); 325 const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE; 326 intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1, 327 dst_aux_usage, dst_clear_supported); 328 329 struct isl_surf tmp_surfs[2]; 330 struct blorp_surf src_surf, dst_surf; 331 blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false, 332 &src_level, src_layer, 1, &tmp_surfs[0]); 333 blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true, 334 &dst_level, dst_layer, 1, &tmp_surfs[1]); 335 336 struct isl_swizzle src_isl_swizzle = { 337 .r = swizzle_to_scs(GET_SWZ(src_swizzle, 0)), 338 .g = swizzle_to_scs(GET_SWZ(src_swizzle, 1)), 339 .b = swizzle_to_scs(GET_SWZ(src_swizzle, 2)), 340 .a = swizzle_to_scs(GET_SWZ(src_swizzle, 3)), 341 }; 342 343 struct blorp_batch batch; 344 blorp_batch_init(&brw->blorp, &batch, brw, 0); 
345 blorp_blit(&batch, &src_surf, src_level, src_layer, 346 src_isl_format, src_isl_swizzle, 347 &dst_surf, dst_level, dst_layer, 348 dst_isl_format, ISL_SWIZZLE_IDENTITY, 349 src_x0, src_y0, src_x1, src_y1, 350 dst_x0, dst_y0, dst_x1, dst_y1, 351 filter, mirror_x, mirror_y); 352 blorp_batch_finish(&batch); 353 354 intel_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, 355 dst_aux_usage); 356 } 357 358 void 359 brw_blorp_copy_miptrees(struct brw_context *brw, 360 struct intel_mipmap_tree *src_mt, 361 unsigned src_level, unsigned src_layer, 362 struct intel_mipmap_tree *dst_mt, 363 unsigned dst_level, unsigned dst_layer, 364 unsigned src_x, unsigned src_y, 365 unsigned dst_x, unsigned dst_y, 366 unsigned src_width, unsigned src_height) 367 { 368 const struct gen_device_info *devinfo = &brw->screen->devinfo; 369 370 DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d" 371 "to %dx %s mt %p %d %d (%d,%d)\n", 372 __func__, 373 src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt, 374 src_level, src_layer, src_x, src_y, src_width, src_height, 375 dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt, 376 dst_level, dst_layer, dst_x, dst_y); 377 378 enum isl_aux_usage src_aux_usage, dst_aux_usage; 379 bool src_clear_supported, dst_clear_supported; 380 381 switch (src_mt->aux_usage) { 382 case ISL_AUX_USAGE_MCS: 383 case ISL_AUX_USAGE_CCS_E: 384 src_aux_usage = src_mt->aux_usage; 385 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since 386 * we're going to re-interpret the format as an integer format possibly 387 * with a different number of components, we can't handle clear colors 388 * until gen9. 
389 */ 390 src_clear_supported = devinfo->gen >= 9; 391 break; 392 default: 393 src_aux_usage = ISL_AUX_USAGE_NONE; 394 src_clear_supported = false; 395 break; 396 } 397 398 switch (dst_mt->aux_usage) { 399 case ISL_AUX_USAGE_MCS: 400 case ISL_AUX_USAGE_CCS_E: 401 dst_aux_usage = dst_mt->aux_usage; 402 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since 403 * we're going to re-interpret the format as an integer format possibly 404 * with a different number of components, we can't handle clear colors 405 * until gen9. 406 */ 407 dst_clear_supported = devinfo->gen >= 9; 408 break; 409 default: 410 dst_aux_usage = ISL_AUX_USAGE_NONE; 411 dst_clear_supported = false; 412 break; 413 } 414 415 intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1, 416 src_aux_usage, src_clear_supported); 417 intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1, 418 dst_aux_usage, dst_clear_supported); 419 420 struct isl_surf tmp_surfs[2]; 421 struct blorp_surf src_surf, dst_surf; 422 blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false, 423 &src_level, src_layer, 1, &tmp_surfs[0]); 424 blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true, 425 &dst_level, dst_layer, 1, &tmp_surfs[1]); 426 427 /* The hardware seems to have issues with having a two different format 428 * views of the same texture in the sampler cache at the same time. It's 429 * unclear exactly what the issue is but it hurts glCopyImageSubData 430 * particularly badly because it does a lot of format reinterprets. We 431 * badly need better understanding of the issue and a better fix but this 432 * works for now and fixes CTS tests. 433 * 434 * TODO: Remove this hack! 
435 */ 436 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | 437 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 438 439 struct blorp_batch batch; 440 blorp_batch_init(&brw->blorp, &batch, brw, 0); 441 blorp_copy(&batch, &src_surf, src_level, src_layer, 442 &dst_surf, dst_level, dst_layer, 443 src_x, src_y, dst_x, dst_y, src_width, src_height); 444 blorp_batch_finish(&batch); 445 446 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | 447 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 448 449 intel_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, 450 dst_aux_usage); 451 } 452 453 void 454 brw_blorp_copy_buffers(struct brw_context *brw, 455 struct brw_bo *src_bo, 456 unsigned src_offset, 457 struct brw_bo *dst_bo, 458 unsigned dst_offset, 459 unsigned size) 460 { 461 DBG("%s %d bytes from %p[%d] to %p[%d]", 462 __func__, size, src_bo, src_offset, dst_bo, dst_offset); 463 464 struct blorp_batch batch; 465 struct blorp_address src = { .buffer = src_bo, .offset = src_offset }; 466 struct blorp_address dst = { .buffer = dst_bo, .offset = dst_offset }; 467 468 blorp_batch_init(&brw->blorp, &batch, brw, 0); 469 blorp_buffer_copy(&batch, src, dst, size); 470 blorp_batch_finish(&batch); 471 } 472 473 474 static struct intel_mipmap_tree * 475 find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb) 476 { 477 struct intel_mipmap_tree *mt = irb->mt; 478 if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt) 479 mt = mt->stencil_mt; 480 return mt; 481 } 482 483 static int 484 blorp_get_texture_swizzle(const struct intel_renderbuffer *irb) 485 { 486 return irb->Base.Base._BaseFormat == GL_RGB ? 
487 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE) : 488 SWIZZLE_XYZW; 489 } 490 491 static void 492 do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit, 493 struct intel_renderbuffer *src_irb, mesa_format src_format, 494 struct intel_renderbuffer *dst_irb, mesa_format dst_format, 495 GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, 496 GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, 497 GLenum filter, bool mirror_x, bool mirror_y) 498 { 499 const struct gl_context *ctx = &brw->ctx; 500 501 /* Find source/dst miptrees */ 502 struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); 503 struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb); 504 505 const bool do_srgb = ctx->Color.sRGBEnabled; 506 507 /* Do the blit */ 508 brw_blorp_blit_miptrees(brw, 509 src_mt, src_irb->mt_level, src_irb->mt_layer, 510 src_format, blorp_get_texture_swizzle(src_irb), 511 dst_mt, dst_irb->mt_level, dst_irb->mt_layer, 512 dst_format, 513 srcX0, srcY0, srcX1, srcY1, 514 dstX0, dstY0, dstX1, dstY1, 515 filter, mirror_x, mirror_y, 516 do_srgb, do_srgb); 517 518 dst_irb->need_downsample = true; 519 } 520 521 static bool 522 try_blorp_blit(struct brw_context *brw, 523 const struct gl_framebuffer *read_fb, 524 const struct gl_framebuffer *draw_fb, 525 GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, 526 GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, 527 GLenum filter, GLbitfield buffer_bit) 528 { 529 const struct gen_device_info *devinfo = &brw->screen->devinfo; 530 struct gl_context *ctx = &brw->ctx; 531 532 /* Sync up the state of window system buffers. We need to do this before 533 * we go looking for the buffers. 
534 */ 535 intel_prepare_render(brw); 536 537 bool mirror_x, mirror_y; 538 if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb, 539 &srcX0, &srcY0, &srcX1, &srcY1, 540 &dstX0, &dstY0, &dstX1, &dstY1, 541 &mirror_x, &mirror_y)) 542 return true; 543 544 /* Find buffers */ 545 struct intel_renderbuffer *src_irb; 546 struct intel_renderbuffer *dst_irb; 547 struct intel_mipmap_tree *src_mt; 548 struct intel_mipmap_tree *dst_mt; 549 switch (buffer_bit) { 550 case GL_COLOR_BUFFER_BIT: 551 src_irb = intel_renderbuffer(read_fb->_ColorReadBuffer); 552 for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) { 553 dst_irb = intel_renderbuffer(draw_fb->_ColorDrawBuffers[i]); 554 if (dst_irb) 555 do_blorp_blit(brw, buffer_bit, 556 src_irb, src_irb->Base.Base.Format, 557 dst_irb, dst_irb->Base.Base.Format, 558 srcX0, srcY0, srcX1, srcY1, 559 dstX0, dstY0, dstX1, dstY1, 560 filter, mirror_x, mirror_y); 561 } 562 break; 563 case GL_DEPTH_BUFFER_BIT: 564 src_irb = 565 intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); 566 dst_irb = 567 intel_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer); 568 src_mt = find_miptree(buffer_bit, src_irb); 569 dst_mt = find_miptree(buffer_bit, dst_irb); 570 571 /* We can't handle format conversions between Z24 and other formats 572 * since we have to lie about the surface format. See the comments in 573 * brw_blorp_surface_info::set(). 574 */ 575 if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) != 576 (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT)) 577 return false; 578 579 /* We also can't handle any combined depth-stencil formats because we 580 * have to reinterpret as a color format. 
581 */ 582 if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL || 583 _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL) 584 return false; 585 586 do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, 587 dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, 588 srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, 589 filter, mirror_x, mirror_y); 590 break; 591 case GL_STENCIL_BUFFER_BIT: 592 /* Blorp doesn't support combined depth stencil which is all we have 593 * prior to gen6. 594 */ 595 if (devinfo->gen < 6) 596 return false; 597 598 src_irb = 599 intel_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer); 600 dst_irb = 601 intel_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer); 602 do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, 603 dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, 604 srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, 605 filter, mirror_x, mirror_y); 606 break; 607 default: 608 unreachable("not reached"); 609 } 610 611 return true; 612 } 613 614 static void 615 apply_y_flip(int *y0, int *y1, int height) 616 { 617 int tmp = height - *y0; 618 *y0 = height - *y1; 619 *y1 = tmp; 620 } 621 622 bool 623 brw_blorp_copytexsubimage(struct brw_context *brw, 624 struct gl_renderbuffer *src_rb, 625 struct gl_texture_image *dst_image, 626 int slice, 627 int srcX0, int srcY0, 628 int dstX0, int dstY0, 629 int width, int height) 630 { 631 struct gl_context *ctx = &brw->ctx; 632 struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb); 633 struct intel_texture_image *intel_image = intel_texture_image(dst_image); 634 635 /* No pixel transfer operations (zoom, bias, mapping), just a blit */ 636 if (brw->ctx._ImageTransferState) 637 return false; 638 639 /* Sync up the state of window system buffers. We need to do this before 640 * we go looking at the src renderbuffer's miptree. 
*/
   intel_prepare_render(brw);

   struct intel_mipmap_tree *src_mt = src_irb->mt;
   struct intel_mipmap_tree *dst_mt = intel_image->mt;

   /* There is support for only up to eight samples. */
   if (src_mt->surf.samples > 8 || dst_mt->surf.samples > 8)
      return false;

   /* Base formats must match; blorp is not asked to swizzle components in. */
   if (_mesa_get_format_base_format(src_rb->Format) !=
       _mesa_get_format_base_format(dst_image->TexFormat)) {
      return false;
   }

   /* We can't handle format conversions between Z24 and other formats since
    * we have to lie about the surface format.  See the comments in
    * brw_blorp_surface_info::set().
    */
   if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) !=
       (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT)) {
      return false;
   }

   /* We also can't handle any combined depth-stencil formats because we
    * have to reinterpret as a color format.
    */
   if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL ||
       _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL)
      return false;

   /* The blit path renders into the destination, so it must be renderable. */
   if (!brw->mesa_format_supports_render[dst_image->TexFormat])
      return false;

   /* Source clipping shouldn't be necessary, since copytexsubimage (in
    * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
    * takes care of it.
    *
    * Destination clipping shouldn't be necessary since the restrictions on
    * glCopyTexSubImage prevent the user from specifying a destination rectangle
    * that falls outside the bounds of the destination texture.
    * See error_check_subtexture_dimensions().
    */

   int srcY1 = srcY0 + height;
   int srcX1 = srcX0 + width;
   int dstX1 = dstX0 + width;
   int dstY1 = dstY0 + height;

   /* Account for the fact that in the system framebuffer, the origin is at
    * the lower left.
    */
   bool mirror_y = _mesa_is_winsys_fbo(ctx->ReadBuffer);
   if (mirror_y)
      apply_y_flip(&srcY0, &srcY1, src_rb->Height);

   /* Account for face selection and texture view MinLayer */
   int dst_slice = slice + dst_image->TexObject->MinLayer + dst_image->Face;
   int dst_level = dst_image->Level + dst_image->TexObject->MinLevel;

   brw_blorp_blit_miptrees(brw,
                           src_mt, src_irb->mt_level, src_irb->mt_layer,
                           src_rb->Format, blorp_get_texture_swizzle(src_irb),
                           dst_mt, dst_level, dst_slice,
                           dst_image->TexFormat,
                           srcX0, srcY0, srcX1, srcY1,
                           dstX0, dstY0, dstX1, dstY1,
                           GL_NEAREST, false, mirror_y,
                           false, false);

   /* If we're copying to a packed depth stencil texture and the source
    * framebuffer has separate stencil, we need to also copy the stencil data
    * over.
    */
   src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
   if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
       src_rb != NULL) {
      src_irb = intel_renderbuffer(src_rb);
      src_mt = src_irb->mt;

      if (src_mt->stencil_mt)
         src_mt = src_mt->stencil_mt;
      if (dst_mt->stencil_mt)
         dst_mt = dst_mt->stencil_mt;

      /* Skip the extra copy when depth and stencil share one miptree. */
      if (src_mt != dst_mt) {
         brw_blorp_blit_miptrees(brw,
                                 src_mt, src_irb->mt_level, src_irb->mt_layer,
                                 src_mt->format,
                                 blorp_get_texture_swizzle(src_irb),
                                 dst_mt, dst_level, dst_slice,
                                 dst_mt->format,
                                 srcX0, srcY0, srcX1, srcY1,
                                 dstX0, dstY0, dstX1, dstY1,
                                 GL_NEAREST, false, mirror_y,
                                 false, false);
      }
   }

   return true;
}


/* BlitFramebuffer driver hook.  Tries each requested buffer bit with blorp
 * and clears the bits that were handled; the returned mask holds the bits
 * that still need some fallback blit path.
 */
GLbitfield
brw_blorp_framebuffer(struct brw_context *brw,
                      struct gl_framebuffer *readFb,
                      struct gl_framebuffer *drawFb,
                      GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                      GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                      GLbitfield mask, GLenum filter)
{
   static GLbitfield buffer_bits[] = {
      GL_COLOR_BUFFER_BIT,
      GL_DEPTH_BUFFER_BIT,
      GL_STENCIL_BUFFER_BIT,
   };

   for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
      if ((mask & buffer_bits[i]) &&
          try_blorp_blit(brw, readFb, drawFb,
                         srcX0, srcY0, srcX1, srcY1,
                         dstX0, dstY0, dstX1, dstY1,
                         filter, buffer_bits[i])) {
         mask &= ~buffer_bits[i];
      }
   }

   return mask;
}

/* Resolve the caller's pixel data to a brw_bo plus offset for blorp.
 *
 * For PBO-backed transfers the underlying BO is returned with an extra
 * reference (so the caller can unconditionally unref).  Otherwise the
 * pixels are copied into a freshly allocated temporary BO (upload only).
 * Returns NULL on misaligned writable PBOs or on allocation failure.
 * Row and image strides are returned through the *_out parameters.
 */
static struct brw_bo *
blorp_get_client_bo(struct brw_context *brw,
                    unsigned w, unsigned h, unsigned d,
                    GLenum target, GLenum format, GLenum type,
                    const void *pixels,
                    const struct gl_pixelstore_attrib *packing,
                    uint32_t *offset_out, uint32_t *row_stride_out,
                    uint32_t *image_stride_out, bool read_only)
{
   /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
   const GLuint dims = _mesa_get_texture_dimensions(target);
   const uint32_t first_pixel = _mesa_image_offset(dims, packing, w, h,
                                                   format, type, 0, 0, 0);
   const uint32_t last_pixel = _mesa_image_offset(dims, packing, w, h,
                                                  format, type,
                                                  d - 1, h - 1, w);
   const uint32_t stride = _mesa_image_row_stride(packing, w, format, type);
   const uint32_t cpp = _mesa_bytes_per_pixel(format, type);
   const uint32_t size = last_pixel - first_pixel;

   *row_stride_out = stride;
   *image_stride_out = _mesa_image_image_stride(packing, w, h, format, type);

   if (_mesa_is_bufferobj(packing->BufferObj)) {
      /* For a PBO, "pixels" is an offset into the buffer object. */
      const uint32_t offset = first_pixel + (intptr_t)pixels;
      if (!read_only && ((offset % cpp) || (stride % cpp))) {
         perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
         return NULL;
      }

      /* This is a user-provided PBO.
We just need to get the BO out */
      struct intel_buffer_object *intel_pbo =
         intel_buffer_object(packing->BufferObj);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, intel_pbo, offset, size, !read_only);

      /* We take a reference to the BO so that the caller can just always
       * unref without having to worry about whether it's a user PBO or one
       * we created.
       */
      brw_bo_reference(bo);

      *offset_out = offset;
      return bo;
   } else {
      /* Someone should have already checked that there is data to upload. */
      assert(pixels);

      /* Creating a temp buffer currently only works for upload */
      assert(read_only);

      /* This is not a user-provided PBO.  Instead, pixels is a pointer to CPU
       * data which we need to copy into a BO.
       */
      struct brw_bo *bo =
         brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size, 64);
      if (bo == NULL) {
         perf_debug("intel_texsubimage: temp bo creation failed: size = %u\n",
                    size);
         return NULL;
      }

      if (brw_bo_subdata(bo, 0, size, pixels + first_pixel)) {
         perf_debug("intel_texsubimage: temp bo upload failed\n");
         brw_bo_unreference(bo);
         return NULL;
      }

      *offset_out = 0;
      return bo;
   }
}

/* Consider all the restrictions and determine the format of the source. */
static mesa_format
blorp_get_client_format(struct brw_context *brw,
                        GLenum format, GLenum type,
                        const struct gl_pixelstore_attrib *packing)
{
   /* Pixel transfer ops would require CPU-side processing; bail. */
   if (brw->ctx._ImageTransferState)
      return MESA_FORMAT_NONE;

   if (packing->SwapBytes || packing->LsbFirst || packing->Invert) {
      perf_debug("intel_texsubimage_blorp: unsupported gl_pixelstore_attrib\n");
      return MESA_FORMAT_NONE;
   }

   /* Only straightforward client formats are supported. */
   if (format != GL_RED &&
       format != GL_RG &&
       format != GL_RGB &&
       format != GL_BGR &&
       format != GL_RGBA &&
       format != GL_BGRA &&
       format != GL_ALPHA &&
       format != GL_RED_INTEGER &&
       format != GL_RG_INTEGER &&
       format != GL_RGB_INTEGER &&
       format != GL_BGR_INTEGER &&
       format != GL_RGBA_INTEGER &&
       format != GL_BGRA_INTEGER) {
      perf_debug("intel_texsubimage_blorp: %s not supported",
                 _mesa_enum_to_string(format));
      return MESA_FORMAT_NONE;
   }

   return _mesa_tex_format_from_format_and_type(&brw->ctx, format, type);
}

/* blorp_blit cannot convert between signed and unsigned integer formats,
 * so detect that combination up front.
 */
static bool
need_signed_unsigned_int_conversion(mesa_format src_format,
                                    mesa_format dst_format)
{
   const GLenum src_type = _mesa_get_format_datatype(src_format);
   const GLenum dst_type = _mesa_get_format_datatype(dst_format);
   return (src_type == GL_INT && dst_type == GL_UNSIGNED_INT) ||
          (src_type == GL_UNSIGNED_INT && dst_type == GL_INT);
}

/* Upload client pixel data into dst_mt via blorp, one slice at a time.
 * Returns false when the transfer cannot be expressed with blorp and the
 * caller should take a fallback path.
 */
bool
brw_blorp_upload_miptree(struct brw_context *brw,
                         struct intel_mipmap_tree *dst_mt,
                         mesa_format dst_format,
                         uint32_t level, uint32_t x, uint32_t y, uint32_t z,
                         uint32_t width, uint32_t height, uint32_t depth,
                         GLenum target, GLenum format, GLenum type,
                         const void *pixels,
                         const struct gl_pixelstore_attrib *packing)
{
   const mesa_format src_format =
      blorp_get_client_format(brw, format, type, packing);
   if (src_format == MESA_FORMAT_NONE)
      return false;

   if (!brw->mesa_format_supports_render[dst_format]) {
      perf_debug("intel_texsubimage: can't use %s as render target\n",
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* This function relies on blorp_blit to upload the pixel data to the
    * miptree.  But, blorp_blit doesn't support signed to unsigned or
    * unsigned to signed integer conversions.
    */
   if (need_signed_unsigned_int_conversion(src_format, dst_format))
      return false;

   uint32_t src_offset, src_row_stride, src_image_stride;
   struct brw_bo *src_bo =
      blorp_get_client_bo(brw, width, height, depth,
                          target, format, type, pixels, packing,
                          &src_offset, &src_row_stride,
                          &src_image_stride, true);
   if (src_bo == NULL)
      return false;

   /* Now that source is offset to correct starting point, adjust the
    * given dimensions to treat 1D arrays as 2D.
    */
   if (target == GL_TEXTURE_1D_ARRAY) {
      assert(depth == 1);
      assert(z == 0);
      depth = height;
      height = 1;
      z = y;
      y = 0;
      src_image_stride = src_row_stride;
   }

   intel_miptree_check_level_layer(dst_mt, level, z + depth - 1);

   bool result = false;

   /* Blit slice-by-slice creating a single-slice miptree for each layer. Even
    * in case of linear buffers hardware wants image arrays to be aligned by
    * four rows. This way hardware only gets one image at a time and any
    * source alignment will do.
    */
   for (unsigned i = 0; i < depth; ++i) {
      struct intel_mipmap_tree *src_mt = intel_miptree_create_for_bo(
                                            brw, src_bo, src_format,
                                            src_offset + i * src_image_stride,
                                            width, height, 1,
                                            src_row_stride,
                                            ISL_TILING_LINEAR, 0);

      if (!src_mt) {
         perf_debug("intel_texsubimage: miptree creation for src failed\n");
         goto err;
      }

      /* In case exact match is needed, copy using equivalent UINT formats
       * preventing hardware from changing presentation for SNORM -1.
963 */ 964 if (src_mt->format == dst_format) { 965 brw_blorp_copy_miptrees(brw, src_mt, 0, 0, 966 dst_mt, level, z + i, 967 0, 0, x, y, width, height); 968 } else { 969 brw_blorp_blit_miptrees(brw, src_mt, 0, 0, 970 src_format, SWIZZLE_XYZW, 971 dst_mt, level, z + i, 972 dst_format, 973 0, 0, width, height, 974 x, y, x + width, y + height, 975 GL_NEAREST, false, false, false, false); 976 } 977 978 intel_miptree_release(&src_mt); 979 } 980 981 result = true; 982 983 err: 984 brw_bo_unreference(src_bo); 985 986 return result; 987 } 988 989 bool 990 brw_blorp_download_miptree(struct brw_context *brw, 991 struct intel_mipmap_tree *src_mt, 992 mesa_format src_format, uint32_t src_swizzle, 993 uint32_t level, uint32_t x, uint32_t y, uint32_t z, 994 uint32_t width, uint32_t height, uint32_t depth, 995 GLenum target, GLenum format, GLenum type, 996 bool y_flip, const void *pixels, 997 const struct gl_pixelstore_attrib *packing) 998 { 999 const mesa_format dst_format = 1000 blorp_get_client_format(brw, format, type, packing); 1001 if (dst_format == MESA_FORMAT_NONE) 1002 return false; 1003 1004 if (!brw->mesa_format_supports_render[dst_format]) { 1005 perf_debug("intel_texsubimage: can't use %s as render target\n", 1006 _mesa_get_format_name(dst_format)); 1007 return false; 1008 } 1009 1010 /* This function relies on blorp_blit to download the pixel data from the 1011 * miptree. But, blorp_blit doesn't support signed to unsigned or unsigned 1012 * to signed integer conversions. 1013 */ 1014 if (need_signed_unsigned_int_conversion(src_format, dst_format)) 1015 return false; 1016 1017 /* We can't fetch from LUMINANCE or intensity as that would require a 1018 * non-trivial swizzle. 
1019 */ 1020 switch (_mesa_get_format_base_format(src_format)) { 1021 case GL_LUMINANCE: 1022 case GL_LUMINANCE_ALPHA: 1023 case GL_INTENSITY: 1024 return false; 1025 default: 1026 break; 1027 } 1028 1029 /* This pass only works for PBOs */ 1030 assert(_mesa_is_bufferobj(packing->BufferObj)); 1031 1032 uint32_t dst_offset, dst_row_stride, dst_image_stride; 1033 struct brw_bo *dst_bo = 1034 blorp_get_client_bo(brw, width, height, depth, 1035 target, format, type, pixels, packing, 1036 &dst_offset, &dst_row_stride, 1037 &dst_image_stride, false); 1038 if (dst_bo == NULL) 1039 return false; 1040 1041 /* Now that source is offset to correct starting point, adjust the 1042 * given dimensions to treat 1D arrays as 2D. 1043 */ 1044 if (target == GL_TEXTURE_1D_ARRAY) { 1045 assert(depth == 1); 1046 assert(z == 0); 1047 depth = height; 1048 height = 1; 1049 z = y; 1050 y = 0; 1051 dst_image_stride = dst_row_stride; 1052 } 1053 1054 intel_miptree_check_level_layer(src_mt, level, z + depth - 1); 1055 1056 int y0 = y; 1057 int y1 = y + height; 1058 if (y_flip) { 1059 apply_y_flip(&y0, &y1, minify(src_mt->surf.phys_level0_sa.height, 1060 level - src_mt->first_level)); 1061 } 1062 1063 bool result = false; 1064 1065 /* Blit slice-by-slice creating a single-slice miptree for each layer. Even 1066 * in case of linear buffers hardware wants image arrays to be aligned by 1067 * four rows. This way hardware only gets one image at a time and any 1068 * source alignment will do. 
1069 */ 1070 for (unsigned i = 0; i < depth; ++i) { 1071 struct intel_mipmap_tree *dst_mt = intel_miptree_create_for_bo( 1072 brw, dst_bo, dst_format, 1073 dst_offset + i * dst_image_stride, 1074 width, height, 1, 1075 dst_row_stride, 1076 ISL_TILING_LINEAR, 0); 1077 1078 if (!dst_mt) { 1079 perf_debug("intel_texsubimage: miptree creation for src failed\n"); 1080 goto err; 1081 } 1082 1083 /* In case exact match is needed, copy using equivalent UINT formats 1084 * preventing hardware from changing presentation for SNORM -1. 1085 */ 1086 if (dst_mt->format == src_format && !y_flip && 1087 src_swizzle == SWIZZLE_XYZW) { 1088 brw_blorp_copy_miptrees(brw, src_mt, level, z + i, 1089 dst_mt, 0, 0, 1090 x, y, 0, 0, width, height); 1091 } else { 1092 brw_blorp_blit_miptrees(brw, src_mt, level, z + i, 1093 src_format, src_swizzle, 1094 dst_mt, 0, 0, dst_format, 1095 x, y0, x + width, y1, 1096 0, 0, width, height, 1097 GL_NEAREST, false, y_flip, false, false); 1098 } 1099 1100 intel_miptree_release(&dst_mt); 1101 } 1102 1103 result = true; 1104 1105 /* As we implement PBO transfers by binding the user-provided BO as a 1106 * fake framebuffer and rendering to it. This breaks the invariant of the 1107 * GL that nothing is able to render to a BO, causing nondeterministic 1108 * corruption issues because the render cache is not coherent with a 1109 * number of other caches that the BO could potentially be bound to 1110 * afterwards. 1111 * 1112 * This could be solved in the same way that we guarantee texture 1113 * coherency after a texture is attached to a framebuffer and 1114 * rendered to, but that would involve checking *all* BOs bound to 1115 * the pipeline for the case we need to emit a cache flush due to 1116 * previous rendering to any of them -- Including vertex, index, 1117 * uniform, atomic counter, shader image, transform feedback, 1118 * indirect draw buffers, etc. 
1119 * 1120 * That would increase the per-draw call overhead even though it's 1121 * very unlikely that any of the BOs bound to the pipeline has been 1122 * rendered to via a PBO at any point, so it seems better to just 1123 * flush here unconditionally. 1124 */ 1125 brw_emit_mi_flush(brw); 1126 1127 err: 1128 brw_bo_unreference(dst_bo); 1129 1130 return result; 1131 } 1132 1133 static bool 1134 set_write_disables(const struct intel_renderbuffer *irb, 1135 const GLubyte *color_mask, bool *color_write_disable) 1136 { 1137 /* Format information in the renderbuffer represents the requirements 1138 * given by the client. There are cases where the backing miptree uses, 1139 * for example, RGBA to represent RGBX. Since the client is only expecting 1140 * RGB we can treat alpha as not used and write whatever we like into it. 1141 */ 1142 const GLenum base_format = irb->Base.Base._BaseFormat; 1143 const int components = _mesa_base_format_component_count(base_format); 1144 bool disables = false; 1145 1146 assert(components > 0); 1147 1148 for (int i = 0; i < components; i++) { 1149 color_write_disable[i] = !color_mask[i]; 1150 disables = disables || !color_mask[i]; 1151 } 1152 1153 return disables; 1154 } 1155 1156 static void 1157 do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb, 1158 struct gl_renderbuffer *rb, unsigned buf, 1159 bool partial_clear, bool encode_srgb) 1160 { 1161 struct gl_context *ctx = &brw->ctx; 1162 struct intel_renderbuffer *irb = intel_renderbuffer(rb); 1163 uint32_t x0, x1, y0, y1; 1164 1165 mesa_format format = irb->Base.Base.Format; 1166 if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB) 1167 format = _mesa_get_srgb_format_linear(format); 1168 enum isl_format isl_format = brw->mesa_to_isl_render_format[format]; 1169 1170 x0 = fb->_Xmin; 1171 x1 = fb->_Xmax; 1172 if (rb->Name != 0) { 1173 y0 = fb->_Ymin; 1174 y1 = fb->_Ymax; 1175 } else { 1176 y0 = rb->Height - fb->_Ymax; 1177 y1 = rb->Height - 
fb->_Ymin; 1178 } 1179 1180 /* If the clear region is empty, just return. */ 1181 if (x0 == x1 || y0 == y1) 1182 return; 1183 1184 bool can_fast_clear = !partial_clear; 1185 1186 bool color_write_disable[4] = { false, false, false, false }; 1187 if (set_write_disables(irb, ctx->Color.ColorMask[buf], color_write_disable)) 1188 can_fast_clear = false; 1189 1190 /* We store clear colors as floats or uints as needed. If there are 1191 * texture views in play, the formats will not properly be respected 1192 * during resolves because the resolve operations only know about the 1193 * miptree and not the renderbuffer. 1194 */ 1195 if (irb->Base.Base.Format != irb->mt->format) 1196 can_fast_clear = false; 1197 1198 if (!irb->mt->supports_fast_clear || 1199 !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor)) 1200 can_fast_clear = false; 1201 1202 /* Surface state can only record one fast clear color value. Therefore 1203 * unless different levels/layers agree on the color it can be used to 1204 * represent only single level/layer. Here it will be reserved for the 1205 * first slice (level 0, layer 0). 1206 */ 1207 if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer) 1208 can_fast_clear = false; 1209 1210 unsigned level = irb->mt_level; 1211 const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1; 1212 1213 /* If the MCS buffer hasn't been allocated yet, we need to allocate it now. 1214 */ 1215 if (can_fast_clear && !irb->mt->mcs_buf) { 1216 assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D); 1217 if (!intel_miptree_alloc_ccs(brw, irb->mt)) { 1218 /* There are a few reasons in addition to out-of-memory, that can 1219 * cause intel_miptree_alloc_non_msrt_mcs to fail. Try to recover by 1220 * falling back to non-fast clear. 
1221 */ 1222 can_fast_clear = false; 1223 } 1224 } 1225 1226 if (can_fast_clear) { 1227 const enum isl_aux_state aux_state = 1228 intel_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer); 1229 union isl_color_value clear_color = 1230 brw_meta_convert_fast_clear_color(brw, irb->mt, 1231 &ctx->Color.ClearColor); 1232 1233 bool same_clear_color = 1234 !intel_miptree_set_clear_color(ctx, irb->mt, clear_color); 1235 1236 /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear 1237 * is redundant and can be skipped. 1238 */ 1239 if (aux_state == ISL_AUX_STATE_CLEAR && same_clear_color) 1240 return; 1241 1242 DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__, 1243 irb->mt, irb->mt_level, irb->mt_layer, num_layers); 1244 1245 /* We can't setup the blorp_surf until we've allocated the MCS above */ 1246 struct isl_surf isl_tmp[2]; 1247 struct blorp_surf surf; 1248 blorp_surf_for_miptree(brw, &surf, irb->mt, irb->mt->aux_usage, true, 1249 &level, irb->mt_layer, num_layers, isl_tmp); 1250 1251 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 1252 * 1253 * "Any transition from any value in {Clear, Render, Resolve} to a 1254 * different value in {Clear, Render, Resolve} requires end of pipe 1255 * synchronization." 1256 * 1257 * In other words, fast clear ops are not properly synchronized with 1258 * other drawing. We need to use a PIPE_CONTROL to ensure that the 1259 * contents of the previous draw hit the render target before we resolve 1260 * and again afterwards to ensure that the resolve is complete before we 1261 * do any more regular drawing. 
1262 */ 1263 brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); 1264 1265 struct blorp_batch batch; 1266 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1267 blorp_fast_clear(&batch, &surf, isl_format, 1268 level, irb->mt_layer, num_layers, 1269 x0, y0, x1, y1); 1270 blorp_batch_finish(&batch); 1271 1272 brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); 1273 1274 /* Now that the fast clear has occurred, put the buffer in 1275 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing 1276 * redundant clears. 1277 */ 1278 intel_miptree_set_aux_state(brw, irb->mt, irb->mt_level, 1279 irb->mt_layer, num_layers, 1280 ISL_AUX_STATE_CLEAR); 1281 } else { 1282 DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__, 1283 irb->mt, irb->mt_level, irb->mt_layer, num_layers); 1284 1285 enum isl_aux_usage aux_usage = 1286 intel_miptree_render_aux_usage(brw, irb->mt, isl_format, 1287 false, false); 1288 intel_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer, 1289 num_layers, aux_usage); 1290 1291 struct isl_surf isl_tmp[2]; 1292 struct blorp_surf surf; 1293 blorp_surf_for_miptree(brw, &surf, irb->mt, aux_usage, true, 1294 &level, irb->mt_layer, num_layers, isl_tmp); 1295 1296 union isl_color_value clear_color; 1297 memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4); 1298 1299 struct blorp_batch batch; 1300 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1301 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 1302 level, irb->mt_layer, num_layers, 1303 x0, y0, x1, y1, 1304 clear_color, color_write_disable); 1305 blorp_batch_finish(&batch); 1306 1307 intel_miptree_finish_render(brw, irb->mt, level, irb->mt_layer, 1308 num_layers, aux_usage); 1309 } 1310 1311 return; 1312 } 1313 1314 void 1315 brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, 1316 GLbitfield mask, bool partial_clear, bool encode_srgb) 1317 { 1318 for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { 1319 
struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; 1320 struct intel_renderbuffer *irb = intel_renderbuffer(rb); 1321 1322 /* Only clear the buffers present in the provided mask */ 1323 if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0) 1324 continue; 1325 1326 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, 1327 * the framebuffer can be complete with some attachments missing. In 1328 * this case the _ColorDrawBuffers pointer will be NULL. 1329 */ 1330 if (rb == NULL) 1331 continue; 1332 1333 do_single_blorp_clear(brw, fb, rb, buf, partial_clear, encode_srgb); 1334 irb->need_downsample = true; 1335 } 1336 1337 return; 1338 } 1339 1340 void 1341 brw_blorp_clear_depth_stencil(struct brw_context *brw, 1342 struct gl_framebuffer *fb, 1343 GLbitfield mask, bool partial_clear) 1344 { 1345 const struct gl_context *ctx = &brw->ctx; 1346 struct gl_renderbuffer *depth_rb = 1347 fb->Attachment[BUFFER_DEPTH].Renderbuffer; 1348 struct gl_renderbuffer *stencil_rb = 1349 fb->Attachment[BUFFER_STENCIL].Renderbuffer; 1350 1351 if (!depth_rb || ctx->Depth.Mask == GL_FALSE) 1352 mask &= ~BUFFER_BIT_DEPTH; 1353 1354 if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0) 1355 mask &= ~BUFFER_BIT_STENCIL; 1356 1357 if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) 1358 return; 1359 1360 uint32_t x0, x1, y0, y1, rb_name, rb_height; 1361 if (depth_rb) { 1362 rb_name = depth_rb->Name; 1363 rb_height = depth_rb->Height; 1364 if (stencil_rb) { 1365 assert(depth_rb->Width == stencil_rb->Width); 1366 assert(depth_rb->Height == stencil_rb->Height); 1367 } 1368 } else { 1369 assert(stencil_rb); 1370 rb_name = stencil_rb->Name; 1371 rb_height = stencil_rb->Height; 1372 } 1373 1374 x0 = fb->_Xmin; 1375 x1 = fb->_Xmax; 1376 if (rb_name != 0) { 1377 y0 = fb->_Ymin; 1378 y1 = fb->_Ymax; 1379 } else { 1380 y0 = rb_height - fb->_Ymax; 1381 y1 = rb_height - fb->_Ymin; 1382 } 1383 1384 /* If the clear region is empty, just return. 
*/ 1385 if (x0 == x1 || y0 == y1) 1386 return; 1387 1388 uint32_t level, start_layer, num_layers; 1389 struct isl_surf isl_tmp[4]; 1390 struct blorp_surf depth_surf, stencil_surf; 1391 1392 struct intel_mipmap_tree *depth_mt = NULL; 1393 if (mask & BUFFER_BIT_DEPTH) { 1394 struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb); 1395 depth_mt = find_miptree(GL_DEPTH_BUFFER_BIT, irb); 1396 1397 level = irb->mt_level; 1398 start_layer = irb->mt_layer; 1399 num_layers = fb->MaxNumLayers ? irb->layer_count : 1; 1400 1401 intel_miptree_prepare_depth(brw, depth_mt, level, 1402 start_layer, num_layers); 1403 1404 unsigned depth_level = level; 1405 blorp_surf_for_miptree(brw, &depth_surf, depth_mt, depth_mt->aux_usage, 1406 true, &depth_level, start_layer, num_layers, 1407 &isl_tmp[0]); 1408 assert(depth_level == level); 1409 } 1410 1411 uint8_t stencil_mask = 0; 1412 struct intel_mipmap_tree *stencil_mt = NULL; 1413 if (mask & BUFFER_BIT_STENCIL) { 1414 struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb); 1415 stencil_mt = find_miptree(GL_STENCIL_BUFFER_BIT, irb); 1416 1417 if (mask & BUFFER_BIT_DEPTH) { 1418 assert(level == irb->mt_level); 1419 assert(start_layer == irb->mt_layer); 1420 assert(num_layers == fb->MaxNumLayers ? irb->layer_count : 1); 1421 } else { 1422 level = irb->mt_level; 1423 start_layer = irb->mt_layer; 1424 num_layers = fb->MaxNumLayers ? 
irb->layer_count : 1; 1425 } 1426 1427 stencil_mask = ctx->Stencil.WriteMask[0] & 0xff; 1428 1429 intel_miptree_prepare_access(brw, stencil_mt, level, 1, 1430 start_layer, num_layers, 1431 ISL_AUX_USAGE_NONE, false); 1432 1433 unsigned stencil_level = level; 1434 blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt, 1435 ISL_AUX_USAGE_NONE, true, 1436 &stencil_level, start_layer, num_layers, 1437 &isl_tmp[2]); 1438 } 1439 1440 assert((mask & BUFFER_BIT_DEPTH) || stencil_mask); 1441 1442 struct blorp_batch batch; 1443 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1444 blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf, 1445 level, start_layer, num_layers, 1446 x0, y0, x1, y1, 1447 (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear, 1448 stencil_mask, ctx->Stencil.Clear); 1449 blorp_batch_finish(&batch); 1450 1451 if (mask & BUFFER_BIT_DEPTH) { 1452 intel_miptree_finish_depth(brw, depth_mt, level, 1453 start_layer, num_layers, true); 1454 } 1455 1456 if (stencil_mask) { 1457 intel_miptree_finish_write(brw, stencil_mt, level, 1458 start_layer, num_layers, 1459 ISL_AUX_USAGE_NONE); 1460 } 1461 } 1462 1463 void 1464 brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt, 1465 unsigned level, unsigned layer, 1466 enum blorp_fast_clear_op resolve_op) 1467 { 1468 DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer); 1469 1470 const mesa_format format = _mesa_get_srgb_format_linear(mt->format); 1471 1472 struct isl_surf isl_tmp[1]; 1473 struct blorp_surf surf; 1474 blorp_surf_for_miptree(brw, &surf, mt, mt->aux_usage, true, 1475 &level, layer, 1 /* num_layers */, 1476 isl_tmp); 1477 1478 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)": 1479 * 1480 * "Any transition from any value in {Clear, Render, Resolve} to a 1481 * different value in {Clear, Render, Resolve} requires end of pipe 1482 * synchronization." 
1483 * 1484 * In other words, fast clear ops are not properly synchronized with 1485 * other drawing. We need to use a PIPE_CONTROL to ensure that the 1486 * contents of the previous draw hit the render target before we resolve 1487 * and again afterwards to ensure that the resolve is complete before we 1488 * do any more regular drawing. 1489 */ 1490 brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); 1491 1492 1493 struct blorp_batch batch; 1494 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1495 blorp_ccs_resolve(&batch, &surf, level, layer, 1, 1496 brw_blorp_to_isl_format(brw, format, true), 1497 resolve_op); 1498 blorp_batch_finish(&batch); 1499 1500 /* See comment above */ 1501 brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); 1502 } 1503 1504 void 1505 brw_blorp_mcs_partial_resolve(struct brw_context *brw, 1506 struct intel_mipmap_tree *mt, 1507 uint32_t start_layer, uint32_t num_layers) 1508 { 1509 DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt, 1510 start_layer, start_layer + num_layers - 1); 1511 1512 assert(mt->aux_usage == ISL_AUX_USAGE_MCS); 1513 1514 const mesa_format format = _mesa_get_srgb_format_linear(mt->format); 1515 enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true); 1516 1517 struct isl_surf isl_tmp[1]; 1518 struct blorp_surf surf; 1519 uint32_t level = 0; 1520 blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_MCS, true, 1521 &level, start_layer, num_layers, isl_tmp); 1522 1523 struct blorp_batch batch; 1524 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1525 blorp_mcs_partial_resolve(&batch, &surf, isl_format, 1526 start_layer, num_layers); 1527 blorp_batch_finish(&batch); 1528 } 1529 1530 /** 1531 * Perform a HiZ or depth resolve operation. 
1532 * 1533 * For an overview of HiZ ops, see the following sections of the Sandy Bridge 1534 * PRM, Volume 1, Part 2: 1535 * - 7.5.3.1 Depth Buffer Clear 1536 * - 7.5.3.2 Depth Buffer Resolve 1537 * - 7.5.3.3 Hierarchical Depth Buffer Resolve 1538 */ 1539 void 1540 intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, 1541 unsigned int level, unsigned int start_layer, 1542 unsigned int num_layers, enum blorp_hiz_op op) 1543 { 1544 assert(intel_miptree_level_has_hiz(mt, level)); 1545 assert(op != BLORP_HIZ_OP_NONE); 1546 const struct gen_device_info *devinfo = &brw->screen->devinfo; 1547 const char *opname = NULL; 1548 1549 switch (op) { 1550 case BLORP_HIZ_OP_DEPTH_RESOLVE: 1551 opname = "depth resolve"; 1552 break; 1553 case BLORP_HIZ_OP_HIZ_RESOLVE: 1554 opname = "hiz ambiguate"; 1555 break; 1556 case BLORP_HIZ_OP_DEPTH_CLEAR: 1557 opname = "depth clear"; 1558 break; 1559 case BLORP_HIZ_OP_NONE: 1560 opname = "noop?"; 1561 break; 1562 } 1563 1564 DBG("%s %s to mt %p level %d layers %d-%d\n", 1565 __func__, opname, mt, level, start_layer, start_layer + num_layers - 1); 1566 1567 /* The following stalls and flushes are only documented to be required for 1568 * HiZ clear operations. However, they also seem to be required for 1569 * resolve operations. 1570 */ 1571 if (devinfo->gen == 6) { 1572 /* From the Sandy Bridge PRM, volume 2 part 1, page 313: 1573 * 1574 * "If other rendering operations have preceded this clear, a 1575 * PIPE_CONTROL with write cache flush enabled and Z-inhibit 1576 * disabled must be issued before the rectangle primitive used for 1577 * the depth buffer clear operation. 
1578 */ 1579 brw_emit_pipe_control_flush(brw, 1580 PIPE_CONTROL_RENDER_TARGET_FLUSH | 1581 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1582 PIPE_CONTROL_CS_STALL); 1583 } else if (devinfo->gen >= 7) { 1584 /* 1585 * From the Ivybridge PRM, volume 2, "Depth Buffer Clear": 1586 * 1587 * If other rendering operations have preceded this clear, a 1588 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit 1589 * enabled must be issued before the rectangle primitive used for 1590 * the depth buffer clear operation. 1591 * 1592 * Same applies for Gen8 and Gen9. 1593 * 1594 * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 1595 * PIPE_CONTROL, Depth Cache Flush Enable: 1596 * 1597 * This bit must not be set when Depth Stall Enable bit is set in 1598 * this packet. 1599 * 1600 * This is confirmed to hold for real, HSW gets immediate gpu hangs. 1601 * 1602 * Therefore issue two pipe control flushes, one for cache flush and 1603 * another for depth stall. 1604 */ 1605 brw_emit_pipe_control_flush(brw, 1606 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1607 PIPE_CONTROL_CS_STALL); 1608 1609 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); 1610 } 1611 1612 assert(mt->aux_usage == ISL_AUX_USAGE_HIZ && mt->hiz_buf); 1613 1614 struct isl_surf isl_tmp[2]; 1615 struct blorp_surf surf; 1616 blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_HIZ, true, 1617 &level, start_layer, num_layers, isl_tmp); 1618 1619 struct blorp_batch batch; 1620 blorp_batch_init(&brw->blorp, &batch, brw, 0); 1621 blorp_hiz_op(&batch, &surf, level, start_layer, num_layers, op); 1622 blorp_batch_finish(&batch); 1623 1624 /* The following stalls and flushes are only documented to be required for 1625 * HiZ clear operations. However, they also seem to be required for 1626 * resolve operations. 
1627 */ 1628 if (devinfo->gen == 6) { 1629 /* From the Sandy Bridge PRM, volume 2 part 1, page 314: 1630 * 1631 * "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be 1632 * followed by a PIPE_CONTROL command with DEPTH_STALL bit set 1633 * and Then followed by Depth FLUSH' 1634 */ 1635 brw_emit_pipe_control_flush(brw, 1636 PIPE_CONTROL_DEPTH_STALL); 1637 1638 brw_emit_pipe_control_flush(brw, 1639 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1640 PIPE_CONTROL_CS_STALL); 1641 } else if (devinfo->gen >= 8) { 1642 /* 1643 * From the Broadwell PRM, volume 7, "Depth Buffer Clear": 1644 * 1645 * "Depth buffer clear pass using any of the methods (WM_STATE, 1646 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a 1647 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits 1648 * "set" before starting to render. DepthStall and DepthFlush are 1649 * not needed between consecutive depth clear passes nor is it 1650 * required if the depth clear pass was done with 1651 * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP." 1652 * 1653 * TODO: Such as the spec says, this could be conditional. 1654 */ 1655 brw_emit_pipe_control_flush(brw, 1656 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1657 PIPE_CONTROL_DEPTH_STALL); 1658 1659 } 1660 } 1661