1 /* 2 * Copyright 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "util/ralloc.h" 25 26 #include "main/macros.h" /* Needed for MAX3 and MAX2 for format_rgb9e5 */ 27 #include "util/format_rgb9e5.h" 28 #include "util/format_srgb.h" 29 30 #include "blorp_priv.h" 31 #include "compiler/brw_eu_defines.h" 32 33 #include "blorp_nir_builder.h" 34 35 #define FILE_DEBUG_FLAG DEBUG_BLORP 36 37 struct brw_blorp_const_color_prog_key 38 { 39 enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_CLEAR */ 40 bool use_simd16_replicated_data; 41 bool pad[3]; 42 }; 43 44 static bool 45 blorp_params_get_clear_kernel(struct blorp_context *blorp, 46 struct blorp_params *params, 47 bool use_replicated_data) 48 { 49 const struct brw_blorp_const_color_prog_key blorp_key = { 50 .shader_type = BLORP_SHADER_TYPE_CLEAR, 51 .use_simd16_replicated_data = use_replicated_data, 52 }; 53 54 if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), 55 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 56 return true; 57 58 void *mem_ctx = ralloc_context(NULL); 59 60 nir_builder b; 61 nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); 62 b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear"); 63 64 nir_variable *v_color = 65 BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 66 67 nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out, 68 glsl_vec4_type(), 69 "gl_FragColor"); 70 frag_color->data.location = FRAG_RESULT_COLOR; 71 72 nir_copy_var(&b, frag_color, v_color); 73 74 struct brw_wm_prog_key wm_key; 75 brw_blorp_init_wm_prog_key(&wm_key); 76 77 struct brw_wm_prog_data prog_data; 78 const unsigned *program = 79 blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, use_replicated_data, 80 &prog_data); 81 82 bool result = 83 blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), 84 program, prog_data.base.program_size, 85 &prog_data.base, sizeof(prog_data), 86 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 87 88 ralloc_free(mem_ctx); 89 return result; 90 } 91 92 struct layer_offset_vs_key { 93 enum blorp_shader_type shader_type; 94 unsigned num_inputs; 95 }; 96 97 /* In the case of doing attachment clears, we are using a surface state that 98 * is handed to us so we can't set (and don't even know) the base array layer. 99 * In order to do a layered clear in this scenario, we need some way of adding 100 * the base array layer to the instance id. Unfortunately, our hardware has 101 * no real concept of "base instance", so we have to do it manually in a 102 * vertex shader. 103 */ 104 static bool 105 blorp_params_get_layer_offset_vs(struct blorp_context *blorp, 106 struct blorp_params *params) 107 { 108 struct layer_offset_vs_key blorp_key = { 109 .shader_type = BLORP_SHADER_TYPE_LAYER_OFFSET_VS, 110 }; 111 112 if (params->wm_prog_data) 113 blorp_key.num_inputs = params->wm_prog_data->num_varying_inputs; 114 115 if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), 116 ¶ms->vs_prog_kernel, ¶ms->vs_prog_data)) 117 return true; 118 119 void *mem_ctx = ralloc_context(NULL); 120 121 nir_builder b; 122 nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_VERTEX, NULL); 123 b.shader->info.name = ralloc_strdup(b.shader, "BLORP-layer-offset-vs"); 124 125 const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); 126 127 /* First we deal with the header which has instance and base instance */ 128 nir_variable *a_header = nir_variable_create(b.shader, nir_var_shader_in, 129 uvec4_type, "header"); 130 a_header->data.location = VERT_ATTRIB_GENERIC0; 131 132 nir_variable *v_layer = nir_variable_create(b.shader, nir_var_shader_out, 133 glsl_int_type(), "layer_id"); 134 v_layer->data.location = VARYING_SLOT_LAYER; 135 136 /* Compute the layer id */ 137 nir_ssa_def *header = nir_load_var(&b, a_header); 138 nir_ssa_def *base_layer = nir_channel(&b, header, 0); 139 nir_ssa_def *instance = nir_channel(&b, header, 1); 140 nir_store_var(&b, v_layer, nir_iadd(&b, instance, base_layer), 0x1); 141 142 /* Then we copy the vertex from the next slot to VARYING_SLOT_POS */ 143 nir_variable *a_vertex = nir_variable_create(b.shader, nir_var_shader_in, 144 glsl_vec4_type(), "a_vertex"); 145 a_vertex->data.location = VERT_ATTRIB_GENERIC1; 146 147 nir_variable *v_pos = nir_variable_create(b.shader, nir_var_shader_out, 148 glsl_vec4_type(), "v_pos"); 149 v_pos->data.location = VARYING_SLOT_POS; 150 151 nir_copy_var(&b, v_pos, a_vertex); 152 153 /* Then we copy everything else */ 154 for (unsigned i = 0; i < blorp_key.num_inputs; i++) { 155 nir_variable *a_in = nir_variable_create(b.shader, nir_var_shader_in, 156 uvec4_type, "input"); 157 a_in->data.location = VERT_ATTRIB_GENERIC2 + i; 158 159 nir_variable *v_out = nir_variable_create(b.shader, nir_var_shader_out, 160 uvec4_type, "output"); 161 v_out->data.location = VARYING_SLOT_VAR0 + i; 162 163 nir_copy_var(&b, v_out, a_in); 164 } 165 166 struct brw_vs_prog_data vs_prog_data; 167 memset(&vs_prog_data, 0, sizeof(vs_prog_data)); 168 169 const unsigned *program = 170 blorp_compile_vs(blorp, mem_ctx, b.shader, &vs_prog_data); 171 172 bool result = 173 blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), 174 program, vs_prog_data.base.base.program_size, 175 &vs_prog_data.base.base, sizeof(vs_prog_data), 176 ¶ms->vs_prog_kernel, ¶ms->vs_prog_data); 177 178 ralloc_free(mem_ctx); 179 return result; 180 } 181 182 /* The x0, y0, x1, and y1 parameters must already be populated with the render 183 * area of the framebuffer to be cleared. 184 */ 185 static void 186 get_fast_clear_rect(const struct isl_device *dev, 187 const struct isl_surf *aux_surf, 188 unsigned *x0, unsigned *y0, 189 unsigned *x1, unsigned *y1) 190 { 191 unsigned int x_align, y_align; 192 unsigned int x_scaledown, y_scaledown; 193 194 /* Only single sampled surfaces need to (and actually can) be resolved. */ 195 if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) { 196 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 197 * Target(s)", beneath the "Fast Color Clear" bullet (p327): 198 * 199 * Clear pass must have a clear rectangle that must follow 200 * alignment rules in terms of pixels and lines as shown in the 201 * table below. Further, the clear-rectangle height and width 202 * must be multiple of the following dimensions. If the height 203 * and width of the render target being cleared do not meet these 204 * requirements, an MCS buffer can be created such that it 205 * follows the requirement and covers the RT. 206 * 207 * The alignment size in the table that follows is related to the 208 * alignment size that is baked into the CCS surface format but with X 209 * alignment multiplied by 16 and Y alignment multiplied by 32. 210 */ 211 x_align = isl_format_get_layout(aux_surf->format)->bw; 212 y_align = isl_format_get_layout(aux_surf->format)->bh; 213 214 x_align *= 16; 215 216 /* SKL+ line alignment requirement for Y-tiled are half those of the prior 217 * generations. 218 */ 219 if (dev->info->gen >= 9) 220 y_align *= 16; 221 else 222 y_align *= 32; 223 224 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 225 * Target(s)", beneath the "Fast Color Clear" bullet (p327): 226 * 227 * In order to optimize the performance MCS buffer (when bound to 228 * 1X RT) clear similarly to MCS buffer clear for MSRT case, 229 * clear rect is required to be scaled by the following factors 230 * in the horizontal and vertical directions: 231 * 232 * The X and Y scale down factors in the table that follows are each 233 * equal to half the alignment value computed above. 234 */ 235 x_scaledown = x_align / 2; 236 y_scaledown = y_align / 2; 237 238 /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel 239 * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color 240 * Clear of Non-MultiSampled Render Target Restrictions": 241 * 242 * Clear rectangle must be aligned to two times the number of 243 * pixels in the table shown below due to 16x16 hashing across the 244 * slice. 245 */ 246 x_align *= 2; 247 y_align *= 2; 248 } else { 249 assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); 250 251 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 252 * Target(s)", beneath the "MSAA Compression" bullet (p326): 253 * 254 * Clear pass for this case requires that scaled down primitive 255 * is sent down with upper left co-ordinate to coincide with 256 * actual rectangle being cleared. For MSAA, clear rectangles 257 * height and width need to as show in the following table in 258 * terms of (width,height) of the RT. 259 * 260 * MSAA Width of Clear Rect Height of Clear Rect 261 * 2X Ceil(1/8*width) Ceil(1/2*height) 262 * 4X Ceil(1/8*width) Ceil(1/2*height) 263 * 8X Ceil(1/2*width) Ceil(1/2*height) 264 * 16X width Ceil(1/2*height) 265 * 266 * The text "with upper left co-ordinate to coincide with actual 267 * rectangle being cleared" is a little confusing--it seems to imply 268 * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to 269 * feed the pipeline using the rectangle (x,y) to 270 * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on 271 * the number of samples. Experiments indicate that this is not 272 * quite correct; actually, what the hardware appears to do is to 273 * align whatever rectangle is sent down the pipeline to the nearest 274 * multiple of 2x2 blocks, and then scale it up by a factor of N 275 * horizontally and 2 vertically. So the resulting alignment is 4 276 * vertically and either 4 or 16 horizontally, and the scaledown 277 * factor is 2 vertically and either 2 or 8 horizontally. 278 */ 279 switch (aux_surf->format) { 280 case ISL_FORMAT_MCS_2X: 281 case ISL_FORMAT_MCS_4X: 282 x_scaledown = 8; 283 break; 284 case ISL_FORMAT_MCS_8X: 285 x_scaledown = 2; 286 break; 287 case ISL_FORMAT_MCS_16X: 288 x_scaledown = 1; 289 break; 290 default: 291 unreachable("Unexpected MCS format for fast clear"); 292 } 293 y_scaledown = 2; 294 x_align = x_scaledown * 2; 295 y_align = y_scaledown * 2; 296 } 297 298 *x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown; 299 *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown; 300 *x1 = ALIGN(*x1, x_align) / x_scaledown; 301 *y1 = ALIGN(*y1, y_align) / y_scaledown; 302 } 303 304 void 305 blorp_fast_clear(struct blorp_batch *batch, 306 const struct blorp_surf *surf, enum isl_format format, 307 uint32_t level, uint32_t start_layer, uint32_t num_layers, 308 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 309 { 310 /* Ensure that all layers undergoing the clear have an auxiliary buffer. */ 311 assert(start_layer + num_layers <= 312 MAX2(surf->aux_surf->logical_level0_px.depth >> level, 313 surf->aux_surf->logical_level0_px.array_len)); 314 315 struct blorp_params params; 316 blorp_params_init(¶ms); 317 params.num_layers = num_layers; 318 319 params.x0 = x0; 320 params.y0 = y0; 321 params.x1 = x1; 322 params.y1 = y1; 323 324 memset(¶ms.wm_inputs.clear_color, 0xff, 4*sizeof(float)); 325 params.fast_clear_op = BLORP_FAST_CLEAR_OP_CLEAR; 326 327 get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, 328 ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); 329 330 if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true)) 331 return; 332 333 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, 334 start_layer, format, true); 335 params.num_samples = params.dst.surf.samples; 336 337 batch->blorp->exec(batch, ¶ms); 338 } 339 340 static union isl_color_value 341 swizzle_color_value(union isl_color_value src, struct isl_swizzle swizzle) 342 { 343 union isl_color_value dst = { .u32 = { 0, } }; 344 345 /* We assign colors in ABGR order so that the first one will be taken in 346 * RGBA precedence order. According to the PRM docs for shader channel 347 * select, this matches Haswell hardware behavior. 348 */ 349 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 350 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3]; 351 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 352 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2]; 353 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 354 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1]; 355 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 356 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0]; 357 358 return dst; 359 } 360 361 void 362 blorp_clear(struct blorp_batch *batch, 363 const struct blorp_surf *surf, 364 enum isl_format format, struct isl_swizzle swizzle, 365 uint32_t level, uint32_t start_layer, uint32_t num_layers, 366 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 367 union isl_color_value clear_color, 368 const bool color_write_disable[4]) 369 { 370 struct blorp_params params; 371 blorp_params_init(¶ms); 372 373 /* Manually apply the clear destination swizzle. This way swizzled clears 374 * will work for swizzles which we can't normally use for rendering and it 375 * also ensures that they work on pre-Haswell hardware which can't swizlle 376 * at all. 377 */ 378 clear_color = swizzle_color_value(clear_color, swizzle); 379 swizzle = ISL_SWIZZLE_IDENTITY; 380 381 if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) { 382 clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32); 383 format = ISL_FORMAT_R32_UINT; 384 } else if (format == ISL_FORMAT_L8_UNORM_SRGB) { 385 clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); 386 format = ISL_FORMAT_R8_UNORM; 387 } else if (format == ISL_FORMAT_A4B4G4R4_UNORM) { 388 /* Broadwell and earlier cannot render to this format so we need to work 389 * around it by swapping the colors around and using B4G4R4A4 instead. 390 */ 391 const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE); 392 clear_color = swizzle_color_value(clear_color, ARGB); 393 format = ISL_FORMAT_B4G4R4A4_UNORM; 394 } 395 396 memcpy(¶ms.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4); 397 398 bool use_simd16_replicated_data = true; 399 400 /* From the SNB PRM (Vol4_Part1): 401 * 402 * "Replicated data (Message Type = 111) is only supported when 403 * accessing tiled memory. Using this Message Type to access linear 404 * (untiled) memory is UNDEFINED." 405 */ 406 if (surf->surf->tiling == ISL_TILING_LINEAR) 407 use_simd16_replicated_data = false; 408 409 /* Replicated clears don't work yet before gen6 */ 410 if (batch->blorp->isl_dev->info->gen < 6) 411 use_simd16_replicated_data = false; 412 413 /* Constant color writes ignore everyting in blend and color calculator 414 * state. This is not documented. 415 */ 416 if (color_write_disable) { 417 for (unsigned i = 0; i < 4; i++) { 418 params.color_write_disable[i] = color_write_disable[i]; 419 if (color_write_disable[i]) 420 use_simd16_replicated_data = false; 421 } 422 } 423 424 if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, 425 use_simd16_replicated_data)) 426 return; 427 428 if (!blorp_ensure_sf_program(batch->blorp, ¶ms)) 429 return; 430 431 while (num_layers > 0) { 432 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, 433 start_layer, format, true); 434 params.dst.view.swizzle = swizzle; 435 436 params.x0 = x0; 437 params.y0 = y0; 438 params.x1 = x1; 439 params.y1 = y1; 440 441 /* The MinLOD and MinimumArrayElement don't work properly for cube maps. 442 * Convert them to a single slice on gen4. 443 */ 444 if (batch->blorp->isl_dev->info->gen == 4 && 445 (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) { 446 blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, ¶ms.dst); 447 } 448 449 if (isl_format_is_compressed(params.dst.surf.format)) { 450 blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.dst, 451 NULL, NULL, NULL, NULL); 452 //&dst_x, &dst_y, &dst_w, &dst_h); 453 } 454 455 if (params.dst.tile_x_sa || params.dst.tile_y_sa) { 456 /* Either we're on gen4 where there is no multisampling or the 457 * surface is compressed which also implies no multisampling. 458 * Therefore, sa == px and we don't need to do a conversion. 459 */ 460 assert(params.dst.surf.samples == 1); 461 params.x0 += params.dst.tile_x_sa; 462 params.y0 += params.dst.tile_y_sa; 463 params.x1 += params.dst.tile_x_sa; 464 params.y1 += params.dst.tile_y_sa; 465 } 466 467 params.num_samples = params.dst.surf.samples; 468 469 /* We may be restricted on the number of layers we can bind at any one 470 * time. In particular, Sandy Bridge has a maximum number of layers of 471 * 512 but a maximum 3D texture size is much larger. 472 */ 473 params.num_layers = MIN2(params.dst.view.array_len, num_layers); 474 batch->blorp->exec(batch, ¶ms); 475 476 start_layer += params.num_layers; 477 num_layers -= params.num_layers; 478 } 479 } 480 481 void 482 blorp_clear_depth_stencil(struct blorp_batch *batch, 483 const struct blorp_surf *depth, 484 const struct blorp_surf *stencil, 485 uint32_t level, uint32_t start_layer, 486 uint32_t num_layers, 487 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 488 bool clear_depth, float depth_value, 489 uint8_t stencil_mask, uint8_t stencil_value) 490 { 491 struct blorp_params params; 492 blorp_params_init(¶ms); 493 494 params.x0 = x0; 495 params.y0 = y0; 496 params.x1 = x1; 497 params.y1 = y1; 498 499 if (ISL_DEV_GEN(batch->blorp->isl_dev) == 6) { 500 /* For some reason, Sandy Bridge gets occlusion queries wrong if we 501 * don't have a shader. In particular, it records samples even though 502 * we disable statistics in 3DSTATE_WM. Give it the usual clear shader 503 * to work around the issue. 504 */ 505 if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false)) 506 return; 507 } 508 509 while (num_layers > 0) { 510 params.num_layers = num_layers; 511 512 if (stencil_mask) { 513 brw_blorp_surface_info_init(batch->blorp, ¶ms.stencil, stencil, 514 level, start_layer, 515 ISL_FORMAT_UNSUPPORTED, true); 516 params.stencil_mask = stencil_mask; 517 params.stencil_ref = stencil_value; 518 519 params.dst.surf.samples = params.stencil.surf.samples; 520 params.dst.surf.logical_level0_px = 521 params.stencil.surf.logical_level0_px; 522 params.dst.view = params.depth.view; 523 524 params.num_samples = params.stencil.surf.samples; 525 526 /* We may be restricted on the number of layers we can bind at any 527 * one time. In particular, Sandy Bridge has a maximum number of 528 * layers of 512 but a maximum 3D texture size is much larger. 529 */ 530 if (params.stencil.view.array_len < params.num_layers) 531 params.num_layers = params.stencil.view.array_len; 532 } 533 534 if (clear_depth) { 535 brw_blorp_surface_info_init(batch->blorp, ¶ms.depth, depth, 536 level, start_layer, 537 ISL_FORMAT_UNSUPPORTED, true); 538 params.z = depth_value; 539 params.depth_format = 540 isl_format_get_depth_format(depth->surf->format, false); 541 542 params.dst.surf.samples = params.depth.surf.samples; 543 params.dst.surf.logical_level0_px = 544 params.depth.surf.logical_level0_px; 545 params.dst.view = params.depth.view; 546 547 params.num_samples = params.depth.surf.samples; 548 549 /* We may be restricted on the number of layers we can bind at any 550 * one time. In particular, Sandy Bridge has a maximum number of 551 * layers of 512 but a maximum 3D texture size is much larger. 552 */ 553 if (params.depth.view.array_len < params.num_layers) 554 params.num_layers = params.depth.view.array_len; 555 } 556 557 batch->blorp->exec(batch, ¶ms); 558 559 start_layer += params.num_layers; 560 num_layers -= params.num_layers; 561 } 562 } 563 564 bool 565 blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format, 566 uint32_t num_samples, 567 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 568 { 569 /* This function currently doesn't support any gen prior to gen8 */ 570 assert(gen >= 8); 571 572 if (gen == 8 && format == ISL_FORMAT_R16_UNORM) { 573 /* Apply the D16 alignment restrictions. On BDW, HiZ has an 8x4 sample 574 * block with the following property: as the number of samples increases, 575 * the number of pixels representable by this block decreases by a factor 576 * of the sample dimensions. Sample dimensions scale following the MSAA 577 * interleaved pattern. 578 * 579 * Sample|Sample|Pixel 580 * Count |Dim |Dim 581 * =================== 582 * 1 | 1x1 | 8x4 583 * 2 | 2x1 | 4x4 584 * 4 | 2x2 | 4x2 585 * 8 | 4x2 | 2x2 586 * 16 | 4x4 | 2x1 587 * 588 * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL 589 */ 590 const struct isl_extent2d sa_block_dim = 591 isl_get_interleaved_msaa_px_size_sa(num_samples); 592 const uint8_t align_px_w = 8 / sa_block_dim.w; 593 const uint8_t align_px_h = 4 / sa_block_dim.h; 594 595 /* Fast depth clears clear an entire sample block at a time. As a result, 596 * the rectangle must be aligned to the dimensions of the encompassing 597 * pixel block for a successful operation. 598 * 599 * Fast clears can still work if the upper-left corner is aligned and the 600 * bottom-rigtht corner touches the edge of a depth buffer whose extent 601 * is unaligned. This is because each miplevel in the depth buffer is 602 * padded by the Pixel Dim (similar to a standard compressed texture). 603 * In this case, the clear rectangle could be padded by to match the full 604 * depth buffer extent but to support multiple clearing techniques, we 605 * chose to be unaware of the depth buffer's extent and thus don't handle 606 * this case. 607 */ 608 if (x0 % align_px_w || y0 % align_px_h || 609 x1 % align_px_w || y1 % align_px_h) 610 return false; 611 } 612 return true; 613 } 614 615 /* Given a depth stencil attachment, this function performs a fast depth clear 616 * on a depth portion and a regular clear on the stencil portion. When 617 * performing a fast depth clear on the depth portion, the HiZ buffer is simply 618 * tagged as cleared so the depth clear value is not actually needed. 619 */ 620 void 621 blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch, 622 uint32_t num_samples, 623 uint32_t x0, uint32_t y0, 624 uint32_t x1, uint32_t y1, 625 bool clear_depth, bool clear_stencil, 626 uint8_t stencil_value) 627 { 628 assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 629 630 struct blorp_params params; 631 blorp_params_init(¶ms); 632 params.num_layers = 1; 633 params.hiz_op = BLORP_HIZ_OP_DEPTH_CLEAR; 634 params.x0 = x0; 635 params.y0 = y0; 636 params.x1 = x1; 637 params.y1 = y1; 638 params.num_samples = num_samples; 639 params.depth.enabled = clear_depth; 640 params.stencil.enabled = clear_stencil; 641 params.stencil_ref = stencil_value; 642 batch->blorp->exec(batch, ¶ms); 643 } 644 645 /** Clear active color/depth/stencili attachments 646 * 647 * This function performs a clear operation on the currently bound 648 * color/depth/stencil attachments. It is assumed that any information passed 649 * in here is valid, consistent, and in-bounds relative to the currently 650 * attached depth/stencil. The binding_table_offset parameter is the 32-bit 651 * offset relative to surface state base address where pre-baked binding table 652 * that we are to use lives. If clear_color is false, binding_table_offset 653 * must point to a binding table with one entry which is a valid null surface 654 * that matches the currently bound depth and stencil. 655 */ 656 void 657 blorp_clear_attachments(struct blorp_batch *batch, 658 uint32_t binding_table_offset, 659 enum isl_format depth_format, 660 uint32_t num_samples, 661 uint32_t start_layer, uint32_t num_layers, 662 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 663 bool clear_color, union isl_color_value color_value, 664 bool clear_depth, float depth_value, 665 uint8_t stencil_mask, uint8_t stencil_value) 666 { 667 struct blorp_params params; 668 blorp_params_init(¶ms); 669 670 assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 671 672 params.x0 = x0; 673 params.y0 = y0; 674 params.x1 = x1; 675 params.y1 = y1; 676 677 params.use_pre_baked_binding_table = true; 678 params.pre_baked_binding_table_offset = binding_table_offset; 679 680 params.num_layers = num_layers; 681 params.num_samples = num_samples; 682 683 if (clear_color) { 684 params.dst.enabled = true; 685 686 memcpy(¶ms.wm_inputs.clear_color, color_value.f32, sizeof(float) * 4); 687 688 /* Unfortunately, without knowing whether or not our destination surface 689 * is tiled or not, we have to assume it may be linear. This means no 690 * SIMD16_REPDATA for us. :-( 691 */ 692 if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false)) 693 return; 694 } 695 696 if (clear_depth) { 697 params.depth.enabled = true; 698 699 params.z = depth_value; 700 params.depth_format = isl_format_get_depth_format(depth_format, false); 701 } 702 703 if (stencil_mask) { 704 params.stencil.enabled = true; 705 706 params.stencil_mask = stencil_mask; 707 params.stencil_ref = stencil_value; 708 } 709 710 if (!blorp_params_get_layer_offset_vs(batch->blorp, ¶ms)) 711 return; 712 713 params.vs_inputs.base_layer = start_layer; 714 715 batch->blorp->exec(batch, ¶ms); 716 } 717 718 void 719 blorp_ccs_resolve(struct blorp_batch *batch, 720 struct blorp_surf *surf, uint32_t level, 721 uint32_t start_layer, uint32_t num_layers, 722 enum isl_format format, 723 enum blorp_fast_clear_op resolve_op) 724 { 725 struct blorp_params params; 726 727 blorp_params_init(¶ms); 728 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, 729 level, start_layer, format, true); 730 731 /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve": 732 * 733 * A rectangle primitive must be scaled down by the following factors 734 * with respect to render target being resolved. 735 * 736 * The scaledown factors in the table that follows are related to the block 737 * size of the CCS format. For IVB and HSW, we divide by two, for BDW we 738 * multiply by 8 and 16. On Sky Lake, we multiply by 8. 739 */ 740 const struct isl_format_layout *aux_fmtl = 741 isl_format_get_layout(params.dst.aux_surf.format); 742 assert(aux_fmtl->txc == ISL_TXC_CCS); 743 744 unsigned x_scaledown, y_scaledown; 745 if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) { 746 x_scaledown = aux_fmtl->bw * 8; 747 y_scaledown = aux_fmtl->bh * 8; 748 } else if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) { 749 x_scaledown = aux_fmtl->bw * 8; 750 y_scaledown = aux_fmtl->bh * 16; 751 } else { 752 x_scaledown = aux_fmtl->bw / 2; 753 y_scaledown = aux_fmtl->bh / 2; 754 } 755 params.x0 = params.y0 = 0; 756 params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, level); 757 params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, level); 758 params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; 759 params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; 760 761 if (batch->blorp->isl_dev->info->gen >= 9) { 762 assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL || 763 resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL); 764 } else { 765 /* Broadwell and earlier do not have a partial resolve */ 766 assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL); 767 } 768 params.fast_clear_op = resolve_op; 769 params.num_layers = num_layers; 770 771 /* Note: there is no need to initialize push constants because it doesn't 772 * matter what data gets dispatched to the render target. However, we must 773 * ensure that the fragment shader delivers the data using the "replicated 774 * color" message. 775 */ 776 777 if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true)) 778 return; 779 780 batch->blorp->exec(batch, ¶ms); 781 } 782 783 struct blorp_mcs_partial_resolve_key 784 { 785 enum blorp_shader_type shader_type; 786 uint32_t num_samples; 787 }; 788 789 static bool 790 blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp, 791 struct blorp_params *params) 792 { 793 const struct blorp_mcs_partial_resolve_key blorp_key = { 794 .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, 795 .num_samples = params->num_samples, 796 }; 797 798 if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), 799 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 800 return true; 801 802 void *mem_ctx = ralloc_context(NULL); 803 804 nir_builder b; 805 nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); 806 b.shader->info.name = ralloc_strdup(b.shader, "BLORP-mcs-partial-resolve"); 807 808 nir_variable *v_color = 809 BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 810 811 nir_variable *frag_color = 812 nir_variable_create(b.shader, nir_var_shader_out, 813 glsl_vec4_type(), "gl_FragColor"); 814 frag_color->data.location = FRAG_RESULT_COLOR; 815 816 /* Do an MCS fetch and check if it is equal to the magic clear value */ 817 nir_ssa_def *mcs = 818 blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)), 819 nir_load_layer_id(&b)); 820 nir_ssa_def *is_clear = 821 blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples); 822 823 /* If we aren't the clear value, discard. */ 824 nir_intrinsic_instr *discard = 825 nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); 826 discard->src[0] = nir_src_for_ssa(nir_inot(&b, is_clear)); 827 nir_builder_instr_insert(&b, &discard->instr); 828 829 nir_copy_var(&b, frag_color, v_color); 830 831 struct brw_wm_prog_key wm_key; 832 brw_blorp_init_wm_prog_key(&wm_key); 833 wm_key.tex.compressed_multisample_layout_mask = 1; 834 wm_key.tex.msaa_16 = blorp_key.num_samples == 16; 835 wm_key.multisample_fbo = true; 836 837 struct brw_wm_prog_data prog_data; 838 const unsigned *program = 839 blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, false, 840 &prog_data); 841 842 bool result = 843 blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), 844 program, prog_data.base.program_size, 845 &prog_data.base, sizeof(prog_data), 846 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 847 848 ralloc_free(mem_ctx); 849 return result; 850 } 851 852 void 853 blorp_mcs_partial_resolve(struct blorp_batch *batch, 854 struct blorp_surf *surf, 855 enum isl_format format, 856 uint32_t start_layer, uint32_t num_layers) 857 { 858 struct blorp_params params; 859 blorp_params_init(¶ms); 860 861 assert(batch->blorp->isl_dev->info->gen >= 7); 862 863 params.x0 = 0; 864 params.y0 = 0; 865 params.x1 = surf->surf->logical_level0_px.width; 866 params.y1 = surf->surf->logical_level0_px.height; 867 868 brw_blorp_surface_info_init(batch->blorp, ¶ms.src, surf, 0, 869 start_layer, format, false); 870 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, 0, 871 start_layer, format, true); 872 873 params.num_samples = params.dst.surf.samples; 874 params.num_layers = num_layers; 875 876 memcpy(¶ms.wm_inputs.clear_color, 877 surf->clear_color.f32, sizeof(float) * 4); 878 879 if (!blorp_params_get_mcs_partial_resolve_kernel(batch->blorp, ¶ms)) 880 return; 881 882 batch->blorp->exec(batch, ¶ms); 883 } 884