1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <errno.h> 25 26 #include "program/prog_instruction.h" 27 28 #include "blorp_priv.h" 29 #include "compiler/brw_compiler.h" 30 #include "compiler/brw_nir.h" 31 32 void 33 blorp_init(struct blorp_context *blorp, void *driver_ctx, 34 struct isl_device *isl_dev) 35 { 36 blorp->driver_ctx = driver_ctx; 37 blorp->isl_dev = isl_dev; 38 } 39 40 void 41 blorp_finish(struct blorp_context *blorp) 42 { 43 blorp->driver_ctx = NULL; 44 } 45 46 void 47 blorp_batch_init(struct blorp_context *blorp, 48 struct blorp_batch *batch, void *driver_batch, 49 enum blorp_batch_flags flags) 50 { 51 batch->blorp = blorp; 52 batch->driver_batch = driver_batch; 53 batch->flags = flags; 54 } 55 56 void 57 blorp_batch_finish(struct blorp_batch *batch) 58 { 59 batch->blorp = NULL; 60 } 61 62 void 63 brw_blorp_surface_info_init(struct blorp_context *blorp, 64 struct brw_blorp_surface_info *info, 65 const struct blorp_surf *surf, 66 unsigned int level, unsigned int layer, 67 enum isl_format format, bool is_render_target) 68 { 69 assert(level < surf->surf->levels); 70 assert(layer < MAX2(surf->surf->logical_level0_px.depth >> level, 71 surf->surf->logical_level0_px.array_len)); 72 73 info->enabled = true; 74 75 if (format == ISL_FORMAT_UNSUPPORTED) 76 format = surf->surf->format; 77 78 if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { 79 /* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as 80 * a render target, which would prevent us from blitting to 24-bit 81 * depth. The miptree consists of 32 bits per pixel, arranged as 24-bit 82 * depth values interleaved with 8 "don't care" bits. Since depth 83 * values don't require any blending, it doesn't matter how we interpret 84 * the bit pattern as long as we copy the right amount of data, so just 85 * map it as 8-bit BGRA. 86 */ 87 format = ISL_FORMAT_B8G8R8A8_UNORM; 88 } 89 90 info->surf = *surf->surf; 91 info->addr = surf->addr; 92 93 info->aux_usage = surf->aux_usage; 94 if (info->aux_usage != ISL_AUX_USAGE_NONE) { 95 info->aux_surf = *surf->aux_surf; 96 info->aux_addr = surf->aux_addr; 97 assert(level < info->aux_surf.levels); 98 assert(layer < MAX2(info->aux_surf.logical_level0_px.depth >> level, 99 info->aux_surf.logical_level0_px.array_len)); 100 } 101 102 info->clear_color = surf->clear_color; 103 info->clear_color_addr = surf->clear_color_addr; 104 105 info->view = (struct isl_view) { 106 .usage = is_render_target ? ISL_SURF_USAGE_RENDER_TARGET_BIT : 107 ISL_SURF_USAGE_TEXTURE_BIT, 108 .format = format, 109 .base_level = level, 110 .levels = 1, 111 .swizzle = ISL_SWIZZLE_IDENTITY, 112 }; 113 114 info->view.array_len = MAX2(info->surf.logical_level0_px.depth, 115 info->surf.logical_level0_px.array_len); 116 117 if (!is_render_target && 118 (info->surf.dim == ISL_SURF_DIM_3D || 119 info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) { 120 /* 3-D textures don't support base_array layer and neither do 2-D 121 * multisampled textures on IVB so we need to pass it through the 122 * sampler in those cases. These are also two cases where we are 123 * guaranteed that we won't be doing any funny surface hacks. 124 */ 125 info->view.base_array_layer = 0; 126 info->z_offset = layer; 127 } else { 128 info->view.base_array_layer = layer; 129 130 assert(info->view.array_len >= info->view.base_array_layer); 131 info->view.array_len -= info->view.base_array_layer; 132 info->z_offset = 0; 133 } 134 135 /* Sandy Bridge and earlier have a limit of a maximum of 512 layers for 136 * layered rendering. 137 */ 138 if (is_render_target && blorp->isl_dev->info->gen <= 6) 139 info->view.array_len = MIN2(info->view.array_len, 512); 140 } 141 142 143 void 144 blorp_params_init(struct blorp_params *params) 145 { 146 memset(params, 0, sizeof(*params)); 147 params->num_samples = 1; 148 params->num_draw_buffers = 1; 149 params->num_layers = 1; 150 } 151 152 void 153 brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key) 154 { 155 memset(wm_key, 0, sizeof(*wm_key)); 156 wm_key->nr_color_regions = 1; 157 for (int i = 0; i < MAX_SAMPLERS; i++) 158 wm_key->tex.swizzles[i] = SWIZZLE_XYZW; 159 } 160 161 const unsigned * 162 blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, 163 struct nir_shader *nir, 164 struct brw_wm_prog_key *wm_key, 165 bool use_repclear, 166 struct brw_wm_prog_data *wm_prog_data) 167 { 168 const struct brw_compiler *compiler = blorp->compiler; 169 170 nir->options = 171 compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; 172 173 memset(wm_prog_data, 0, sizeof(*wm_prog_data)); 174 175 assert(exec_list_is_empty(&nir->uniforms)); 176 wm_prog_data->base.nr_params = 0; 177 wm_prog_data->base.param = NULL; 178 179 /* BLORP always uses the first two binding table entries: 180 * - Surface 0 is the render target (which always start from 0) 181 * - Surface 1 is the source texture 182 */ 183 wm_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; 184 185 nir = brw_preprocess_nir(compiler, nir); 186 nir_remove_dead_variables(nir, nir_var_shader_in); 187 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 188 189 if (blorp->compiler->devinfo->gen < 6) { 190 if (nir->info.fs.uses_discard) 191 wm_key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; 192 193 wm_key->input_slots_valid = nir->info.inputs_read | VARYING_BIT_POS; 194 } 195 196 const unsigned *program = 197 brw_compile_fs(compiler, blorp->driver_ctx, mem_ctx, wm_key, 198 wm_prog_data, nir, NULL, -1, -1, false, use_repclear, 199 NULL, NULL); 200 201 return program; 202 } 203 204 const unsigned * 205 blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx, 206 struct nir_shader *nir, 207 struct brw_vs_prog_data *vs_prog_data) 208 { 209 const struct brw_compiler *compiler = blorp->compiler; 210 211 nir->options = 212 compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions; 213 214 nir = brw_preprocess_nir(compiler, nir); 215 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 216 217 vs_prog_data->inputs_read = nir->info.inputs_read; 218 219 brw_compute_vue_map(compiler->devinfo, 220 &vs_prog_data->base.vue_map, 221 nir->info.outputs_written, 222 nir->info.separate_shader); 223 224 struct brw_vs_prog_key vs_key = { 0, }; 225 226 const unsigned *program = 227 brw_compile_vs(compiler, blorp->driver_ctx, mem_ctx, 228 &vs_key, vs_prog_data, nir, -1, NULL); 229 230 return program; 231 } 232 233 struct blorp_sf_key { 234 enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_GEN4_SF */ 235 236 struct brw_sf_prog_key key; 237 }; 238 239 bool 240 blorp_ensure_sf_program(struct blorp_context *blorp, 241 struct blorp_params *params) 242 { 243 const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data; 244 assert(params->wm_prog_data); 245 246 /* Gen6+ doesn't need a strips and fans program */ 247 if (blorp->compiler->devinfo->gen >= 6) 248 return true; 249 250 struct blorp_sf_key key = { 251 .shader_type = BLORP_SHADER_TYPE_GEN4_SF, 252 }; 253 254 /* Everything gets compacted in vertex setup, so we just need a 255 * pass-through for the correct number of input varyings. 256 */ 257 const uint64_t slots_valid = VARYING_BIT_POS | 258 ((1ull << wm_prog_data->num_varying_inputs) - 1) << VARYING_SLOT_VAR0; 259 260 key.key.attrs = slots_valid; 261 key.key.primitive = BRW_SF_PRIM_TRIANGLES; 262 key.key.contains_flat_varying = wm_prog_data->contains_flat_varying; 263 264 STATIC_ASSERT(sizeof(key.key.interp_mode) == 265 sizeof(wm_prog_data->interp_mode)); 266 memcpy(key.key.interp_mode, wm_prog_data->interp_mode, 267 sizeof(key.key.interp_mode)); 268 269 if (blorp->lookup_shader(blorp, &key, sizeof(key), 270 ¶ms->sf_prog_kernel, ¶ms->sf_prog_data)) 271 return true; 272 273 void *mem_ctx = ralloc_context(NULL); 274 275 const unsigned *program; 276 unsigned program_size; 277 278 struct brw_vue_map vue_map; 279 brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false); 280 281 struct brw_sf_prog_data prog_data_tmp; 282 program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key, 283 &prog_data_tmp, &vue_map, &program_size); 284 285 bool result = 286 blorp->upload_shader(blorp, &key, sizeof(key), program, program_size, 287 (void *)&prog_data_tmp, sizeof(prog_data_tmp), 288 ¶ms->sf_prog_kernel, ¶ms->sf_prog_data); 289 290 ralloc_free(mem_ctx); 291 292 return result; 293 } 294 295 void 296 blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf, 297 uint32_t level, uint32_t start_layer, uint32_t num_layers, 298 enum blorp_hiz_op op) 299 { 300 struct blorp_params params; 301 blorp_params_init(¶ms); 302 303 params.hiz_op = op; 304 params.full_surface_hiz_op = true; 305 306 for (uint32_t a = 0; a < num_layers; a++) { 307 const uint32_t layer = start_layer + a; 308 309 brw_blorp_surface_info_init(batch->blorp, ¶ms.depth, surf, level, 310 layer, surf->surf->format, true); 311 312 /* Align the rectangle primitive to 8x4 pixels. 313 * 314 * During fast depth clears, the emitted rectangle primitive must be 315 * aligned to 8x4 pixels. From the Ivybridge PRM, Vol 2 Part 1 Section 316 * 11.5.3.1 Depth Buffer Clear (and the matching section in the 317 * Sandybridge PRM): 318 * 319 * If Number of Multisamples is NUMSAMPLES_1, the rectangle must be 320 * aligned to an 8x4 pixel block relative to the upper left corner 321 * of the depth buffer [...] 322 * 323 * For hiz resolves, the rectangle must also be 8x4 aligned. Item 324 * WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the 325 * Ivybridge simulator require the alignment. 326 * 327 * To be safe, let's just align the rect for all hiz operations and all 328 * hardware generations. 329 * 330 * However, for some miptree slices of a Z24 texture, emitting an 8x4 331 * aligned rectangle that covers the slice may clobber adjacent slices 332 * if we strictly adhered to the texture alignments specified in the 333 * PRM. The Ivybridge PRM, Section "Alignment Unit Size", states that 334 * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 335 * surfaces, not 8. But commit 1f112cc increased the alignment from 4 to 336 * 8, which prevents the clobbering. 337 */ 338 params.x1 = minify(params.depth.surf.logical_level0_px.width, 339 params.depth.view.base_level); 340 params.y1 = minify(params.depth.surf.logical_level0_px.height, 341 params.depth.view.base_level); 342 params.x1 = ALIGN(params.x1, 8); 343 params.y1 = ALIGN(params.y1, 4); 344 345 if (params.depth.view.base_level == 0) { 346 /* TODO: What about MSAA? */ 347 params.depth.surf.logical_level0_px.width = params.x1; 348 params.depth.surf.logical_level0_px.height = params.y1; 349 } 350 351 params.dst.surf.samples = params.depth.surf.samples; 352 params.dst.surf.logical_level0_px = params.depth.surf.logical_level0_px; 353 params.depth_format = 354 isl_format_get_depth_format(surf->surf->format, false); 355 params.num_samples = params.depth.surf.samples; 356 357 batch->blorp->exec(batch, ¶ms); 358 } 359 } 360