1 /* 2 * Copyright 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "intel_batchbuffer.h" 25 #include "intel_mipmap_tree.h" 26 #include "intel_regions.h" 27 #include "intel_fbo.h" 28 #include "brw_context.h" 29 #include "brw_state.h" 30 #include "brw_defines.h" 31 32 static void emit_depthbuffer(struct brw_context *brw) 33 { 34 struct intel_context *intel = &brw->intel; 35 struct gl_context *ctx = &intel->ctx; 36 struct gl_framebuffer *fb = ctx->DrawBuffer; 37 38 /* _NEW_BUFFERS */ 39 struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH); 40 struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL); 41 struct intel_mipmap_tree *depth_mt = NULL, 42 *stencil_mt = NULL, 43 *hiz_mt = NULL; 44 45 /* Amount by which drawing should be offset in order to draw to the 46 * appropriate miplevel/zoffset/cubeface. We will extract these values 47 * from depth_irb or stencil_irb once we determine which is present. 48 */ 49 uint32_t draw_x = 0, draw_y = 0; 50 51 /* Masks used to determine how much of the draw_x and draw_y offsets should 52 * be performed using the fine adjustment of "depth coordinate offset X/Y" 53 * (dw5 of 3DSTATE_DEPTH_BUFFER). Any remaining coarse adjustment will be 54 * performed by changing the base addresses of the buffers. 55 * 56 * Since the HiZ, depth, and stencil buffers all use the same "depth 57 * coordinate offset X/Y" values, we need to make sure that the coarse 58 * adjustment will be possible to apply to all three buffers. Since coarse 59 * adjustment can only be applied in multiples of the tile size, we will OR 60 * together the tile masks of all the buffers to determine which offsets to 61 * perform as fine adjustments. 62 */ 63 uint32_t tile_mask_x = 0, tile_mask_y = 0; 64 65 if (drb) 66 depth_mt = drb->mt; 67 68 if (depth_mt) { 69 hiz_mt = depth_mt->hiz_mt; 70 71 intel_region_get_tile_masks(depth_mt->region, 72 &tile_mask_x, &tile_mask_y, false); 73 74 if (hiz_mt) { 75 uint32_t hiz_tile_mask_x, hiz_tile_mask_y; 76 intel_region_get_tile_masks(hiz_mt->region, 77 &hiz_tile_mask_x, &hiz_tile_mask_y, 78 false); 79 80 /* Each HiZ row represents 2 rows of pixels */ 81 hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1; 82 83 tile_mask_x |= hiz_tile_mask_x; 84 tile_mask_y |= hiz_tile_mask_y; 85 } 86 } 87 88 if (srb) { 89 stencil_mt = srb->mt; 90 if (stencil_mt->stencil_mt) 91 stencil_mt = stencil_mt->stencil_mt; 92 93 assert(stencil_mt->format == MESA_FORMAT_S8); 94 95 /* Stencil buffer uses 64x64 tiles. */ 96 tile_mask_x |= 63; 97 tile_mask_y |= 63; 98 } 99 100 /* Gen7 doesn't support packed depth/stencil */ 101 assert(stencil_mt == NULL || depth_mt != stencil_mt); 102 assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format)); 103 104 intel_emit_depth_stall_flushes(intel); 105 106 if (depth_mt == NULL) { 107 uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18; 108 uint32_t dw3 = 0; 109 uint32_t tile_x = 0, tile_y = 0; 110 111 if (stencil_mt == NULL) { 112 dw1 |= (BRW_SURFACE_NULL << 29); 113 } else { 114 /* _NEW_STENCIL: enable stencil buffer writes */ 115 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27); 116 117 draw_x = srb->draw_x; 118 draw_y = srb->draw_y; 119 tile_x = draw_x & tile_mask_x; 120 tile_y = draw_y & tile_mask_y; 121 122 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 123 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth 124 * Coordinate Offset X/Y": 125 * 126 * "The 3 LSBs of both offsets must be zero to ensure correct 127 * alignment" 128 * 129 * We have no guarantee that tile_x and tile_y are correctly aligned, 130 * since they are determined by the mipmap layout, which is only 131 * aligned to multiples of 4. 132 * 133 * So, to avoid hanging the GPU, just smash the low order 3 bits of 134 * tile_x and tile_y to 0. This is a temporary workaround until we 135 * come up with a better solution. 136 */ 137 tile_x &= ~7; 138 tile_y &= ~7; 139 140 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */ 141 dw1 |= (BRW_SURFACE_2D << 29); 142 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) | 143 ((srb->Base.Base.Height + tile_y - 1) << 18); 144 } 145 146 BEGIN_BATCH(7); 147 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); 148 OUT_BATCH(dw1); 149 OUT_BATCH(0); 150 OUT_BATCH(dw3); 151 OUT_BATCH(0); 152 OUT_BATCH(tile_x | (tile_y << 16)); 153 OUT_BATCH(0); 154 ADVANCE_BATCH(); 155 } else { 156 struct intel_region *region = depth_mt->region; 157 uint32_t tile_x, tile_y, offset; 158 159 draw_x = drb->draw_x; 160 draw_y = drb->draw_y; 161 tile_x = draw_x & tile_mask_x; 162 tile_y = draw_y & tile_mask_y; 163 164 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 165 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth 166 * Coordinate Offset X/Y": 167 * 168 * "The 3 LSBs of both offsets must be zero to ensure correct 169 * alignment" 170 * 171 * We have no guarantee that tile_x and tile_y are correctly aligned, 172 * since they are determined by the mipmap layout, which is only aligned 173 * to multiples of 4. 174 * 175 * So, to avoid hanging the GPU, just smash the low order 3 bits of 176 * tile_x and tile_y to 0. This is a temporary workaround until we come 177 * up with a better solution. 178 */ 179 tile_x &= ~7; 180 tile_y &= ~7; 181 182 offset = intel_region_get_aligned_offset(region, 183 draw_x & ~tile_mask_x, 184 draw_y & ~tile_mask_y, 185 false); 186 187 assert(region->tiling == I915_TILING_Y); 188 189 /* _NEW_DEPTH, _NEW_STENCIL */ 190 BEGIN_BATCH(7); 191 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); 192 OUT_BATCH(((region->pitch * region->cpp) - 1) | 193 (brw_depthbuffer_format(brw) << 18) | 194 ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */ 195 ((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) | 196 ((ctx->Depth.Mask != 0) << 28) | 197 (BRW_SURFACE_2D << 29)); 198 OUT_RELOC(region->bo, 199 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 200 offset); 201 OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) | 202 (((drb->Base.Base.Height + tile_y) - 1) << 18)); 203 OUT_BATCH(0); 204 OUT_BATCH(tile_x | (tile_y << 16)); 205 OUT_BATCH(0); 206 ADVANCE_BATCH(); 207 } 208 209 if (hiz_mt == NULL) { 210 BEGIN_BATCH(3); 211 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); 212 OUT_BATCH(0); 213 OUT_BATCH(0); 214 ADVANCE_BATCH(); 215 } else { 216 uint32_t hiz_offset = 217 intel_region_get_aligned_offset(hiz_mt->region, 218 draw_x & ~tile_mask_x, 219 (draw_y & ~tile_mask_y) / 2, 220 false); 221 BEGIN_BATCH(3); 222 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); 223 OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1); 224 OUT_RELOC(hiz_mt->region->bo, 225 I915_GEM_DOMAIN_RENDER, 226 I915_GEM_DOMAIN_RENDER, 227 hiz_offset); 228 ADVANCE_BATCH(); 229 } 230 231 if (stencil_mt == NULL) { 232 BEGIN_BATCH(3); 233 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); 234 OUT_BATCH(0); 235 OUT_BATCH(0); 236 ADVANCE_BATCH(); 237 } else { 238 const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0; 239 240 /* Note: We can't compute the stencil offset using 241 * intel_region_get_aligned_offset(), because the stencil region claims 242 * that the region is untiled; in fact it's W tiled. 243 */ 244 uint32_t stencil_offset = 245 (draw_y & ~tile_mask_y) * stencil_mt->region->pitch + 246 (draw_x & ~tile_mask_x) * 64; 247 248 BEGIN_BATCH(3); 249 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); 250 /* The stencil buffer has quirky pitch requirements. From the Graphics 251 * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing 252 * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+], 253 * field "Surface Pitch": 254 * 255 * The pitch must be set to 2x the value computed based on width, as 256 * the stencil buffer is stored with two rows interleaved. 257 * 258 * (Note that it is not 100% clear whether this intended to apply to 259 * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would 260 * imply that it doesn't), however the comment appears on a "DevIVB+" 261 * page (which would imply that it does). Experiments with the hardware 262 * indicate that it does. 263 */ 264 OUT_BATCH(enabled | 265 (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1)); 266 OUT_RELOC(stencil_mt->region->bo, 267 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 268 stencil_offset); 269 ADVANCE_BATCH(); 270 } 271 272 BEGIN_BATCH(3); 273 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); 274 OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0); 275 OUT_BATCH(1); 276 ADVANCE_BATCH(); 277 } 278 279 /** 280 * \see brw_context.state.depth_region 281 */ 282 const struct brw_tracked_state gen7_depthbuffer = { 283 .dirty = { 284 .mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL), 285 .brw = BRW_NEW_BATCH, 286 .cache = 0, 287 }, 288 .emit = emit_depthbuffer, 289 }; 290