Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "intel_batchbuffer.h"
     25 #include "intel_mipmap_tree.h"
     26 #include "intel_regions.h"
     27 #include "intel_fbo.h"
     28 #include "brw_context.h"
     29 #include "brw_state.h"
     30 #include "brw_defines.h"
     31 
     32 static void emit_depthbuffer(struct brw_context *brw)
     33 {
     34    struct intel_context *intel = &brw->intel;
     35    struct gl_context *ctx = &intel->ctx;
     36    struct gl_framebuffer *fb = ctx->DrawBuffer;
     37 
     38    /* _NEW_BUFFERS */
     39    struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
     40    struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
     41    struct intel_mipmap_tree *depth_mt = NULL,
     42 			    *stencil_mt = NULL,
     43 			    *hiz_mt = NULL;
     44 
     45    /* Amount by which drawing should be offset in order to draw to the
     46     * appropriate miplevel/zoffset/cubeface.  We will extract these values
     47     * from depth_irb or stencil_irb once we determine which is present.
     48     */
     49    uint32_t draw_x = 0, draw_y = 0;
     50 
     51    /* Masks used to determine how much of the draw_x and draw_y offsets should
     52     * be performed using the fine adjustment of "depth coordinate offset X/Y"
     53     * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
     54     * performed by changing the base addresses of the buffers.
     55     *
     56     * Since the HiZ, depth, and stencil buffers all use the same "depth
     57     * coordinate offset X/Y" values, we need to make sure that the coarse
     58     * adjustment will be possible to apply to all three buffers.  Since coarse
     59     * adjustment can only be applied in multiples of the tile size, we will OR
     60     * together the tile masks of all the buffers to determine which offsets to
     61     * perform as fine adjustments.
     62     */
     63    uint32_t tile_mask_x = 0, tile_mask_y = 0;
     64 
     65    if (drb)
     66       depth_mt = drb->mt;
     67 
     68    if (depth_mt) {
     69       hiz_mt = depth_mt->hiz_mt;
     70 
     71       intel_region_get_tile_masks(depth_mt->region,
     72                                   &tile_mask_x, &tile_mask_y, false);
     73 
     74       if (hiz_mt) {
     75          uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
     76          intel_region_get_tile_masks(hiz_mt->region,
     77                                      &hiz_tile_mask_x, &hiz_tile_mask_y,
     78                                      false);
     79 
     80          /* Each HiZ row represents 2 rows of pixels */
     81          hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
     82 
     83          tile_mask_x |= hiz_tile_mask_x;
     84          tile_mask_y |= hiz_tile_mask_y;
     85       }
     86    }
     87 
     88    if (srb) {
     89       stencil_mt = srb->mt;
     90       if (stencil_mt->stencil_mt)
     91 	 stencil_mt = stencil_mt->stencil_mt;
     92 
     93       assert(stencil_mt->format == MESA_FORMAT_S8);
     94 
     95       /* Stencil buffer uses 64x64 tiles. */
     96       tile_mask_x |= 63;
     97       tile_mask_y |= 63;
     98    }
     99 
    100    /* Gen7 doesn't support packed depth/stencil */
    101    assert(stencil_mt == NULL || depth_mt != stencil_mt);
    102    assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
    103 
    104    intel_emit_depth_stall_flushes(intel);
    105 
    106    if (depth_mt == NULL) {
    107       uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
    108       uint32_t dw3 = 0;
    109       uint32_t tile_x = 0, tile_y = 0;
    110 
    111       if (stencil_mt == NULL) {
    112 	 dw1 |= (BRW_SURFACE_NULL << 29);
    113       } else {
    114 	 /* _NEW_STENCIL: enable stencil buffer writes */
    115 	 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
    116 
    117          draw_x = srb->draw_x;
    118          draw_y = srb->draw_y;
    119          tile_x = draw_x & tile_mask_x;
    120          tile_y = draw_y & tile_mask_y;
    121 
    122          /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
    123           * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
    124           * Coordinate Offset X/Y":
    125           *
    126           *   "The 3 LSBs of both offsets must be zero to ensure correct
    127           *   alignment"
    128           *
    129           * We have no guarantee that tile_x and tile_y are correctly aligned,
    130           * since they are determined by the mipmap layout, which is only
    131           * aligned to multiples of 4.
    132           *
    133           * So, to avoid hanging the GPU, just smash the low order 3 bits of
    134           * tile_x and tile_y to 0.  This is a temporary workaround until we
    135           * come up with a better solution.
    136           */
    137          tile_x &= ~7;
    138          tile_y &= ~7;
    139 
    140 	 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
    141 	 dw1 |= (BRW_SURFACE_2D << 29);
    142 	 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
    143 	       ((srb->Base.Base.Height + tile_y - 1) << 18);
    144       }
    145 
    146       BEGIN_BATCH(7);
    147       OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
    148       OUT_BATCH(dw1);
    149       OUT_BATCH(0);
    150       OUT_BATCH(dw3);
    151       OUT_BATCH(0);
    152       OUT_BATCH(tile_x | (tile_y << 16));
    153       OUT_BATCH(0);
    154       ADVANCE_BATCH();
    155    } else {
    156       struct intel_region *region = depth_mt->region;
    157       uint32_t tile_x, tile_y, offset;
    158 
    159       draw_x = drb->draw_x;
    160       draw_y = drb->draw_y;
    161       tile_x = draw_x & tile_mask_x;
    162       tile_y = draw_y & tile_mask_y;
    163 
    164       /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
    165        * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
    166        * Coordinate Offset X/Y":
    167        *
    168        *   "The 3 LSBs of both offsets must be zero to ensure correct
    169        *   alignment"
    170        *
    171        * We have no guarantee that tile_x and tile_y are correctly aligned,
    172        * since they are determined by the mipmap layout, which is only aligned
    173        * to multiples of 4.
    174        *
    175        * So, to avoid hanging the GPU, just smash the low order 3 bits of
    176        * tile_x and tile_y to 0.  This is a temporary workaround until we come
    177        * up with a better solution.
    178        */
    179       tile_x &= ~7;
    180       tile_y &= ~7;
    181 
    182       offset = intel_region_get_aligned_offset(region,
    183                                                draw_x & ~tile_mask_x,
    184                                                draw_y & ~tile_mask_y,
    185                                                false);
    186 
    187       assert(region->tiling == I915_TILING_Y);
    188 
    189       /* _NEW_DEPTH, _NEW_STENCIL */
    190       BEGIN_BATCH(7);
    191       OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
    192       OUT_BATCH(((region->pitch * region->cpp) - 1) |
    193 		(brw_depthbuffer_format(brw) << 18) |
    194 		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
    195 		((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
    196 		((ctx->Depth.Mask != 0) << 28) |
    197 		(BRW_SURFACE_2D << 29));
    198       OUT_RELOC(region->bo,
    199 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    200 		offset);
    201       OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
    202                 (((drb->Base.Base.Height + tile_y) - 1) << 18));
    203       OUT_BATCH(0);
    204       OUT_BATCH(tile_x | (tile_y << 16));
    205       OUT_BATCH(0);
    206       ADVANCE_BATCH();
    207    }
    208 
    209    if (hiz_mt == NULL) {
    210       BEGIN_BATCH(3);
    211       OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
    212       OUT_BATCH(0);
    213       OUT_BATCH(0);
    214       ADVANCE_BATCH();
    215    } else {
    216       uint32_t hiz_offset =
    217          intel_region_get_aligned_offset(hiz_mt->region,
    218                                          draw_x & ~tile_mask_x,
    219                                          (draw_y & ~tile_mask_y) / 2,
    220                                          false);
    221       BEGIN_BATCH(3);
    222       OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
    223       OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
    224       OUT_RELOC(hiz_mt->region->bo,
    225                 I915_GEM_DOMAIN_RENDER,
    226                 I915_GEM_DOMAIN_RENDER,
    227                 hiz_offset);
    228       ADVANCE_BATCH();
    229    }
    230 
    231    if (stencil_mt == NULL) {
    232       BEGIN_BATCH(3);
    233       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
    234       OUT_BATCH(0);
    235       OUT_BATCH(0);
    236       ADVANCE_BATCH();
    237    } else {
    238       const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
    239 
    240       /* Note: We can't compute the stencil offset using
    241        * intel_region_get_aligned_offset(), because the stencil region claims
    242        * that the region is untiled; in fact it's W tiled.
    243        */
    244       uint32_t stencil_offset =
    245          (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
    246          (draw_x & ~tile_mask_x) * 64;
    247 
    248       BEGIN_BATCH(3);
    249       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
    250       /* The stencil buffer has quirky pitch requirements.  From the Graphics
    251        * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
    252        * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
    253        * field "Surface Pitch":
    254        *
    255        *    The pitch must be set to 2x the value computed based on width, as
    256        *    the stencil buffer is stored with two rows interleaved.
    257        *
    258        * (Note that it is not 100% clear whether this intended to apply to
    259        * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
    260        * imply that it doesn't), however the comment appears on a "DevIVB+"
    261        * page (which would imply that it does).  Experiments with the hardware
    262        * indicate that it does.
    263        */
    264       OUT_BATCH(enabled |
    265 	        (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
    266       OUT_RELOC(stencil_mt->region->bo,
    267 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    268 		stencil_offset);
    269       ADVANCE_BATCH();
    270    }
    271 
    272    BEGIN_BATCH(3);
    273    OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
    274    OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
    275    OUT_BATCH(1);
    276    ADVANCE_BATCH();
    277 }
    278 
    279 /**
    280  * \see brw_context.state.depth_region
    281  */
    282 const struct brw_tracked_state gen7_depthbuffer = {
    283    .dirty = {
    284       .mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL),
    285       .brw = BRW_NEW_BATCH,
    286       .cache = 0,
    287    },
    288    .emit = emit_depthbuffer,
    289 };
    290