/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

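/* A state packet previously emitted into the batch.  "header" is the dword
 * offset of the packet within the batch map and "size" is its length in
 * bytes; intel_batchbuffer_cached_advance() uses these to avoid re-emitting
 * packets identical to ones already in the batch.
 */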
struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

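/* Free the list of cached packets.  The cached offsets are only meaningful
 * for the current batch, so this is called whenever the batch contents are
 * discarded or submitted.
 */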
static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "pipe_control workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
   intel->batch.needs_sol_reset = false;
}

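/* Record the current amount of batch used and the relocation count so that
 * commands emitted after this point can be rolled back with
 * intel_batchbuffer_reset_to_saved(), e.g. to retry a draw call in a fresh
 * batch.
 */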
void
intel_batchbuffer_save_state(struct intel_context *intel)
{
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

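/* Roll the batch back to the point captured by intel_batchbuffer_save_state(),
 * dropping any commands and relocations emitted since then.
 */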
void
intel_batchbuffer_reset_to_saved(struct intel_context *intel)
{
   drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(intel);
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}

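/* Decode and print the contents of the current batch using libdrm's
 * decoder.  Only called when batch debugging (INTEL_DEBUG & DEBUG_BATCH)
 * is enabled.
 */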
static void
do_batch_dump(struct intel_context *intel)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(-ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }
}

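/* Upload the accumulated commands (and the state data packed at the end of
 * the buffer) into the batch BO, then submit it to the kernel with
 * execbuffer, selecting the render or blit ring as appropriate.
 */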
/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int flags;

      if (intel->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB) && intel->vtbl.annotate_aub)
            intel->vtbl.annotate_aub(intel);
         if (intel->hw_ctx == NULL || batch->is_blit) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, intel->hw_ctx,
                                                4 * batch->used, flags);
         }
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(intel);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);

   return ret;
}

int
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   int ret;

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer for the next batch. */
   intel_batchbuffer_reset(intel);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the presumed (old) buffer offset, write in what the right value
    * would be, so that if the buffer doesn't move the kernel can
    * short-circuit the relocation processing.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the presumed (old) buffer offset, write in what the right value
    * would be, so that if the buffer doesn't move the kernel can
    * short-circuit the relocation processing.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

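/* Copy a block of dword-aligned data into the batch, flushing the current
 * batch first if there is not enough space left
 * (via intel_batchbuffer_require_space()).
 */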
void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

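/* Finish emitting a state packet, first checking whether an identical
 * packet (same opcode, size and payload) is already present in the batch.
 * If so, rewind batch.used back to batch.emit so the duplicate is dropped;
 * otherwise remember the new packet's offset and size so that later
 * duplicates can be detected.
 */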
void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * From the BSpec, volume 2a.03: VS Stage Input / State:
 * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
 *  to be sent before any combination of VS associated 3DSTATE."
 */
void
gen7_emit_vs_workaround_flush(struct intel_context *intel)
{
   assert(intel->gen == 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush, either to flush the render and texture caches
 * before reading from an FBO-drawn texture, or to make frontbuffer
 * rendering appear on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}