/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
     23 
     24 #include "brw_context.h"
     25 #include "brw_defines.h"
     26 #include "intel_batchbuffer.h"
     27 #include "intel_fbo.h"
     28 
     29 /**
     30  * According to the latest documentation, any PIPE_CONTROL with the
     31  * "Command Streamer Stall" bit set must also have another bit set,
     32  * with five different options:
     33  *
     34  *  - Render Target Cache Flush
     35  *  - Depth Cache Flush
     36  *  - Stall at Pixel Scoreboard
     37  *  - Post-Sync Operation
     38  *  - Depth Stall
     39  *  - DC Flush Enable
     40  *
     41  * I chose "Stall at Pixel Scoreboard" since we've used it effectively
     42  * in the past, but the choice is fairly arbitrary.
     43  */
     44 static void
     45 gen8_add_cs_stall_workaround_bits(uint32_t *flags)
     46 {
     47    uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
     48                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
     49                       PIPE_CONTROL_WRITE_IMMEDIATE |
     50                       PIPE_CONTROL_WRITE_DEPTH_COUNT |
     51                       PIPE_CONTROL_WRITE_TIMESTAMP |
     52                       PIPE_CONTROL_STALL_AT_SCOREBOARD |
     53                       PIPE_CONTROL_DEPTH_STALL |
     54                       PIPE_CONTROL_DATA_CACHE_FLUSH;
     55 
     56    /* If we're doing a CS stall, and don't already have one of the
     57     * workaround bits set, add "Stall at Pixel Scoreboard."
     58     */
     59    if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
     60       *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
     61 }
     62 
     63 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
     64  *
     65  * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
     66  *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
     67  *
     68  * Note that the kernel does CS stalls between batches, so we only need
     69  * to count them within a batch.
     70  */
     71 static uint32_t
     72 gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
     73 {
     74    const struct gen_device_info *devinfo = &brw->screen->devinfo;
     75 
     76    if (devinfo->gen == 7 && !devinfo->is_haswell) {
     77       if (flags & PIPE_CONTROL_CS_STALL) {
     78          /* If we're doing a CS stall, reset the counter and carry on. */
     79          brw->pipe_controls_since_last_cs_stall = 0;
     80          return 0;
     81       }
     82 
     83       /* If this is the fourth pipe control without a CS stall, do one now. */
     84       if (++brw->pipe_controls_since_last_cs_stall == 4) {
     85          brw->pipe_controls_since_last_cs_stall = 0;
     86          return PIPE_CONTROL_CS_STALL;
     87       }
     88    }
     89    return 0;
     90 }
     91 
     92 /* #1130 from gen10 workarounds page in h/w specs:
     93  * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
     94  *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
     95  *  Render target cache flush is enabled."
     96  *
     97  * Applicable to CNL B0 and C0 steppings only.
     98  */
     99 static void
    100 gen10_add_rcpfe_workaround_bits(uint32_t *flags)
    101 {
    102    if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
    103       *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
    104    } else if (*flags &
    105              (PIPE_CONTROL_WRITE_IMMEDIATE |
    106               PIPE_CONTROL_WRITE_DEPTH_COUNT |
    107               PIPE_CONTROL_WRITE_TIMESTAMP)) {
    108       *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
    109    }
    110 }
    111 
    112 static void
    113 brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
    114                       struct brw_bo *bo, uint32_t offset, uint64_t imm)
    115 {
    116    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    117 
    118    if (devinfo->gen >= 8) {
    119       if (devinfo->gen == 8)
    120          gen8_add_cs_stall_workaround_bits(&flags);
    121 
    122       if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
    123          if (devinfo->gen == 9) {
    124             /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
    125              * lists several workarounds:
    126              *
    127              *    "Project: SKL, KBL, BXT
    128              *
    129              *     If the VF Cache Invalidation Enable is set to a 1 in a
    130              *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
    131              *     sets to 0, with the VF Cache Invalidation Enable set to 0
    132              *     needs to be sent prior to the PIPE_CONTROL with VF Cache
    133              *     Invalidation Enable set to a 1."
    134              */
    135             brw_emit_pipe_control_flush(brw, 0);
    136          }
    137 
    138          if (devinfo->gen >= 9) {
    139             /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
    140              *
    141              *    "Project: BDW+
    142              *
    143              *     When VF Cache Invalidate is set Post Sync Operation must
    144              *     be enabled to Write Immediate Data or Write PS Depth
    145              *     Count or Write Timestamp."
    146              *
    147              * If there's a BO, we're already doing some kind of write.
    148              * If not, add a write to the workaround BO.
    149              *
    150              * XXX: This causes GPU hangs on Broadwell, so restrict it to
    151              *      Gen9+ for now...see this bug for more information:
    152              *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
    153              */
    154             if (!bo) {
    155                flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
    156                bo = brw->workaround_bo;
    157             }
    158          }
    159       }
    160 
    161       if (devinfo->gen == 10)
    162          gen10_add_rcpfe_workaround_bits(&flags);
    163 
    164       BEGIN_BATCH(6);
    165       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
    166       OUT_BATCH(flags);
    167       if (bo) {
    168          OUT_RELOC64(bo, RELOC_WRITE, offset);
    169       } else {
    170          OUT_BATCH(0);
    171          OUT_BATCH(0);
    172       }
    173       OUT_BATCH(imm);
    174       OUT_BATCH(imm >> 32);
    175       ADVANCE_BATCH();
    176    } else if (devinfo->gen >= 6) {
    177       if (devinfo->gen == 6 &&
    178           (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
    179          /* Hardware workaround: SNB B-Spec says:
    180           *
    181           *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
    182           *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
    183           *   required.
    184           */
    185          brw_emit_post_sync_nonzero_flush(brw);
    186       }
    187 
    188       flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
    189 
    190       /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
    191        * on later platforms.  We always use PPGTT on Gen7+.
    192        */
    193       unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
    194 
    195       BEGIN_BATCH(5);
    196       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
    197       OUT_BATCH(flags);
    198       if (bo) {
    199          OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
    200       } else {
    201          OUT_BATCH(0);
    202       }
    203       OUT_BATCH(imm);
    204       OUT_BATCH(imm >> 32);
    205       ADVANCE_BATCH();
    206    } else {
    207       BEGIN_BATCH(4);
    208       OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
    209       if (bo) {
    210          OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
    211       } else {
    212          OUT_BATCH(0);
    213       }
    214       OUT_BATCH(imm);
    215       OUT_BATCH(imm >> 32);
    216       ADVANCE_BATCH();
    217    }
    218 }
    219 
    220 /**
    221  * Emit a PIPE_CONTROL with various flushing flags.
    222  *
    223  * The caller is responsible for deciding what flags are appropriate for the
    224  * given generation.
    225  */
    226 void
    227 brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
    228 {
    229    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    230 
    231    if (devinfo->gen >= 6 &&
    232        (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
    233        (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
    234       /* A pipe control command with flush and invalidate bits set
    235        * simultaneously is an inherently racy operation on Gen6+ if the
    236        * contents of the flushed caches were intended to become visible from
    237        * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
    238        * first one should stall the pipeline to make sure that the flushed R/W
    239        * caches are coherent with memory once the specified R/O caches are
    240        * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
    241        * invalidation seems to happen at the bottom of the pipeline together
    242        * with any write cache flush, so this shouldn't be a concern.  In order
    243        * to ensure a full stall, we do an end-of-pipe sync.
    244        */
    245       brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
    246       flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
    247    }
    248 
    249    brw_emit_pipe_control(brw, flags, NULL, 0, 0);
    250 }
    251 
    252 /**
    253  * Emit a PIPE_CONTROL that writes to a buffer object.
    254  *
    255  * \p flags should contain one of the following items:
    256  *  - PIPE_CONTROL_WRITE_IMMEDIATE
    257  *  - PIPE_CONTROL_WRITE_TIMESTAMP
    258  *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
    259  */
    260 void
    261 brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
    262                             struct brw_bo *bo, uint32_t offset,
    263                             uint64_t imm)
    264 {
    265    brw_emit_pipe_control(brw, flags, bo, offset, imm);
    266 }
    267 
    268 /**
    269  * Restriction [DevSNB, DevIVB]:
    270  *
    271  * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
    272  * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
    273  * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
    274  * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
    275  * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
    276  * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
    277  * unless SW can otherwise guarantee that the pipeline from WM onwards is
    278  * already flushed (e.g., via a preceding MI_FLUSH).
    279  */
    280 void
    281 brw_emit_depth_stall_flushes(struct brw_context *brw)
    282 {
    283    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    284 
    285    assert(devinfo->gen >= 6);
    286 
    287    /* Starting on BDW, these pipe controls are unnecessary.
    288     *
    289     *   WM HW will internally manage the draining pipe and flushing of the caches
    290     *   when this command is issued. The PIPE_CONTROL restrictions are removed.
    291     */
    292    if (devinfo->gen >= 8)
    293       return;
    294 
    295    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
    296    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
    297    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
    298 }
    299 
    300 /**
    301  * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
    302  * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
    303  *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    304  *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    305  *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
    306  *  to be sent before any combination of VS associated 3DSTATE."
    307  */
    308 void
    309 gen7_emit_vs_workaround_flush(struct brw_context *brw)
    310 {
    311    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    312 
    313    assert(devinfo->gen == 7);
    314    brw_emit_pipe_control_write(brw,
    315                                PIPE_CONTROL_WRITE_IMMEDIATE
    316                                | PIPE_CONTROL_DEPTH_STALL,
    317                                brw->workaround_bo, 0, 0);
    318 }
    319 
    320 /**
    321  * From the PRM, Volume 2a:
    322  *
    323  *    "Indirect State Pointers Disable
    324  *
    325  *    At the completion of the post-sync operation associated with this pipe
    326  *    control packet, the indirect state pointers in the hardware are
    327  *    considered invalid; the indirect pointers are not saved in the context.
    328  *    If any new indirect state commands are executed in the command stream
    329  *    while the pipe control is pending, the new indirect state commands are
    330  *    preserved.
    331  *
    332  *    [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
    333  *    restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
    334  *    commands are only considered as Indirect State Pointers. Once ISP is
    335  *    issued in a context, SW must initialize by programming push constant
    336  *    commands for all the shaders (at least to zero length) before attempting
    337  *    any rendering operation for the same context."
    338  *
    339  * 3DSTATE_CONSTANT_* packets are restored during a context restore,
    340  * even though they point to a BO that has been already unreferenced at
    341  * the end of the previous batch buffer. This has been fine so far since
    342  * we are protected by these scratch page (every address not covered by
    343  * a BO should be pointing to the scratch page). But on CNL, it is
    344  * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
    345  * instruction.
    346  *
    347  * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
    348  * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
    349  * context restore, so the mentioned hang doesn't happen. However,
    350  * software must program push constant commands for all stages prior to
    351  * rendering anything, so we flag them as dirty.
    352  */
    353 void
    354 gen10_emit_isp_disable(struct brw_context *brw)
    355 {
    356    brw_emit_pipe_control(brw,
    357                          PIPE_CONTROL_ISP_DIS |
    358                          PIPE_CONTROL_CS_STALL,
    359                          NULL, 0, 0);
    360 
    361    brw->vs.base.push_constants_dirty = true;
    362    brw->tcs.base.push_constants_dirty = true;
    363    brw->tes.base.push_constants_dirty = true;
    364    brw->gs.base.push_constants_dirty = true;
    365    brw->wm.base.push_constants_dirty = true;
    366 }
    367 
    368 /**
    369  * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
    370  */
    371 void
    372 gen7_emit_cs_stall_flush(struct brw_context *brw)
    373 {
    374    brw_emit_pipe_control_write(brw,
    375                                PIPE_CONTROL_CS_STALL
    376                                | PIPE_CONTROL_WRITE_IMMEDIATE,
    377                                brw->workaround_bo, 0, 0);
    378 }
    379 
    380 /**
    381  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
    382  * implementing two workarounds on gen6.  From section 1.4.7.1
    383  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
    384  *
    385  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
    386  * produced by non-pipelined state commands), software needs to first
    387  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
    388  * 0.
    389  *
    390  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
    391  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
    392  *
    393  * And the workaround for these two requires this workaround first:
    394  *
    395  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
    396  * BEFORE the pipe-control with a post-sync op and no write-cache
    397  * flushes.
    398  *
    399  * And this last workaround is tricky because of the requirements on
    400  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
    401  * volume 2 part 1:
    402  *
    403  *     "1 of the following must also be set:
    404  *      - Render Target Cache Flush Enable ([12] of DW1)
    405  *      - Depth Cache Flush Enable ([0] of DW1)
    406  *      - Stall at Pixel Scoreboard ([1] of DW1)
    407  *      - Depth Stall ([13] of DW1)
    408  *      - Post-Sync Operation ([13] of DW1)
    409  *      - Notify Enable ([8] of DW1)"
    410  *
    411  * The cache flushes require the workaround flush that triggered this
    412  * one, so we can't use it.  Depth stall would trigger the same.
    413  * Post-sync nonzero is what triggered this second workaround, so we
    414  * can't use that one either.  Notify enable is IRQs, which aren't
    415  * really our business.  That leaves only stall at scoreboard.
    416  */
    417 void
    418 brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
    419 {
    420    brw_emit_pipe_control_flush(brw,
    421                                PIPE_CONTROL_CS_STALL |
    422                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
    423 
    424    brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
    425                                brw->workaround_bo, 0, 0);
    426 }
    427 
    428 /*
    429  * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
    430  *
    431  *  Write synchronization is a special case of end-of-pipe
    432  *  synchronization that requires that the render cache and/or depth
    433  *  related caches are flushed to memory, where the data will become
    434  *  globally visible. This type of synchronization is required prior to
    435  *  SW (CPU) actually reading the result data from memory, or initiating
    436  *  an operation that will use as a read surface (such as a texture
    437  *  surface) a previous render target and/or depth/stencil buffer
    438  *
    439  *
    440  * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
    441  *
    442  *  Exercising the write cache flush bits (Render Target Cache Flush
    443  *  Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
    444  *  ensures the write caches are flushed and doesn't guarantee the data
    445  *  is globally visible.
    446  *
    447  *  SW can track the completion of the end-of-pipe-synchronization by
    448  *  using "Notify Enable" and "PostSync Operation - Write Immediate
    449  *  Data" in the PIPE_CONTROL command.
    450  */
    451 void
    452 brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
    453 {
    454    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    455 
    456    if (devinfo->gen >= 6) {
    457       /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
    458        *
    459        *    "The most common action to perform upon reaching a synchronization
    460        *    point is to write a value out to memory. An immediate value
    461        *    (included with the synchronization command) may be written."
    462        *
    463        *
    464        * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
    465        *
    466        *    "In case the data flushed out by the render engine is to be read
    467        *    back in to the render engine in coherent manner, then the render
    468        *    engine has to wait for the fence completion before accessing the
    469        *    flushed data. This can be achieved by following means on various
    470        *    products: PIPE_CONTROL command with CS Stall and the required
    471        *    write caches flushed with Post-Sync-Operation as Write Immediate
    472        *    Data.
    473        *
    474        *    Example:
    475        *       - Workload-1 (3D/GPGPU/MEDIA)
    476        *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
    477        *         Data, Required Write Cache Flush bits set)
    478        *       - Workload-2 (Can use the data produce or output by Workload-1)
    479        */
    480       brw_emit_pipe_control_write(brw,
    481                                   flags | PIPE_CONTROL_CS_STALL |
    482                                   PIPE_CONTROL_WRITE_IMMEDIATE,
    483                                   brw->workaround_bo, 0, 0);
    484 
    485       if (devinfo->is_haswell) {
    486          /* Haswell needs addition work-arounds:
    487           *
    488           * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
    489           *
    490           *    Option 1:
    491           *    PIPE_CONTROL command with the CS Stall and the required write
    492           *    caches flushed with Post-SyncOperation as Write Immediate Data
    493           *    followed by eight dummy MI_STORE_DATA_IMM (write to scratch
    494           *    spce) commands.
    495           *
    496           *    Example:
    497           *       - Workload-1
    498           *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
    499           *         Immediate Data, Required Write Cache Flush bits set)
    500           *       - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
    501           *       - Workload-2 (Can use the data produce or output by
    502           *         Workload-1)
    503           *
    504           * Unfortunately, both the PRMs and the internal docs are a bit
    505           * out-of-date in this regard.  What the windows driver does (and
    506           * this appears to actually work) is to emit a register read from the
    507           * memory address written by the pipe control above.
    508           *
    509           * What register we load into doesn't matter.  We choose an indirect
    510           * rendering register because we know it always exists and it's one
    511           * of the first registers the command parser allows us to write.  If
    512           * you don't have command parser support in your kernel (pre-4.2),
    513           * this will get turned into MI_NOOP and you won't get the
    514           * workaround.  Unfortunately, there's just not much we can do in
    515           * that case.  This register is perfectly safe to write since we
    516           * always re-load all of the indirect draw registers right before
    517           * 3DPRIMITIVE when needed anyway.
    518           */
    519          brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE,
    520                                brw->workaround_bo, 0);
    521       }
    522    } else {
    523       /* On gen4-5, a regular pipe control seems to suffice. */
    524       brw_emit_pipe_control_flush(brw, flags);
    525    }
    526 }
    527 
    528 /* Emit a pipelined flush to either flush render and texture cache for
    529  * reading from a FBO-drawn texture, or flush so that frontbuffer
    530  * render appears on the screen in DRI1.
    531  *
    532  * This is also used for the always_flush_cache driconf debug option.
    533  */
    534 void
    535 brw_emit_mi_flush(struct brw_context *brw)
    536 {
    537    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    538 
    539    if (brw->batch.ring == BLT_RING && devinfo->gen >= 6) {
    540       const unsigned n_dwords = devinfo->gen >= 8 ? 5 : 4;
    541       BEGIN_BATCH_BLT(n_dwords);
    542       OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
    543       OUT_BATCH(0);
    544       OUT_BATCH(0);
    545       OUT_BATCH(0);
    546       if (n_dwords == 5)
    547          OUT_BATCH(0);
    548       ADVANCE_BATCH();
    549    } else {
    550       int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
    551       if (devinfo->gen >= 6) {
    552          flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
    553                   PIPE_CONTROL_CONST_CACHE_INVALIDATE |
    554                   PIPE_CONTROL_DATA_CACHE_FLUSH |
    555                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
    556                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
    557                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
    558                   PIPE_CONTROL_CS_STALL;
    559       }
    560       brw_emit_pipe_control_flush(brw, flags);
    561    }
    562 }
    563 
    564 int
    565 brw_init_pipe_control(struct brw_context *brw,
    566                       const struct gen_device_info *devinfo)
    567 {
    568    if (devinfo->gen < 6)
    569       return 0;
    570 
    571    /* We can't just use brw_state_batch to get a chunk of space for
    572     * the gen6 workaround because it involves actually writing to
    573     * the buffer, and the kernel doesn't let us write to the batch.
    574     */
    575    brw->workaround_bo = brw_bo_alloc(brw->bufmgr,
    576                                      "pipe_control workaround",
    577                                      4096, 4096);
    578    if (brw->workaround_bo == NULL)
    579       return -ENOMEM;
    580 
    581    brw->pipe_controls_since_last_cs_stall = 0;
    582 
    583    return 0;
    584 }
    585 
    586 void
    587 brw_fini_pipe_control(struct brw_context *brw)
    588 {
    589    brw_bo_unreference(brw->workaround_bo);
    590 }
    591