#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H

#include "main/mtypes.h"

#include "brw_context.h"
#include "intel_bufmgr.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Number of bytes to reserve for commands necessary to complete a batch.
 *
 * This includes:
 * - MI_BATCHBUFFER_END (4 bytes)
 * - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes)
 * - Any state emitted by vtbl->finish_batch():
 *   - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
 *   - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
 *   - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
 *     - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB,
 *       which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes
 *     - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+.  ==> 12 bytes.
 *       On Ironlake, it's 6 DWords, but we have some slack due to the lack of
 *       Sandybridge PIPE_CONTROL madness.
 *   - CC_STATE workaround on HSW (17 * 4 = 68 bytes)
 *     - 10 DWords for the initial MI_FLUSH
 *     - 2 DWords for CC state setup
 *     - 5 DWords for the required PIPE_CONTROL at the end
 *   - Restoring the L3 configuration (24 DWords = 96 bytes)
 *     - 2 * 6 DWords for two PIPE_CONTROL flushes.
 *     - 7 DWords for L3 configuration set-up.
 *     - 5 DWords for L3 atomic set-up (on HSW).
 */
#define BATCH_RESERVED 308
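
/*
 * A minimal sketch (not part of the API) of how this reserve participates in
 * space accounting: assuming the batch's reserved_space field is set to
 * BATCH_RESERVED, a caller wanting to emit `sz` bytes of commands can check
 *
 *    if (intel_batchbuffer_space(&brw->batch) < sz)
 *       intel_batchbuffer_flush(brw);
 *
 * In practice callers go through intel_batchbuffer_require_space(), declared
 * below, which is the intended wrapper for this check (plus ring bookkeeping).
 */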

struct intel_batchbuffer;

void intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw);
void intel_batchbuffer_init(struct intel_batchbuffer *batch, dri_bufmgr *bufmgr,
                            bool has_llc);
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void intel_batchbuffer_save_state(struct brw_context *brw);
void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                     enum brw_gpu_ring ring);

int _intel_batchbuffer_flush(struct brw_context *brw,
                             const char *file, int line);

#define intel_batchbuffer_flush(intel) \
   _intel_batchbuffer_flush(intel, __FILE__, __LINE__)
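
/*
 * For example, a call site written as
 *
 *    intel_batchbuffer_flush(brw);
 *
 * expands to _intel_batchbuffer_flush(brw, __FILE__, __LINE__), so the flush
 * path can report which caller triggered the flush.
 */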

/* Unlike bmBufferData, this currently requires the buffer be mapped.
 * Consider it a convenience function wrapping multiple
 * intel_batchbuffer_emit_dword() calls.
 */
void intel_batchbuffer_data(struct brw_context *brw,
                            const void *data, GLuint bytes,
                            enum brw_gpu_ring ring);
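
/*
 * Illustrative use (the payload values are hypothetical): copying a small
 * block of pre-built DWords into the batch in one call rather than a loop of
 * intel_batchbuffer_emit_dword():
 *
 *    uint32_t payload[2] = { 0, 0 };
 *    intel_batchbuffer_data(brw, payload, sizeof(payload), RENDER_RING);
 */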

uint32_t intel_batchbuffer_reloc(struct intel_batchbuffer *batch,
                                 drm_intel_bo *buffer,
                                 uint32_t offset,
                                 uint32_t read_domains,
                                 uint32_t write_domain,
                                 uint32_t delta);
uint64_t intel_batchbuffer_reloc64(struct intel_batchbuffer *batch,
                                   drm_intel_bo *buffer,
                                   uint32_t offset,
                                   uint32_t read_domains,
                                   uint32_t write_domain,
                                   uint32_t delta);

#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))
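
/* Note: map and map_next are uint32_t pointers, so USED_BATCH() is a count of
 * DWords already written, not bytes; multiply by 4 for bytes, as
 * intel_batchbuffer_space() does below.  For example, emitting one dword with
 * intel_batchbuffer_emit_dword() grows USED_BATCH(brw->batch) by 1, not 4.
 */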

static inline uint32_t float_as_int(float f)
{
   union {
      float f;
      uint32_t d;
   } fi;

   fi.f = f;
   return fi.d;
}

/* Inline functions - might actually be better off with these
 * non-inlined.  Certainly better off switching all command packets to
 * be passed as structs rather than dwords, but that's a little bit of
 * work...
 */
static inline unsigned
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
   return (batch->state_batch_offset - batch->reserved_space)
      - USED_BATCH(*batch) * 4;
}


static inline void
intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
{
#ifdef DEBUG
   assert(intel_batchbuffer_space(batch) >= 4);
#endif
   *batch->map_next++ = dword;
   assert(batch->ring != UNKNOWN_RING);
}

static inline void
intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
{
   intel_batchbuffer_emit_dword(batch, float_as_int(f));
}

static inline void
intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
{
   intel_batchbuffer_require_space(brw, n * 4, ring);

#ifdef DEBUG
   brw->batch.emit = USED_BATCH(brw->batch);
   brw->batch.total = n;
#endif
}

static inline void
intel_batchbuffer_advance(struct brw_context *brw)
{
#ifdef DEBUG
   struct intel_batchbuffer *batch = &brw->batch;
   unsigned int _n = USED_BATCH(*batch) - batch->emit;
   assert(batch->total != 0);
   if (_n != batch->total) {
      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
              _n, batch->total);
      abort();
   }
   batch->total = 0;
#else
   (void) brw;
#endif
}

#define BEGIN_BATCH(n) do {                            \
   intel_batchbuffer_begin(brw, (n), RENDER_RING);     \
   uint32_t *__map = brw->batch.map_next;              \
   brw->batch.map_next += (n)

#define BEGIN_BATCH_BLT(n) do {                        \
   intel_batchbuffer_begin(brw, (n), BLT_RING);        \
   uint32_t *__map = brw->batch.map_next;              \
   brw->batch.map_next += (n)

#define OUT_BATCH(d) *__map++ = (d)
#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))

#define OUT_RELOC(buf, read_domains, write_domain, delta) do {    \
   uint32_t __offset = (__map - brw->batch.map) * 4;              \
   OUT_BATCH(intel_batchbuffer_reloc(&brw->batch, (buf), __offset, \
                                     (read_domains),              \
                                     (write_domain),              \
                                     (delta)));                   \
} while (0)

/* Handle 48-bit address relocations for Gen8+ */
#define OUT_RELOC64(buf, read_domains, write_domain, delta) do {      \
   uint32_t __offset = (__map - brw->batch.map) * 4;                  \
   uint64_t reloc64 = intel_batchbuffer_reloc64(&brw->batch, (buf), __offset, \
                                                (read_domains),       \
                                                (write_domain),       \
                                                (delta));             \
   OUT_BATCH(reloc64);                                                \
   OUT_BATCH(reloc64 >> 32);                                          \
} while (0)

#define ADVANCE_BATCH()                  \
   assert(__map == brw->batch.map_next); \
   intel_batchbuffer_advance(brw);       \
} while (0)
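
/*
 * Typical emission pattern (illustrative only; the header DWord below is a
 * hypothetical placeholder, not a real command encoding):
 *
 *    BEGIN_BATCH(3);
 *    OUT_BATCH(_EXAMPLE_CMD << 16 | (3 - 2));
 *    OUT_BATCH(0);
 *    OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 *    ADVANCE_BATCH();
 *
 * The DWord count passed to BEGIN_BATCH() must match the number of DWords
 * actually written with OUT_BATCH()/OUT_RELOC() (OUT_RELOC64() writes two);
 * in DEBUG builds intel_batchbuffer_advance() prints the mismatch and aborts.
 */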

#ifdef __cplusplus
}
#endif

#endif