/*
 * Copyright 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_fbo.h"

#include "brw_context.h"
#include "brw_state.h"

#include "blorp/blorp_genX_exec.h"

#include "brw_blorp.h"

/* Reserve space for n 32-bit dwords in the render batch and return a
 * pointer to the start of the reserved region.
 */
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   intel_batchbuffer_begin(brw, n, RENDER_RING);
   uint32_t *map = brw->batch.map_next;
   brw->batch.map_next += n;
   intel_batchbuffer_advance(brw);
   return map;
}

/* Emit a relocation for 'location' within the batch and return the presumed
 * address.  Gen8+ uses 64-bit addresses; earlier gens use 32-bit.
 */
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   uint32_t offset = (char *)location - (char *)brw->batch.map;
   if (brw->gen >= 8) {
      return intel_batchbuffer_reloc64(&brw->batch, address.buffer, offset,
                                       address.read_domains,
                                       address.write_domain,
                                       address.offset + delta);
   } else {
      return intel_batchbuffer_reloc(&brw->batch, address.buffer, offset,
                                     address.read_domains,
                                     address.write_domain,
                                     address.offset + delta);
   }
}

/* Emit a relocation for a surface-state entry and write the presumed GPU
 * address into the batch now; the kernel patches it at execbuf time if the
 * buffer has moved.
 */
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;
   drm_intel_bo *bo = address.buffer;

   drm_intel_bo_emit_reloc(brw->batch.bo, ss_offset,
                           bo, address.offset + delta,
                           address.read_domains, address.write_domain);

   uint64_t reloc_val = bo->offset64 + address.offset + delta;
   void *reloc_ptr = (void *)brw->batch.map + ss_offset;
#if GEN_GEN >= 8
   *(uint64_t *)reloc_ptr = reloc_val;
#else
   *(uint32_t *)reloc_ptr = reloc_val;
#endif
}

/* Dynamic state is suballocated from the batchbuffer's state space via
 * brw_state_batch().
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          enum aub_state_struct_type type,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   return brw_state_batch(brw, type, size, alignment, offset);
}
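/* Allocate a binding table plus one surface-state block per entry from the
 * batch's state space, writing each surface's offset into the table.
 */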
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset, uint32_t *surface_offsets,
                          void **surface_maps)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   uint32_t *bt_map = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                                      num_entries * sizeof(uint32_t), 32,
                                      bt_offset);

   for (unsigned i = 0; i < num_entries; i++) {
      surface_maps[i] = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                        state_size, state_alignment,
                                        &(surface_offsets)[i]);
      bt_map[i] = surface_offsets[i];
   }
}

static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

   /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
    *
    *    "The VF cache needs to be invalidated before binding and then using
    *    Vertex Buffers that overlap with any previously bound Vertex Buffer
    *    (at a 64B granularity) since the last invalidation.  A VF cache
    *    invalidate is performed by setting the "VF Cache Invalidation Enable"
    *    bit in PIPE_CONTROL."
    *
    * This restriction first appears in the Skylake PRM but the internal docs
    * also list it as being an issue on Broadwell.  In order to avoid this
    * problem, we align all vertex buffer allocations to 64 bytes.
    */
   uint32_t offset;
   void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
                                size, 64, &offset);

   *addr = (struct blorp_address) {
      .buffer = brw->batch.bo,
      .read_domains = I915_GEM_DOMAIN_VERTEX,
      .write_domain = 0,
      .offset = offset,
   };

   return data;
}

static void
blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
{
   /* All allocated states come from the batch which we will flush before we
    * submit it.  There's nothing for us to do here.
    */
}

/* (Re)configure the URB so VS entries are at least vs_entry_size.  On gen7+
 * we can skip the re-upload if the current configuration is already large
 * enough and no relevant state has changed.
 */
static void
blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

#if GEN_GEN >= 7
   if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
       brw->urb.vsize >= vs_entry_size)
      return;

   brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;

   gen7_upload_urb(brw, vs_entry_size, false, false);
#else
   gen6_upload_urb(brw, vs_entry_size, false, 0);
#endif
}
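/* Top-level entry point for a blorp operation on i965: flush caches as
 * needed, save the batch state, emit the pipeline setup blorp requires,
 * run the blorp state emission, and mark all tracked GL state dirty
 * (BRW_NEW_BLORP), since blorp clobbers it.
 */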
void
genX(blorp_exec)(struct blorp_batch *batch,
                 const struct blorp_params *params)
{
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;
   struct gl_context *ctx = &brw->ctx;
   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1500;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   if (params->src.enabled)
      brw_render_cache_set_check_flush(brw, params->src.addr.buffer);
   brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);

   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);

retry:
   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
   intel_batchbuffer_save_state(brw);
   drm_intel_bo *saved_bo = brw->batch.bo;
   uint32_t saved_used = USED_BATCH(brw->batch);
   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;

#if GEN_GEN == 6
   /* Emit workaround flushes when we switch from drawing to blorping. */
   brw_emit_post_sync_nonzero_flush(brw);
#endif

   brw_upload_state_base_address(brw);

#if GEN_GEN >= 8
   gen7_l3_state.emit(brw);
#endif

   if (brw->use_resource_streamer)
      gen7_disable_hw_binding_tables(brw);

   brw_emit_depth_stall_flushes(brw);

#if GEN_GEN == 8
   gen8_write_pma_stall_bits(brw, 0);
#endif

   blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
   }

   blorp_exec(batch, params);

   /* Make sure we didn't wrap the batch unintentionally, and make sure we
    * reserved enough space that a wrap will never happen.
    */
   assert(brw->batch.bo == saved_bo);
   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
          (saved_state_batch_offset - brw->batch.state_batch_offset) <
          estimated_max_batch_usage);
   /* Shut up compiler warnings on release build */
   (void)saved_bo;
   (void)saved_used;
   (void)saved_state_batch_offset;

   /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
    * batch and try again with nothing else in the batch.
    */
   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!check_aperture_failed_once) {
         check_aperture_failed_once = true;
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: blorp emit exceeded available aperture space\n");
      }
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->ctx.NewDriverState |= BRW_NEW_BLORP;
   brw->no_depth_or_stencil = false;
   brw->ib.type = -1;

   if (params->dst.enabled)
      brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
   if (params->depth.enabled)
      brw_render_cache_set_add_bo(brw, params->depth.addr.buffer);
   if (params->stencil.enabled)
      brw_render_cache_set_add_bo(brw, params->stencil.addr.buffer);
}
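
/* Note: genX(blorp_exec) expands to a per-generation symbol (gen6_blorp_exec,
 * gen7_blorp_exec, ...) depending on the GEN_GEN this file is compiled for.
 * A hypothetical gen-neutral caller (sketch only, not part of this file)
 * would be expected to dispatch on the hardware generation, roughly:
 *
 *    switch (brw->gen) {
 *    case 6: gen6_blorp_exec(batch, params); break;
 *    case 7: gen7_blorp_exec(batch, params); break;
 *    ...
 *    }
 */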