1 /* 2 * Copyright 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file brw_binding_tables.c 26 * 27 * State atoms which upload the "binding table" for each shader stage. 28 * 29 * Binding tables map a numeric "surface index" to the SURFACE_STATE structure 30 * for a currently bound surface. This allows SEND messages (such as sampler 31 * or data port messages) to refer to a particular surface by number, rather 32 * than by pointer. 33 * 34 * The binding table is stored as a (sparse) array of SURFACE_STATE entries; 35 * surface indexes are simply indexes into the array. The ordering of the 36 * entries is entirely left up to software; see the SURF_INDEX_* macros in 37 * brw_context.h to see our current layout. 38 */ 39 40 #include "main/mtypes.h" 41 42 #include "brw_context.h" 43 #include "brw_defines.h" 44 #include "brw_state.h" 45 #include "intel_batchbuffer.h" 46 47 static const GLuint stage_to_bt_edit[] = { 48 [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS, 49 [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS, 50 [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS, 51 }; 52 53 static uint32_t 54 reserve_hw_bt_space(struct brw_context *brw, unsigned bytes) 55 { 56 /* From the Broadwell PRM, Volume 16, "Workarounds", 57 * WaStateBindingTableOverfetch: 58 * "HW over-fetches two cache lines of binding table indices. When 59 * using the resource streamer, SW needs to pad binding table pointer 60 * updates with an additional two cache lines." 61 * 62 * Cache lines are 64 bytes, so we subtract 128 bytes from the size of 63 * the binding table pool buffer. 64 */ 65 if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) { 66 gen7_reset_hw_bt_pool_offsets(brw); 67 } 68 69 uint32_t offset = brw->hw_bt_pool.next_offset; 70 71 /* From the Haswell PRM, Volume 2b: Command Reference: Instructions, 72 * 3DSTATE_BINDING_TABLE_POINTERS_xS: 73 * 74 * "If HW Binding Table is enabled, the offset is relative to the 75 * Binding Table Pool Base Address and the alignment is 64 bytes." 76 */ 77 brw->hw_bt_pool.next_offset += ALIGN(bytes, 64); 78 79 return offset; 80 } 81 82 /** 83 * Upload a shader stage's binding table as indirect state. 84 * 85 * This copies brw_stage_state::surf_offset[] into the indirect state section 86 * of the batchbuffer (allocated by brw_state_batch()). 87 */ 88 void 89 brw_upload_binding_table(struct brw_context *brw, 90 uint32_t packet_name, 91 const struct brw_stage_prog_data *prog_data, 92 struct brw_stage_state *stage_state) 93 { 94 if (prog_data->binding_table.size_bytes == 0) { 95 /* There are no surfaces; skip making the binding table altogether. */ 96 if (stage_state->bind_bo_offset == 0 && brw->gen < 9) 97 return; 98 99 stage_state->bind_bo_offset = 0; 100 } else { 101 /* Upload a new binding table. */ 102 if (INTEL_DEBUG & DEBUG_SHADER_TIME) { 103 brw_emit_buffer_surface_state( 104 brw, &stage_state->surf_offset[ 105 prog_data->binding_table.shader_time_start], 106 brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW, 107 brw->shader_time.bo->size, 1, true); 108 } 109 /* When RS is enabled use hw-binding table uploads, otherwise fallback to 110 * software-uploads. 111 */ 112 if (brw->use_resource_streamer) { 113 gen7_update_binding_table_from_array(brw, stage_state->stage, 114 stage_state->surf_offset, 115 prog_data->binding_table 116 .size_bytes / 4); 117 } else { 118 uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, 119 prog_data->binding_table.size_bytes, 120 32, 121 &stage_state->bind_bo_offset); 122 123 /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ 124 memcpy(bind, stage_state->surf_offset, 125 prog_data->binding_table.size_bytes); 126 } 127 } 128 129 brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; 130 131 if (brw->gen >= 7) { 132 if (brw->use_resource_streamer) { 133 stage_state->bind_bo_offset = 134 reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes); 135 } 136 BEGIN_BATCH(2); 137 OUT_BATCH(packet_name << 16 | (2 - 2)); 138 /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field 139 * when hw-generated binding table is enabled. 140 */ 141 OUT_BATCH(brw->use_resource_streamer ? 142 (stage_state->bind_bo_offset >> 1) : 143 stage_state->bind_bo_offset); 144 ADVANCE_BATCH(); 145 } 146 } 147 148 /** 149 * State atoms which upload the binding table for a particular shader stage. 150 * @{ 151 */ 152 153 /** Upload the VS binding table. */ 154 static void 155 brw_vs_upload_binding_table(struct brw_context *brw) 156 { 157 /* BRW_NEW_VS_PROG_DATA */ 158 const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; 159 brw_upload_binding_table(brw, 160 _3DSTATE_BINDING_TABLE_POINTERS_VS, 161 prog_data, 162 &brw->vs.base); 163 } 164 165 const struct brw_tracked_state brw_vs_binding_table = { 166 .dirty = { 167 .mesa = 0, 168 .brw = BRW_NEW_BATCH | 169 BRW_NEW_BLORP | 170 BRW_NEW_VS_CONSTBUF | 171 BRW_NEW_VS_PROG_DATA | 172 BRW_NEW_SURFACES, 173 }, 174 .emit = brw_vs_upload_binding_table, 175 }; 176 177 178 /** Upload the PS binding table. */ 179 static void 180 brw_upload_wm_binding_table(struct brw_context *brw) 181 { 182 /* BRW_NEW_FS_PROG_DATA */ 183 const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; 184 brw_upload_binding_table(brw, 185 _3DSTATE_BINDING_TABLE_POINTERS_PS, 186 prog_data, 187 &brw->wm.base); 188 } 189 190 const struct brw_tracked_state brw_wm_binding_table = { 191 .dirty = { 192 .mesa = 0, 193 .brw = BRW_NEW_BATCH | 194 BRW_NEW_BLORP | 195 BRW_NEW_FS_PROG_DATA | 196 BRW_NEW_SURFACES, 197 }, 198 .emit = brw_upload_wm_binding_table, 199 }; 200 201 /** Upload the TCS binding table (if tessellation stages are active). */ 202 static void 203 brw_tcs_upload_binding_table(struct brw_context *brw) 204 { 205 /* Skip if the tessellation stages are disabled. */ 206 if (brw->tess_eval_program == NULL) 207 return; 208 209 /* BRW_NEW_TCS_PROG_DATA */ 210 const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; 211 brw_upload_binding_table(brw, 212 _3DSTATE_BINDING_TABLE_POINTERS_HS, 213 prog_data, 214 &brw->tcs.base); 215 } 216 217 const struct brw_tracked_state brw_tcs_binding_table = { 218 .dirty = { 219 .mesa = 0, 220 .brw = BRW_NEW_BATCH | 221 BRW_NEW_BLORP | 222 BRW_NEW_DEFAULT_TESS_LEVELS | 223 BRW_NEW_SURFACES | 224 BRW_NEW_TCS_CONSTBUF | 225 BRW_NEW_TCS_PROG_DATA, 226 }, 227 .emit = brw_tcs_upload_binding_table, 228 }; 229 230 /** Upload the TES binding table (if TES is active). */ 231 static void 232 brw_tes_upload_binding_table(struct brw_context *brw) 233 { 234 /* If there's no TES, skip changing anything. */ 235 if (brw->tess_eval_program == NULL) 236 return; 237 238 /* BRW_NEW_TES_PROG_DATA */ 239 const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; 240 brw_upload_binding_table(brw, 241 _3DSTATE_BINDING_TABLE_POINTERS_DS, 242 prog_data, 243 &brw->tes.base); 244 } 245 246 const struct brw_tracked_state brw_tes_binding_table = { 247 .dirty = { 248 .mesa = 0, 249 .brw = BRW_NEW_BATCH | 250 BRW_NEW_BLORP | 251 BRW_NEW_SURFACES | 252 BRW_NEW_TES_CONSTBUF | 253 BRW_NEW_TES_PROG_DATA, 254 }, 255 .emit = brw_tes_upload_binding_table, 256 }; 257 258 /** Upload the GS binding table (if GS is active). */ 259 static void 260 brw_gs_upload_binding_table(struct brw_context *brw) 261 { 262 /* If there's no GS, skip changing anything. */ 263 if (brw->geometry_program == NULL) 264 return; 265 266 /* BRW_NEW_GS_PROG_DATA */ 267 const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; 268 brw_upload_binding_table(brw, 269 _3DSTATE_BINDING_TABLE_POINTERS_GS, 270 prog_data, 271 &brw->gs.base); 272 } 273 274 const struct brw_tracked_state brw_gs_binding_table = { 275 .dirty = { 276 .mesa = 0, 277 .brw = BRW_NEW_BATCH | 278 BRW_NEW_BLORP | 279 BRW_NEW_GS_CONSTBUF | 280 BRW_NEW_GS_PROG_DATA | 281 BRW_NEW_SURFACES, 282 }, 283 .emit = brw_gs_upload_binding_table, 284 }; 285 286 /** 287 * Edit a single entry in a hardware-generated binding table 288 */ 289 void 290 gen7_edit_hw_binding_table_entry(struct brw_context *brw, 291 gl_shader_stage stage, 292 uint32_t index, 293 uint32_t surf_offset) 294 { 295 assert(stage < ARRAY_SIZE(stage_to_bt_edit)); 296 assert(stage_to_bt_edit[stage]); 297 298 uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) | 299 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) : 300 HSW_SURFACE_STATE_EDIT(surf_offset)); 301 302 BEGIN_BATCH(3); 303 OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2)); 304 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL); 305 OUT_BATCH(dw2); 306 ADVANCE_BATCH(); 307 } 308 309 /** 310 * Upload a whole hardware binding table for the given stage. 311 * 312 * Takes an array of surface offsets and the number of binding table 313 * entries. 314 */ 315 void 316 gen7_update_binding_table_from_array(struct brw_context *brw, 317 gl_shader_stage stage, 318 const uint32_t* binding_table, 319 int num_surfaces) 320 { 321 uint32_t dw2 = 0; 322 323 assert(stage < ARRAY_SIZE(stage_to_bt_edit)); 324 assert(stage_to_bt_edit[stage]); 325 326 BEGIN_BATCH(num_surfaces + 2); 327 OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces); 328 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL); 329 for (int i = 0; i < num_surfaces; i++) { 330 dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) | 331 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) : 332 HSW_SURFACE_STATE_EDIT(binding_table[i])); 333 OUT_BATCH(dw2); 334 } 335 ADVANCE_BATCH(); 336 } 337 338 /** 339 * Disable hardware binding table support, falling back to the 340 * older software-generated binding table mechanism. 341 */ 342 void 343 gen7_disable_hw_binding_tables(struct brw_context *brw) 344 { 345 if (!brw->use_resource_streamer) 346 return; 347 /* From the Haswell PRM, Volume 7: 3D Media GPGPU, 348 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: 349 * 350 * "When switching between HW and SW binding table generation, SW must 351 * issue a state cache invalidate." 352 */ 353 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); 354 355 int pkt_len = brw->gen >= 8 ? 4 : 3; 356 357 BEGIN_BATCH(pkt_len); 358 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); 359 if (brw->gen >= 8) { 360 OUT_BATCH(0); 361 OUT_BATCH(0); 362 OUT_BATCH(0); 363 } else { 364 OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE); 365 OUT_BATCH(0); 366 } 367 ADVANCE_BATCH(); 368 } 369 370 /** 371 * Enable hardware binding tables and set up the binding table pool. 372 */ 373 void 374 gen7_enable_hw_binding_tables(struct brw_context *brw) 375 { 376 if (!brw->use_resource_streamer) 377 return; 378 379 if (!brw->hw_bt_pool.bo) { 380 /* We use a single re-usable buffer object for the lifetime of the 381 * context and size it to maximum allowed binding tables that can be 382 * programmed per batch: 383 * 384 * From the Haswell PRM, Volume 7: 3D Media GPGPU, 385 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: 386 * "A maximum of 16,383 Binding tables are allowed in any batch buffer" 387 */ 388 static const int max_size = 16383 * 4; 389 brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", 390 max_size, 64); 391 brw->hw_bt_pool.next_offset = 0; 392 } 393 394 /* From the Haswell PRM, Volume 7: 3D Media GPGPU, 395 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: 396 * 397 * "When switching between HW and SW binding table generation, SW must 398 * issue a state cache invalidate." 399 */ 400 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); 401 402 int pkt_len = brw->gen >= 8 ? 4 : 3; 403 uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE; 404 if (brw->is_haswell) { 405 dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) | 406 HSW_BT_POOL_ALLOC_MUST_BE_ONE; 407 } else if (brw->gen >= 8) { 408 dw1 |= BDW_MOCS_WB; 409 } 410 411 BEGIN_BATCH(pkt_len); 412 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); 413 if (brw->gen >= 8) { 414 OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); 415 OUT_BATCH(brw->hw_bt_pool.bo->size); 416 } else { 417 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); 418 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, 419 brw->hw_bt_pool.bo->size); 420 } 421 ADVANCE_BATCH(); 422 } 423 424 void 425 gen7_reset_hw_bt_pool_offsets(struct brw_context *brw) 426 { 427 brw->hw_bt_pool.next_offset = 0; 428 } 429 430 const struct brw_tracked_state gen7_hw_binding_tables = { 431 .dirty = { 432 .mesa = 0, 433 .brw = BRW_NEW_BATCH | 434 BRW_NEW_BLORP, 435 }, 436 .emit = gen7_enable_hw_binding_tables 437 }; 438 439 /** @} */ 440 441 /** 442 * State atoms which emit 3DSTATE packets to update the binding table pointers. 443 * @{ 444 */ 445 446 /** 447 * (Gen4-5) Upload the binding table pointers for all shader stages. 448 * 449 * The binding table pointers are relative to the surface state base address, 450 * which points at the batchbuffer containing the streamed batch state. 451 */ 452 static void 453 gen4_upload_binding_table_pointers(struct brw_context *brw) 454 { 455 BEGIN_BATCH(6); 456 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); 457 OUT_BATCH(brw->vs.base.bind_bo_offset); 458 OUT_BATCH(0); /* gs */ 459 OUT_BATCH(0); /* clip */ 460 OUT_BATCH(0); /* sf */ 461 OUT_BATCH(brw->wm.base.bind_bo_offset); 462 ADVANCE_BATCH(); 463 } 464 465 const struct brw_tracked_state brw_binding_table_pointers = { 466 .dirty = { 467 .mesa = 0, 468 .brw = BRW_NEW_BATCH | 469 BRW_NEW_BLORP | 470 BRW_NEW_BINDING_TABLE_POINTERS | 471 BRW_NEW_STATE_BASE_ADDRESS, 472 }, 473 .emit = gen4_upload_binding_table_pointers, 474 }; 475 476 /** 477 * (Sandybridge Only) Upload the binding table pointers for all shader stages. 478 * 479 * The binding table pointers are relative to the surface state base address, 480 * which points at the batchbuffer containing the streamed batch state. 481 */ 482 static void 483 gen6_upload_binding_table_pointers(struct brw_context *brw) 484 { 485 BEGIN_BATCH(4); 486 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | 487 GEN6_BINDING_TABLE_MODIFY_VS | 488 GEN6_BINDING_TABLE_MODIFY_GS | 489 GEN6_BINDING_TABLE_MODIFY_PS | 490 (4 - 2)); 491 OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */ 492 if (brw->ff_gs.prog_active) 493 OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */ 494 else 495 OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */ 496 OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */ 497 ADVANCE_BATCH(); 498 } 499 500 const struct brw_tracked_state gen6_binding_table_pointers = { 501 .dirty = { 502 .mesa = 0, 503 .brw = BRW_NEW_BATCH | 504 BRW_NEW_BLORP | 505 BRW_NEW_BINDING_TABLE_POINTERS | 506 BRW_NEW_STATE_BASE_ADDRESS, 507 }, 508 .emit = gen6_upload_binding_table_pointers, 509 }; 510 511 /** @} */ 512