1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "radeon/r600_cs.h" 25 #include "util/u_memory.h" 26 #include "si_pipe.h" 27 #include "sid.h" 28 29 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) 30 { 31 state->last_opcode = opcode; 32 state->last_pm4 = state->ndw++; 33 } 34 35 void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) 36 { 37 state->pm4[state->ndw++] = dw; 38 } 39 40 void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) 41 { 42 unsigned count; 43 count = state->ndw - state->last_pm4 - 2; 44 state->pm4[state->last_pm4] = 45 PKT3(state->last_opcode, count, predicate); 46 47 assert(state->ndw <= SI_PM4_MAX_DW); 48 } 49 50 void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) 51 { 52 unsigned opcode; 53 54 if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { 55 opcode = PKT3_SET_CONFIG_REG; 56 reg -= SI_CONFIG_REG_OFFSET; 57 58 } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { 59 opcode = PKT3_SET_SH_REG; 60 reg -= SI_SH_REG_OFFSET; 61 62 } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { 63 opcode = PKT3_SET_CONTEXT_REG; 64 reg -= SI_CONTEXT_REG_OFFSET; 65 66 } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { 67 opcode = PKT3_SET_UCONFIG_REG; 68 reg -= CIK_UCONFIG_REG_OFFSET; 69 70 } else { 71 R600_ERR("Invalid register offset %08x!\n", reg); 72 return; 73 } 74 75 reg >>= 2; 76 77 if (opcode != state->last_opcode || reg != (state->last_reg + 1)) { 78 si_pm4_cmd_begin(state, opcode); 79 si_pm4_cmd_add(state, reg); 80 } 81 82 state->last_reg = reg; 83 si_pm4_cmd_add(state, val); 84 si_pm4_cmd_end(state, false); 85 } 86 87 void si_pm4_add_bo(struct si_pm4_state *state, 88 struct r600_resource *bo, 89 enum radeon_bo_usage usage, 90 enum radeon_bo_priority priority) 91 { 92 unsigned idx = state->nbo++; 93 assert(idx < SI_PM4_MAX_BO); 94 95 r600_resource_reference(&state->bo[idx], bo); 96 state->bo_usage[idx] = usage; 97 state->bo_priority[idx] = priority; 98 } 99 100 void si_pm4_clear_state(struct si_pm4_state *state) 101 { 102 for (int i = 0; i < state->nbo; ++i) 103 r600_resource_reference(&state->bo[i], NULL); 104 r600_resource_reference(&state->indirect_buffer, NULL); 105 state->nbo = 0; 106 state->ndw = 0; 107 } 108 109 void si_pm4_free_state(struct si_context *sctx, 110 struct si_pm4_state *state, 111 unsigned idx) 112 { 113 if (!state) 114 return; 115 116 if (idx != ~0 && sctx->emitted.array[idx] == state) { 117 sctx->emitted.array[idx] = NULL; 118 } 119 120 si_pm4_clear_state(state); 121 FREE(state); 122 } 123 124 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) 125 { 126 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 127 128 for (int i = 0; i < state->nbo; ++i) { 129 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, state->bo[i], 130 state->bo_usage[i], state->bo_priority[i]); 131 } 132 133 if (!state->indirect_buffer) { 134 radeon_emit_array(cs, state->pm4, state->ndw); 135 } else { 136 struct r600_resource *ib = state->indirect_buffer; 137 138 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, ib, 139 RADEON_USAGE_READ, 140 RADEON_PRIO_IB2); 141 142 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 143 radeon_emit(cs, ib->gpu_address); 144 radeon_emit(cs, ib->gpu_address >> 32); 145 radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff); 146 } 147 } 148 149 void si_pm4_reset_emitted(struct si_context *sctx) 150 { 151 memset(&sctx->emitted, 0, sizeof(sctx->emitted)); 152 sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES); 153 } 154 155 void si_pm4_upload_indirect_buffer(struct si_context *sctx, 156 struct si_pm4_state *state) 157 { 158 struct pipe_screen *screen = sctx->b.b.screen; 159 unsigned aligned_ndw = align(state->ndw, 8); 160 161 /* only supported on CIK and later */ 162 if (sctx->b.chip_class < CIK) 163 return; 164 165 assert(state->ndw); 166 assert(aligned_ndw <= SI_PM4_MAX_DW); 167 168 r600_resource_reference(&state->indirect_buffer, NULL); 169 /* TODO: this hangs with 1024 or higher alignment on GFX9. */ 170 state->indirect_buffer = (struct r600_resource*) 171 si_aligned_buffer_create(screen, 0, 172 PIPE_USAGE_DEFAULT, aligned_ndw * 4, 173 256); 174 if (!state->indirect_buffer) 175 return; 176 177 /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */ 178 if (sctx->screen->info.gfx_ib_pad_with_type2) { 179 for (int i = state->ndw; i < aligned_ndw; i++) 180 state->pm4[i] = 0x80000000; /* type2 nop packet */ 181 } else { 182 for (int i = state->ndw; i < aligned_ndw; i++) 183 state->pm4[i] = 0xffff1000; /* type3 nop packet */ 184 } 185 186 pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b, 187 0, aligned_ndw *4, state->pm4); 188 } 189