1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian Knig <christian.koenig (at) amd.com> 25 */ 26 27 #include "radeon/r600_cs.h" 28 #include "util/u_memory.h" 29 #include "si_pipe.h" 30 #include "sid.h" 31 32 #define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) 33 34 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) 35 { 36 state->last_opcode = opcode; 37 state->last_pm4 = state->ndw++; 38 } 39 40 void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) 41 { 42 state->pm4[state->ndw++] = dw; 43 } 44 45 void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) 46 { 47 unsigned count; 48 count = state->ndw - state->last_pm4 - 2; 49 state->pm4[state->last_pm4] = 50 PKT3(state->last_opcode, count, predicate) 51 | PKT3_SHADER_TYPE_S(state->compute_pkt); 52 53 assert(state->ndw <= SI_PM4_MAX_DW); 54 } 55 56 void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) 57 { 58 unsigned opcode; 59 60 if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { 61 opcode = PKT3_SET_CONFIG_REG; 62 reg -= SI_CONFIG_REG_OFFSET; 63 64 } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { 65 opcode = PKT3_SET_SH_REG; 66 reg -= SI_SH_REG_OFFSET; 67 68 } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { 69 opcode = PKT3_SET_CONTEXT_REG; 70 reg -= SI_CONTEXT_REG_OFFSET; 71 72 } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { 73 opcode = PKT3_SET_UCONFIG_REG; 74 reg -= CIK_UCONFIG_REG_OFFSET; 75 76 } else { 77 R600_ERR("Invalid register offset %08x!\n", reg); 78 return; 79 } 80 81 reg >>= 2; 82 83 if (opcode != state->last_opcode || reg != (state->last_reg + 1)) { 84 si_pm4_cmd_begin(state, opcode); 85 si_pm4_cmd_add(state, reg); 86 } 87 88 state->last_reg = reg; 89 si_pm4_cmd_add(state, val); 90 si_pm4_cmd_end(state, false); 91 } 92 93 void si_pm4_add_bo(struct si_pm4_state *state, 94 struct r600_resource *bo, 95 enum radeon_bo_usage usage, 96 enum radeon_bo_priority priority) 97 { 98 unsigned idx = state->nbo++; 99 assert(idx < SI_PM4_MAX_BO); 100 101 r600_resource_reference(&state->bo[idx], bo); 102 state->bo_usage[idx] = usage; 103 state->bo_priority[idx] = priority; 104 } 105 106 void si_pm4_clear_state(struct si_pm4_state *state) 107 { 108 for (int i = 0; i < state->nbo; ++i) 109 r600_resource_reference(&state->bo[i], NULL); 110 r600_resource_reference(&state->indirect_buffer, NULL); 111 state->nbo = 0; 112 state->ndw = 0; 113 } 114 115 void si_pm4_free_state_simple(struct si_pm4_state *state) 116 { 117 si_pm4_clear_state(state); 118 FREE(state); 119 } 120 121 void si_pm4_free_state(struct si_context *sctx, 122 struct si_pm4_state *state, 123 unsigned idx) 124 { 125 if (!state) 126 return; 127 128 if (idx != ~0 && sctx->emitted.array[idx] == state) { 129 sctx->emitted.array[idx] = NULL; 130 } 131 132 si_pm4_free_state_simple(state); 133 } 134 135 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) 136 { 137 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 138 139 for (int i = 0; i < state->nbo; ++i) { 140 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, state->bo[i], 141 state->bo_usage[i], state->bo_priority[i]); 142 } 143 144 if (!state->indirect_buffer) { 145 radeon_emit_array(cs, state->pm4, state->ndw); 146 } else { 147 struct r600_resource *ib = state->indirect_buffer; 148 149 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, ib, 150 RADEON_USAGE_READ, 151 RADEON_PRIO_IB2); 152 153 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 154 radeon_emit(cs, ib->gpu_address); 155 radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff); 156 radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff); 157 } 158 } 159 160 void si_pm4_emit_dirty(struct si_context *sctx) 161 { 162 for (int i = 0; i < NUMBER_OF_STATES; ++i) { 163 struct si_pm4_state *state = sctx->queued.array[i]; 164 165 if (!state || sctx->emitted.array[i] == state) 166 continue; 167 168 si_pm4_emit(sctx, state); 169 sctx->emitted.array[i] = state; 170 } 171 } 172 173 void si_pm4_reset_emitted(struct si_context *sctx) 174 { 175 memset(&sctx->emitted, 0, sizeof(sctx->emitted)); 176 } 177 178 void si_pm4_upload_indirect_buffer(struct si_context *sctx, 179 struct si_pm4_state *state) 180 { 181 struct pipe_screen *screen = sctx->b.b.screen; 182 unsigned aligned_ndw = align(state->ndw, 8); 183 184 /* only supported on CIK and later */ 185 if (sctx->b.chip_class < CIK) 186 return; 187 188 assert(state->ndw); 189 assert(aligned_ndw <= SI_PM4_MAX_DW); 190 191 r600_resource_reference(&state->indirect_buffer, NULL); 192 state->indirect_buffer = (struct r600_resource*) 193 pipe_buffer_create(screen, 0, 194 PIPE_USAGE_DEFAULT, aligned_ndw * 4); 195 if (!state->indirect_buffer) 196 return; 197 198 /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */ 199 if (sctx->screen->b.info.gfx_ib_pad_with_type2) { 200 for (int i = state->ndw; i < aligned_ndw; i++) 201 state->pm4[i] = 0x80000000; /* type2 nop packet */ 202 } else { 203 for (int i = state->ndw; i < aligned_ndw; i++) 204 state->pm4[i] = 0xffff1000; /* type3 nop packet */ 205 } 206 207 pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b, 208 0, aligned_ndw *4, state->pm4); 209 } 210