1 /* 2 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 #include "r600_sq.h" 24 #include "r600_opcodes.h" 25 #include "r600_formats.h" 26 #include "r600d.h" 27 28 #include <errno.h> 29 #include <byteswap.h> 30 #include "util/u_memory.h" 31 #include "pipe/p_shader_tokens.h" 32 33 #define NUM_OF_CYCLES 3 34 #define NUM_OF_COMPONENTS 4 35 36 static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 37 { 38 if(alu->is_op3) 39 return 3; 40 41 switch (bc->chip_class) { 42 case R600: 43 case R700: 44 switch (alu->inst) { 45 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: 46 return 0; 47 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: 48 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: 49 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT: 50 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT: 51 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT: 52 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: 53 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: 54 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: 55 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: 56 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 57 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE: 58 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT: 59 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT: 60 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: 61 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT: 62 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: 63 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: 64 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT: 65 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT: 66 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT: 67 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT: 68 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 69 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT: 70 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: 71 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT: 72 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: 73 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT: 74 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT: 75 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: 76 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: 77 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: 78 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: 79 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: 80 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: 81 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: 82 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT: 83 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: 84 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: 85 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: 86 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: 87 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT: 88 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT: 89 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT: 90 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT: 91 return 2; 92 93 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 94 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: 95 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: 96 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT: 97 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: 98 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: 99 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: 100 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: 101 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: 102 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: 103 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: 104 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: 105 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: 106 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: 107 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT: 108 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT: 109 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: 110 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: 111 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: 112 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: 113 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT: 114 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT: 115 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: 116 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: 117 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: 118 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT: 119 return 1; 120 default: R600_ERR( 121 "Need instruction operand number for 0x%x.\n", alu->inst); 122 } 123 break; 124 case EVERGREEN: 125 case CAYMAN: 126 switch (alu->inst) { 127 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: 128 return 0; 129 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: 130 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: 131 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT: 132 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT: 133 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT: 134 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: 135 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: 136 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: 137 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: 138 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 139 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE: 140 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT: 141 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT: 142 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: 143 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT: 144 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: 145 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: 146 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT: 147 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT: 148 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT: 149 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT: 150 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 151 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT: 152 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: 153 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT: 154 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: 155 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT: 156 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT: 157 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: 158 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: 159 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: 160 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: 161 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: 162 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: 163 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: 164 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: 165 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT: 166 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: 167 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: 168 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: 169 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY: 170 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW: 171 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT: 172 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT: 173 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT: 174 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT: 175 return 2; 176 177 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 178 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: 179 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: 180 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: 181 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: 182 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: 183 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: 184 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: 185 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: 186 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: 187 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: 188 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: 189 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: 190 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: 191 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: 192 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: 193 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT: 194 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT: 195 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: 196 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: 197 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: 198 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT: 199 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0: 200 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT: 201 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT: 202 return 1; 203 default: R600_ERR( 204 "Need instruction operand number for 0x%x.\n", alu->inst); 205 } 206 break; 207 } 208 209 return 3; 210 } 211 212 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); 213 214 static struct r600_bytecode_cf *r600_bytecode_cf(void) 215 { 216 struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); 217 218 if (cf == NULL) 219 return NULL; 220 LIST_INITHEAD(&cf->list); 221 LIST_INITHEAD(&cf->alu); 222 LIST_INITHEAD(&cf->vtx); 223 LIST_INITHEAD(&cf->tex); 224 return cf; 225 } 226 227 static struct r600_bytecode_alu *r600_bytecode_alu(void) 228 { 229 struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); 230 231 if (alu == NULL) 232 return NULL; 233 LIST_INITHEAD(&alu->list); 234 return alu; 235 } 236 237 static struct r600_bytecode_vtx *r600_bytecode_vtx(void) 238 { 239 struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); 240 241 if (vtx == NULL) 242 return NULL; 243 LIST_INITHEAD(&vtx->list); 244 return vtx; 245 } 246 247 static struct r600_bytecode_tex *r600_bytecode_tex(void) 248 { 249 struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); 250 251 if (tex == NULL) 252 return NULL; 253 LIST_INITHEAD(&tex->list); 254 return tex; 255 } 256 257 void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family) 258 { 259 if ((chip_class == R600) && 260 (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { 261 bc->ar_handling = AR_HANDLE_RV6XX; 262 bc->r6xx_nop_after_rel_dst = 1; 263 } else { 264 bc->ar_handling = AR_HANDLE_NORMAL; 265 bc->r6xx_nop_after_rel_dst = 0; 266 } 267 268 LIST_INITHEAD(&bc->cf); 269 bc->chip_class = chip_class; 270 } 271 272 static int r600_bytecode_add_cf(struct r600_bytecode *bc) 273 { 274 struct r600_bytecode_cf *cf = r600_bytecode_cf(); 275 276 if (cf == NULL) 277 return -ENOMEM; 278 LIST_ADDTAIL(&cf->list, &bc->cf); 279 if (bc->cf_last) { 280 cf->id = bc->cf_last->id + 2; 281 if (bc->cf_last->eg_alu_extended) { 282 /* take into account extended alu size */ 283 cf->id += 2; 284 bc->ndw += 2; 285 } 286 } 287 bc->cf_last = cf; 288 bc->ncf++; 289 bc->ndw += 2; 290 bc->force_add_cf = 0; 291 bc->ar_loaded = 0; 292 return 0; 293 } 294 295 int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output) 296 { 297 int r; 298 299 if (output->gpr >= bc->ngpr) 300 bc->ngpr = output->gpr + 1; 301 302 if (bc->cf_last && (bc->cf_last->inst == output->inst || 303 (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && 304 output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) && 305 output->type == bc->cf_last->output.type && 306 output->elem_size == bc->cf_last->output.elem_size && 307 output->swizzle_x == bc->cf_last->output.swizzle_x && 308 output->swizzle_y == bc->cf_last->output.swizzle_y && 309 output->swizzle_z == bc->cf_last->output.swizzle_z && 310 output->swizzle_w == bc->cf_last->output.swizzle_w && 311 (output->burst_count + bc->cf_last->output.burst_count) <= 16) { 312 313 if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && 314 (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { 315 316 bc->cf_last->output.end_of_program |= output->end_of_program; 317 bc->cf_last->output.inst = output->inst; 318 bc->cf_last->output.gpr = output->gpr; 319 bc->cf_last->output.array_base = output->array_base; 320 bc->cf_last->output.burst_count += output->burst_count; 321 return 0; 322 323 } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && 324 output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { 325 326 bc->cf_last->output.end_of_program |= output->end_of_program; 327 bc->cf_last->output.inst = output->inst; 328 bc->cf_last->output.burst_count += output->burst_count; 329 return 0; 330 } 331 } 332 333 r = r600_bytecode_add_cf(bc); 334 if (r) 335 return r; 336 bc->cf_last->inst = output->inst; 337 memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); 338 return 0; 339 } 340 341 /* alu instructions that can ony exits once per group */ 342 static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 343 { 344 switch (bc->chip_class) { 345 case R600: 346 case R700: 347 return !alu->is_op3 && ( 348 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || 349 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || 350 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || 351 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || 352 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || 353 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || 354 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || 355 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || 356 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || 357 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || 358 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || 359 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || 360 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || 361 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || 362 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || 363 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || 364 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || 365 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || 366 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || 367 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || 368 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || 369 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || 370 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || 371 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || 372 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || 373 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || 374 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || 375 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || 376 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || 377 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || 378 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || 379 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || 380 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || 381 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); 382 case EVERGREEN: 383 case CAYMAN: 384 default: 385 return !alu->is_op3 && ( 386 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || 387 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || 388 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || 389 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || 390 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || 391 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || 392 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || 393 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || 394 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || 395 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || 396 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || 397 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || 398 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || 399 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || 400 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || 401 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || 402 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || 403 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || 404 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || 405 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || 406 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || 407 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || 408 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || 409 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || 410 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || 411 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || 412 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || 413 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || 414 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || 415 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || 416 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || 417 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || 418 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || 419 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); 420 } 421 } 422 423 static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 424 { 425 switch (bc->chip_class) { 426 case R600: 427 case R700: 428 return !alu->is_op3 && ( 429 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || 430 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || 431 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || 432 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); 433 case EVERGREEN: 434 case CAYMAN: 435 default: 436 return !alu->is_op3 && ( 437 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || 438 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || 439 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || 440 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); 441 } 442 } 443 444 static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 445 { 446 switch (bc->chip_class) { 447 case R600: 448 case R700: 449 return !alu->is_op3 && 450 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; 451 case EVERGREEN: 452 case CAYMAN: 453 default: 454 return !alu->is_op3 && 455 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; 456 } 457 } 458 459 static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 460 { 461 switch (bc->chip_class) { 462 case R600: 463 case R700: 464 return !alu->is_op3 && ( 465 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || 466 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || 467 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT || 468 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT); 469 case EVERGREEN: 470 case CAYMAN: 471 default: 472 return !alu->is_op3 && ( 473 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); 474 } 475 } 476 477 static int is_opcode_in_range(unsigned opcode, unsigned min, unsigned max) 478 { 479 return min <= opcode && opcode <= max; 480 } 481 482 /* ALU instructions that can only execute on the vector unit: 483 * 484 * opcode ranges: 485 * R6xx/R7xx: 486 * op3 : [0x08 - 0x0B] 487 * op2 : 0x07, [0x15 - 0x18], [0x1B - 0x1D], [0x50 - 0x53], [0x7A - 0x7E] 488 * 489 * EVERGREEN: 490 * op3: [0x04 - 0x11] 491 * op2: [0xA0 - 0xE2] 492 */ 493 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 494 { 495 switch (bc->chip_class) { 496 case R600: 497 case R700: 498 if (alu->is_op3) 499 return is_opcode_in_range(alu->inst, 500 V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_64, 501 V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_64_D2); 502 else 503 return (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FREXP_64) || 504 is_opcode_in_range(alu->inst, 505 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 506 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT) || 507 is_opcode_in_range(alu->inst, 508 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_64, 509 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT32_TO_FLT64) || 510 is_opcode_in_range(alu->inst, 511 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 512 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4) || 513 is_opcode_in_range(alu->inst, 514 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LDEXP_64, 515 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_64); 516 517 case EVERGREEN: 518 if (alu->is_op3) 519 return is_opcode_in_range(alu->inst, 520 EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_BFE_UINT, 521 EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_LDS_IDX_OP); 522 else 523 return is_opcode_in_range(alu->inst, 524 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_BFM_INT, 525 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P20); 526 case CAYMAN: 527 default: 528 assert(0); 529 return 0; 530 } 531 } 532 533 /* ALU instructions that can only execute on the trans unit: 534 * 535 * opcode ranges: 536 * R600: 537 * op3: 0x0C 538 * op2: [0x60 - 0x79] 539 * 540 * R700: 541 * op3: 0x0C 542 * op2: [0x60 - 0x6F], [0x73 - 0x79] 543 * 544 * EVERGREEN: 545 * op3: 0x1F 546 * op2: [0x81 - 0x9C] 547 */ 548 static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 549 { 550 551 switch (bc->chip_class) { 552 case R600: 553 if (alu->is_op3) 554 return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 555 else 556 return is_opcode_in_range(alu->inst, 557 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 558 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 559 case R700: 560 if (alu->is_op3) 561 return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 562 else 563 return is_opcode_in_range(alu->inst, 564 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 565 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS) || 566 is_opcode_in_range(alu->inst, 567 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 568 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 569 case EVERGREEN: 570 if (alu->is_op3) 571 return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 572 else 573 return is_opcode_in_range(alu->inst, 574 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 575 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 576 case CAYMAN: 577 default: 578 assert(0); 579 return 0; 580 } 581 } 582 583 /* alu instructions that can execute on any unit */ 584 static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 585 { 586 return !is_alu_vec_unit_inst(bc, alu) && 587 !is_alu_trans_unit_inst(bc, alu); 588 } 589 590 static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 591 { 592 switch (bc->chip_class) { 593 case R600: 594 case R700: 595 return (!alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 596 case EVERGREEN: 597 case CAYMAN: 598 default: 599 return (!alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 600 } 601 } 602 603 static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, 604 struct r600_bytecode_alu *assignment[5]) 605 { 606 struct r600_bytecode_alu *alu; 607 unsigned i, chan, trans; 608 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 609 610 for (i = 0; i < max_slots; i++) 611 assignment[i] = NULL; 612 613 for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { 614 chan = alu->dst.chan; 615 if (max_slots == 4) 616 trans = 0; 617 else if (is_alu_trans_unit_inst(bc, alu)) 618 trans = 1; 619 else if (is_alu_vec_unit_inst(bc, alu)) 620 trans = 0; 621 else if (assignment[chan]) 622 trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */ 623 else 624 trans = 0; 625 626 if (trans) { 627 if (assignment[4]) { 628 assert(0); /* ALU.Trans has already been allocated. */ 629 return -1; 630 } 631 assignment[4] = alu; 632 } else { 633 if (assignment[chan]) { 634 assert(0); /* ALU.chan has already been allocated. */ 635 return -1; 636 } 637 assignment[chan] = alu; 638 } 639 640 if (alu->last) 641 break; 642 } 643 return 0; 644 } 645 646 struct alu_bank_swizzle { 647 int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; 648 int hw_cfile_addr[4]; 649 int hw_cfile_elem[4]; 650 }; 651 652 static const unsigned cycle_for_bank_swizzle_vec[][3] = { 653 [SQ_ALU_VEC_012] = { 0, 1, 2 }, 654 [SQ_ALU_VEC_021] = { 0, 2, 1 }, 655 [SQ_ALU_VEC_120] = { 1, 2, 0 }, 656 [SQ_ALU_VEC_102] = { 1, 0, 2 }, 657 [SQ_ALU_VEC_201] = { 2, 0, 1 }, 658 [SQ_ALU_VEC_210] = { 2, 1, 0 } 659 }; 660 661 static const unsigned cycle_for_bank_swizzle_scl[][3] = { 662 [SQ_ALU_SCL_210] = { 2, 1, 0 }, 663 [SQ_ALU_SCL_122] = { 1, 2, 2 }, 664 [SQ_ALU_SCL_212] = { 2, 1, 2 }, 665 [SQ_ALU_SCL_221] = { 2, 2, 1 } 666 }; 667 668 static void init_bank_swizzle(struct alu_bank_swizzle *bs) 669 { 670 int i, cycle, component; 671 /* set up gpr use */ 672 for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) 673 for (component = 0; component < NUM_OF_COMPONENTS; component++) 674 bs->hw_gpr[cycle][component] = -1; 675 for (i = 0; i < 4; i++) 676 bs->hw_cfile_addr[i] = -1; 677 for (i = 0; i < 4; i++) 678 bs->hw_cfile_elem[i] = -1; 679 } 680 681 static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) 682 { 683 if (bs->hw_gpr[cycle][chan] == -1) 684 bs->hw_gpr[cycle][chan] = sel; 685 else if (bs->hw_gpr[cycle][chan] != (int)sel) { 686 /* Another scalar operation has already used the GPR read port for the channel. */ 687 return -1; 688 } 689 return 0; 690 } 691 692 static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) 693 { 694 int res, num_res = 4; 695 if (bc->chip_class >= R700) { 696 num_res = 2; 697 chan /= 2; 698 } 699 for (res = 0; res < num_res; ++res) { 700 if (bs->hw_cfile_addr[res] == -1) { 701 bs->hw_cfile_addr[res] = sel; 702 bs->hw_cfile_elem[res] = chan; 703 return 0; 704 } else if (bs->hw_cfile_addr[res] == sel && 705 bs->hw_cfile_elem[res] == chan) 706 return 0; /* Read for this scalar element already reserved, nothing to do here. */ 707 } 708 /* All cfile read ports are used, cannot reference vector element. */ 709 return -1; 710 } 711 712 static int is_gpr(unsigned sel) 713 { 714 return (sel >= 0 && sel <= 127); 715 } 716 717 /* CB constants start at 512, and get translated to a kcache index when ALU 718 * clauses are constructed. Note that we handle kcache constants the same way 719 * as (the now gone) cfile constants, is that really required? */ 720 static int is_cfile(unsigned sel) 721 { 722 return (sel > 255 && sel < 512) || 723 (sel > 511 && sel < 4607) || /* Kcache before translation. */ 724 (sel > 127 && sel < 192); /* Kcache after translation. */ 725 } 726 727 static int is_const(int sel) 728 { 729 return is_cfile(sel) || 730 (sel >= V_SQ_ALU_SRC_0 && 731 sel <= V_SQ_ALU_SRC_LITERAL); 732 } 733 734 static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 735 struct alu_bank_swizzle *bs, int bank_swizzle) 736 { 737 int r, src, num_src, sel, elem, cycle; 738 739 num_src = r600_bytecode_get_num_operands(bc, alu); 740 for (src = 0; src < num_src; src++) { 741 sel = alu->src[src].sel; 742 elem = alu->src[src].chan; 743 if (is_gpr(sel)) { 744 cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; 745 if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) 746 /* Nothing to do; special-case optimization, 747 * second source uses first sources reservation. */ 748 continue; 749 else { 750 r = reserve_gpr(bs, sel, elem, cycle); 751 if (r) 752 return r; 753 } 754 } else if (is_cfile(sel)) { 755 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 756 if (r) 757 return r; 758 } 759 /* No restrictions on PV, PS, literal or special constants. */ 760 } 761 return 0; 762 } 763 764 static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 765 struct alu_bank_swizzle *bs, int bank_swizzle) 766 { 767 int r, src, num_src, const_count, sel, elem, cycle; 768 769 num_src = r600_bytecode_get_num_operands(bc, alu); 770 for (const_count = 0, src = 0; src < num_src; ++src) { 771 sel = alu->src[src].sel; 772 elem = alu->src[src].chan; 773 if (is_const(sel)) { /* Any constant, including literal and inline constants. */ 774 if (const_count >= 2) 775 /* More than two references to a constant in 776 * transcendental operation. */ 777 return -1; 778 else 779 const_count++; 780 } 781 if (is_cfile(sel)) { 782 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 783 if (r) 784 return r; 785 } 786 } 787 for (src = 0; src < num_src; ++src) { 788 sel = alu->src[src].sel; 789 elem = alu->src[src].chan; 790 if (is_gpr(sel)) { 791 cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 792 if (cycle < const_count) 793 /* Cycle for GPR load conflicts with 794 * constant load in transcendental operation. */ 795 return -1; 796 r = reserve_gpr(bs, sel, elem, cycle); 797 if (r) 798 return r; 799 } 800 /* PV PS restrictions */ 801 if (const_count && (sel == 254 || sel == 255)) { 802 cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 803 if (cycle < const_count) 804 return -1; 805 } 806 } 807 return 0; 808 } 809 810 static int check_and_set_bank_swizzle(struct r600_bytecode *bc, 811 struct r600_bytecode_alu *slots[5]) 812 { 813 struct alu_bank_swizzle bs; 814 int bank_swizzle[5]; 815 int i, r = 0, forced = 1; 816 boolean scalar_only = bc->chip_class == CAYMAN ? false : true; 817 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 818 819 for (i = 0; i < max_slots; i++) { 820 if (slots[i]) { 821 if (slots[i]->bank_swizzle_force) { 822 slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; 823 } else { 824 forced = 0; 825 } 826 } 827 828 if (i < 4 && slots[i]) 829 scalar_only = false; 830 } 831 if (forced) 832 return 0; 833 834 /* Just check every possible combination of bank swizzle. 835 * Not very efficent, but works on the first try in most of the cases. */ 836 for (i = 0; i < 4; i++) 837 if (!slots[i] || !slots[i]->bank_swizzle_force) 838 bank_swizzle[i] = SQ_ALU_VEC_012; 839 else 840 bank_swizzle[i] = slots[i]->bank_swizzle; 841 842 bank_swizzle[4] = SQ_ALU_SCL_210; 843 while(bank_swizzle[4] <= SQ_ALU_SCL_221) { 844 845 if (max_slots == 4) { 846 for (i = 0; i < max_slots; i++) { 847 if (bank_swizzle[i] == SQ_ALU_VEC_210) 848 return -1; 849 } 850 } 851 init_bank_swizzle(&bs); 852 if (scalar_only == false) { 853 for (i = 0; i < 4; i++) { 854 if (slots[i]) { 855 r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); 856 if (r) 857 break; 858 } 859 } 860 } else 861 r = 0; 862 863 if (!r && slots[4] && max_slots == 5) { 864 r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); 865 } 866 if (!r) { 867 for (i = 0; i < max_slots; i++) { 868 if (slots[i]) 869 slots[i]->bank_swizzle = bank_swizzle[i]; 870 } 871 return 0; 872 } 873 874 if (scalar_only) { 875 bank_swizzle[4]++; 876 } else { 877 for (i = 0; i < max_slots; i++) { 878 if (!slots[i] || !slots[i]->bank_swizzle_force) { 879 bank_swizzle[i]++; 880 if (bank_swizzle[i] <= SQ_ALU_VEC_210) 881 break; 882 else 883 bank_swizzle[i] = SQ_ALU_VEC_012; 884 } 885 } 886 } 887 } 888 889 /* Couldn't find a working swizzle. */ 890 return -1; 891 } 892 893 static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, 894 struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) 895 { 896 struct r600_bytecode_alu *prev[5]; 897 int gpr[5], chan[5]; 898 int i, j, r, src, num_src; 899 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 900 901 r = assign_alu_units(bc, alu_prev, prev); 902 if (r) 903 return r; 904 905 for (i = 0; i < max_slots; ++i) { 906 if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) { 907 gpr[i] = prev[i]->dst.sel; 908 /* cube writes more than PV.X */ 909 if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) 910 chan[i] = 0; 911 else 912 chan[i] = prev[i]->dst.chan; 913 } else 914 gpr[i] = -1; 915 } 916 917 for (i = 0; i < max_slots; ++i) { 918 struct r600_bytecode_alu *alu = slots[i]; 919 if(!alu) 920 continue; 921 922 num_src = r600_bytecode_get_num_operands(bc, alu); 923 for (src = 0; src < num_src; ++src) { 924 if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) 925 continue; 926 927 if (bc->chip_class < CAYMAN) { 928 if (alu->src[src].sel == gpr[4] && 929 alu->src[src].chan == chan[4] && 930 alu_prev->pred_sel == alu->pred_sel) { 931 alu->src[src].sel = V_SQ_ALU_SRC_PS; 932 alu->src[src].chan = 0; 933 continue; 934 } 935 } 936 937 for (j = 0; j < 4; ++j) { 938 if (alu->src[src].sel == gpr[j] && 939 alu->src[src].chan == j && 940 alu_prev->pred_sel == alu->pred_sel) { 941 alu->src[src].sel = V_SQ_ALU_SRC_PV; 942 alu->src[src].chan = chan[j]; 943 break; 944 } 945 } 946 } 947 } 948 949 return 0; 950 } 951 952 void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg) 953 { 954 switch(value) { 955 case 0: 956 *sel = V_SQ_ALU_SRC_0; 957 break; 958 case 1: 959 *sel = V_SQ_ALU_SRC_1_INT; 960 break; 961 case -1: 962 *sel = V_SQ_ALU_SRC_M_1_INT; 963 break; 964 case 0x3F800000: /* 1.0f */ 965 *sel = V_SQ_ALU_SRC_1; 966 break; 967 case 0x3F000000: /* 0.5f */ 968 *sel = V_SQ_ALU_SRC_0_5; 969 break; 970 case 0xBF800000: /* -1.0f */ 971 *sel = V_SQ_ALU_SRC_1; 972 *neg ^= 1; 973 break; 974 case 0xBF000000: /* -0.5f */ 975 *sel = V_SQ_ALU_SRC_0_5; 976 *neg ^= 1; 977 break; 978 default: 979 *sel = V_SQ_ALU_SRC_LITERAL; 980 break; 981 } 982 } 983 984 /* compute how many literal are needed */ 985 static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 986 uint32_t literal[4], unsigned *nliteral) 987 { 988 unsigned num_src = r600_bytecode_get_num_operands(bc, alu); 989 unsigned i, j; 990 991 for (i = 0; i < num_src; ++i) { 992 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 993 uint32_t value = alu->src[i].value; 994 unsigned found = 0; 995 for (j = 0; j < *nliteral; ++j) { 996 if (literal[j] == value) { 997 found = 1; 998 break; 999 } 1000 } 1001 if (!found) { 1002 if (*nliteral >= 4) 1003 return -EINVAL; 1004 literal[(*nliteral)++] = value; 1005 } 1006 } 1007 } 1008 return 0; 1009 } 1010 1011 static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc, 1012 struct r600_bytecode_alu *alu, 1013 uint32_t literal[4], unsigned nliteral) 1014 { 1015 unsigned num_src = r600_bytecode_get_num_operands(bc, alu); 1016 unsigned i, j; 1017 1018 for (i = 0; i < num_src; ++i) { 1019 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1020 uint32_t value = alu->src[i].value; 1021 for (j = 0; j < nliteral; ++j) { 1022 if (literal[j] == value) { 1023 alu->src[i].chan = j; 1024 break; 1025 } 1026 } 1027 } 1028 } 1029 } 1030 1031 static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], 1032 struct r600_bytecode_alu *alu_prev) 1033 { 1034 struct r600_bytecode_alu *prev[5]; 1035 struct r600_bytecode_alu *result[5] = { NULL }; 1036 1037 uint32_t literal[4], prev_literal[4]; 1038 unsigned nliteral = 0, prev_nliteral = 0; 1039 1040 int i, j, r, src, num_src; 1041 int num_once_inst = 0; 1042 int have_mova = 0, have_rel = 0; 1043 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 1044 1045 r = assign_alu_units(bc, alu_prev, prev); 1046 if (r) 1047 return r; 1048 1049 for (i = 0; i < max_slots; ++i) { 1050 if (prev[i]) { 1051 if (prev[i]->pred_sel) 1052 return 0; 1053 if (is_alu_once_inst(bc, prev[i])) 1054 return 0; 1055 } 1056 if (slots[i]) { 1057 if (slots[i]->pred_sel) 1058 return 0; 1059 if (is_alu_once_inst(bc, slots[i])) 1060 return 0; 1061 } 1062 } 1063 1064 for (i = 0; i < max_slots; ++i) { 1065 struct r600_bytecode_alu *alu; 1066 1067 if (num_once_inst > 0) 1068 return 0; 1069 1070 /* check number of literals */ 1071 if (prev[i]) { 1072 if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral)) 1073 return 0; 1074 if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) 1075 return 0; 1076 if (is_alu_mova_inst(bc, prev[i])) { 1077 if (have_rel) 1078 return 0; 1079 have_mova = 1; 1080 } 1081 num_once_inst += is_alu_once_inst(bc, prev[i]); 1082 } 1083 if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral)) 1084 return 0; 1085 1086 /* Let's check used slots. */ 1087 if (prev[i] && !slots[i]) { 1088 result[i] = prev[i]; 1089 continue; 1090 } else if (prev[i] && slots[i]) { 1091 if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { 1092 /* Trans unit is still free try to use it. */ 1093 if (is_alu_any_unit_inst(bc, slots[i])) { 1094 result[i] = prev[i]; 1095 result[4] = slots[i]; 1096 } else if (is_alu_any_unit_inst(bc, prev[i])) { 1097 if (slots[i]->dst.sel == prev[i]->dst.sel && 1098 (slots[i]->dst.write == 1 || slots[i]->is_op3) && 1099 (prev[i]->dst.write == 1 || prev[i]->is_op3)) 1100 return 0; 1101 1102 result[i] = slots[i]; 1103 result[4] = prev[i]; 1104 } else 1105 return 0; 1106 } else 1107 return 0; 1108 } else if(!slots[i]) { 1109 continue; 1110 } else { 1111 if (max_slots == 5 && slots[i] && prev[4] && 1112 slots[i]->dst.sel == prev[4]->dst.sel && 1113 slots[i]->dst.chan == prev[4]->dst.chan && 1114 (slots[i]->dst.write == 1 || slots[i]->is_op3) && 1115 (prev[4]->dst.write == 1 || prev[4]->is_op3)) 1116 return 0; 1117 1118 result[i] = slots[i]; 1119 } 1120 1121 alu = slots[i]; 1122 num_once_inst += is_alu_once_inst(bc, alu); 1123 1124 /* don't reschedule NOPs */ 1125 if (is_nop_inst(bc, alu)) 1126 return 0; 1127 1128 /* Let's check dst gpr. */ 1129 if (alu->dst.rel) { 1130 if (have_mova) 1131 return 0; 1132 have_rel = 1; 1133 } 1134 1135 /* Let's check source gprs */ 1136 num_src = r600_bytecode_get_num_operands(bc, alu); 1137 for (src = 0; src < num_src; ++src) { 1138 if (alu->src[src].rel) { 1139 if (have_mova) 1140 return 0; 1141 have_rel = 1; 1142 } 1143 1144 /* Constants don't matter. */ 1145 if (!is_gpr(alu->src[src].sel)) 1146 continue; 1147 1148 for (j = 0; j < max_slots; ++j) { 1149 if (!prev[j] || !(prev[j]->dst.write || prev[j]->is_op3)) 1150 continue; 1151 1152 /* If it's relative then we can't determin which gpr is really used. */ 1153 if (prev[j]->dst.chan == alu->src[src].chan && 1154 (prev[j]->dst.sel == alu->src[src].sel || 1155 prev[j]->dst.rel || alu->src[src].rel)) 1156 return 0; 1157 } 1158 } 1159 } 1160 1161 /* more than one PRED_ or KILL_ ? */ 1162 if (num_once_inst > 1) 1163 return 0; 1164 1165 /* check if the result can still be swizzlet */ 1166 r = check_and_set_bank_swizzle(bc, result); 1167 if (r) 1168 return 0; 1169 1170 /* looks like everything worked out right, apply the changes */ 1171 1172 /* undo adding previus literals */ 1173 bc->cf_last->ndw -= align(prev_nliteral, 2); 1174 1175 /* sort instructions */ 1176 for (i = 0; i < max_slots; ++i) { 1177 slots[i] = result[i]; 1178 if (result[i]) { 1179 LIST_DEL(&result[i]->list); 1180 result[i]->last = 0; 1181 LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); 1182 } 1183 } 1184 1185 /* determine new last instruction */ 1186 LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; 1187 1188 /* determine new first instruction */ 1189 for (i = 0; i < max_slots; ++i) { 1190 if (result[i]) { 1191 bc->cf_last->curr_bs_head = result[i]; 1192 break; 1193 } 1194 } 1195 1196 bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; 1197 bc->cf_last->prev2_bs_head = NULL; 1198 1199 return 0; 1200 } 1201 1202 /* we'll keep kcache sets sorted by bank & addr */ 1203 static int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc, 1204 struct r600_bytecode_kcache *kcache, 1205 unsigned bank, unsigned line) 1206 { 1207 int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2; 1208 1209 for (i = 0; i < kcache_banks; i++) { 1210 if (kcache[i].mode) { 1211 int d; 1212 1213 if (kcache[i].bank < bank) 1214 continue; 1215 1216 if ((kcache[i].bank == bank && kcache[i].addr > line+1) || 1217 kcache[i].bank > bank) { 1218 /* try to insert new line */ 1219 if (kcache[kcache_banks-1].mode) { 1220 /* all sets are in use */ 1221 return -ENOMEM; 1222 } 1223 1224 memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache)); 1225 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1226 kcache[i].bank = bank; 1227 kcache[i].addr = line; 1228 return 0; 1229 } 1230 1231 d = line - kcache[i].addr; 1232 1233 if (d == -1) { 1234 kcache[i].addr--; 1235 if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) { 1236 /* we are prepending the line to the current set, 1237 * discarding the existing second line, 1238 * so we'll have to insert line+2 after it */ 1239 line += 2; 1240 continue; 1241 } else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) { 1242 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1243 return 0; 1244 } else { 1245 /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ 1246 return -ENOMEM; 1247 } 1248 } else if (d == 1) { 1249 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1250 return 0; 1251 } else if (d == 0) 1252 return 0; 1253 } else { /* free kcache set - use it */ 1254 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1255 kcache[i].bank = bank; 1256 kcache[i].addr = line; 1257 return 0; 1258 } 1259 } 1260 return -ENOMEM; 1261 } 1262 1263 static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc, 1264 struct r600_bytecode_kcache *kcache, 1265 struct r600_bytecode_alu *alu) 1266 { 1267 int i, r; 1268 1269 for (i = 0; i < 3; i++) { 1270 unsigned bank, line, sel = alu->src[i].sel; 1271 1272 if (sel < 512) 1273 continue; 1274 1275 bank = alu->src[i].kc_bank; 1276 line = (sel-512)>>4; 1277 1278 if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line))) 1279 return r; 1280 } 1281 return 0; 1282 } 1283 1284 static int r600_bytecode_assign_kcache_banks(struct r600_bytecode *bc, 1285 struct r600_bytecode_alu *alu, 1286 struct r600_bytecode_kcache * kcache) 1287 { 1288 int i, j; 1289 1290 /* Alter the src operands to refer to the kcache. */ 1291 for (i = 0; i < 3; ++i) { 1292 static const unsigned int base[] = {128, 160, 256, 288}; 1293 unsigned int line, sel = alu->src[i].sel, found = 0; 1294 1295 if (sel < 512) 1296 continue; 1297 1298 sel -= 512; 1299 line = sel>>4; 1300 1301 for (j = 0; j < 4 && !found; ++j) { 1302 switch (kcache[j].mode) { 1303 case V_SQ_CF_KCACHE_NOP: 1304 case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX: 1305 R600_ERR("unexpected kcache line mode\n"); 1306 return -ENOMEM; 1307 default: 1308 if (kcache[j].bank == alu->src[i].kc_bank && 1309 kcache[j].addr <= line && 1310 line < kcache[j].addr + kcache[j].mode) { 1311 alu->src[i].sel = sel - (kcache[j].addr<<4); 1312 alu->src[i].sel += base[j]; 1313 found=1; 1314 } 1315 } 1316 } 1317 } 1318 return 0; 1319 } 1320 1321 static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type) 1322 { 1323 struct r600_bytecode_kcache kcache_sets[4]; 1324 struct r600_bytecode_kcache *kcache = kcache_sets; 1325 int r; 1326 1327 memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1328 1329 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1330 /* can't alloc, need to start new clause */ 1331 if ((r = r600_bytecode_add_cf(bc))) { 1332 return r; 1333 } 1334 bc->cf_last->inst = type; 1335 1336 /* retry with the new clause */ 1337 kcache = bc->cf_last->kcache; 1338 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1339 /* can't alloc again- should never happen */ 1340 return r; 1341 } 1342 } else { 1343 /* update kcache sets */ 1344 memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1345 } 1346 1347 /* if we actually used more than 2 kcache sets - use ALU_EXTENDED on eg+ */ 1348 if (kcache[2].mode != V_SQ_CF_KCACHE_NOP) { 1349 if (bc->chip_class < EVERGREEN) 1350 return -ENOMEM; 1351 bc->cf_last->eg_alu_extended = 1; 1352 } 1353 1354 return 0; 1355 } 1356 1357 static int insert_nop_r6xx(struct r600_bytecode *bc) 1358 { 1359 struct r600_bytecode_alu alu; 1360 int r, i; 1361 1362 for (i = 0; i < 4; i++) { 1363 memset(&alu, 0, sizeof(alu)); 1364 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 1365 alu.src[0].chan = i; 1366 alu.dst.chan = i; 1367 alu.last = (i == 3); 1368 r = r600_bytecode_add_alu(bc, &alu); 1369 if (r) 1370 return r; 1371 } 1372 return 0; 1373 } 1374 1375 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1376 static int load_ar_r6xx(struct r600_bytecode *bc) 1377 { 1378 struct r600_bytecode_alu alu; 1379 int r; 1380 1381 if (bc->ar_loaded) 1382 return 0; 1383 1384 /* hack to avoid making MOVA the last instruction in the clause */ 1385 if ((bc->cf_last->ndw>>1) >= 110) 1386 bc->force_add_cf = 1; 1387 1388 memset(&alu, 0, sizeof(alu)); 1389 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT; 1390 alu.src[0].sel = bc->ar_reg; 1391 alu.last = 1; 1392 alu.index_mode = INDEX_MODE_LOOP; 1393 r = r600_bytecode_add_alu(bc, &alu); 1394 if (r) 1395 return r; 1396 1397 /* no requirement to set uses waterfall on MOVA_GPR_INT */ 1398 bc->ar_loaded = 1; 1399 return 0; 1400 } 1401 1402 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1403 static int load_ar(struct r600_bytecode *bc) 1404 { 1405 struct r600_bytecode_alu alu; 1406 int r; 1407 1408 if (bc->ar_handling) 1409 return load_ar_r6xx(bc); 1410 1411 if (bc->ar_loaded) 1412 return 0; 1413 1414 /* hack to avoid making MOVA the last instruction in the clause */ 1415 if ((bc->cf_last->ndw>>1) >= 110) 1416 bc->force_add_cf = 1; 1417 1418 memset(&alu, 0, sizeof(alu)); 1419 alu.inst = BC_INST(bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); 1420 alu.src[0].sel = bc->ar_reg; 1421 alu.last = 1; 1422 r = r600_bytecode_add_alu(bc, &alu); 1423 if (r) 1424 return r; 1425 1426 bc->cf_last->r6xx_uses_waterfall = 1; 1427 bc->ar_loaded = 1; 1428 return 0; 1429 } 1430 1431 int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type) 1432 { 1433 struct r600_bytecode_alu *nalu = r600_bytecode_alu(); 1434 struct r600_bytecode_alu *lalu; 1435 int i, r; 1436 1437 if (nalu == NULL) 1438 return -ENOMEM; 1439 memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); 1440 1441 if (bc->cf_last != NULL && bc->cf_last->inst != type) { 1442 /* check if we could add it anyway */ 1443 if (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) && 1444 type == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)) { 1445 LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { 1446 if (lalu->execute_mask) { 1447 bc->force_add_cf = 1; 1448 break; 1449 } 1450 } 1451 } else 1452 bc->force_add_cf = 1; 1453 } 1454 1455 /* cf can contains only alu or only vtx or only tex */ 1456 if (bc->cf_last == NULL || bc->force_add_cf) { 1457 r = r600_bytecode_add_cf(bc); 1458 if (r) { 1459 free(nalu); 1460 return r; 1461 } 1462 } 1463 bc->cf_last->inst = type; 1464 1465 /* Check AR usage and load it if required */ 1466 for (i = 0; i < 3; i++) 1467 if (nalu->src[i].rel && !bc->ar_loaded) 1468 load_ar(bc); 1469 1470 if (nalu->dst.rel && !bc->ar_loaded) 1471 load_ar(bc); 1472 1473 /* Setup the kcache for this ALU instruction. This will start a new 1474 * ALU clause if needed. */ 1475 if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { 1476 free(nalu); 1477 return r; 1478 } 1479 1480 if (!bc->cf_last->curr_bs_head) { 1481 bc->cf_last->curr_bs_head = nalu; 1482 } 1483 /* number of gpr == the last gpr used in any alu */ 1484 for (i = 0; i < 3; i++) { 1485 if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { 1486 bc->ngpr = nalu->src[i].sel + 1; 1487 } 1488 if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) 1489 r600_bytecode_special_constants(nalu->src[i].value, 1490 &nalu->src[i].sel, &nalu->src[i].neg); 1491 } 1492 if (nalu->dst.sel >= bc->ngpr) { 1493 bc->ngpr = nalu->dst.sel + 1; 1494 } 1495 LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); 1496 /* each alu use 2 dwords */ 1497 bc->cf_last->ndw += 2; 1498 bc->ndw += 2; 1499 1500 /* process cur ALU instructions for bank swizzle */ 1501 if (nalu->last) { 1502 uint32_t literal[4]; 1503 unsigned nliteral; 1504 struct r600_bytecode_alu *slots[5]; 1505 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 1506 r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); 1507 if (r) 1508 return r; 1509 1510 if (bc->cf_last->prev_bs_head) { 1511 r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); 1512 if (r) 1513 return r; 1514 } 1515 1516 if (bc->cf_last->prev_bs_head) { 1517 r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); 1518 if (r) 1519 return r; 1520 } 1521 1522 r = check_and_set_bank_swizzle(bc, slots); 1523 if (r) 1524 return r; 1525 1526 for (i = 0, nliteral = 0; i < max_slots; i++) { 1527 if (slots[i]) { 1528 r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral); 1529 if (r) 1530 return r; 1531 } 1532 } 1533 bc->cf_last->ndw += align(nliteral, 2); 1534 1535 /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) 1536 * worst case */ 1537 if ((bc->cf_last->ndw >> 1) >= 120) { 1538 bc->force_add_cf = 1; 1539 } 1540 1541 bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; 1542 bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; 1543 bc->cf_last->curr_bs_head = NULL; 1544 } 1545 1546 if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst) 1547 insert_nop_r6xx(bc); 1548 1549 return 0; 1550 } 1551 1552 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) 1553 { 1554 return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 1555 } 1556 1557 static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) 1558 { 1559 switch (bc->chip_class) { 1560 case R600: 1561 return 8; 1562 1563 case R700: 1564 case EVERGREEN: 1565 case CAYMAN: 1566 return 16; 1567 1568 default: 1569 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1570 return 8; 1571 } 1572 } 1573 1574 static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc) 1575 { 1576 switch (bc->chip_class) { 1577 case R700: 1578 case R600: 1579 return bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && 1580 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC; 1581 case EVERGREEN: 1582 return bc->cf_last->inst != EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1583 case CAYMAN: 1584 return bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; 1585 default: 1586 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1587 return FALSE; 1588 } 1589 } 1590 1591 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1592 { 1593 struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); 1594 int r; 1595 1596 if (nvtx == NULL) 1597 return -ENOMEM; 1598 memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); 1599 1600 /* cf can contains only alu or only vtx or only tex */ 1601 if (bc->cf_last == NULL || 1602 last_inst_was_not_vtx_fetch(bc) || 1603 bc->force_add_cf) { 1604 r = r600_bytecode_add_cf(bc); 1605 if (r) { 1606 free(nvtx); 1607 return r; 1608 } 1609 switch (bc->chip_class) { 1610 case R600: 1611 case R700: 1612 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1613 break; 1614 case EVERGREEN: 1615 bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1616 break; 1617 case CAYMAN: 1618 bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; 1619 break; 1620 default: 1621 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1622 return -EINVAL; 1623 } 1624 } 1625 LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); 1626 /* each fetch use 4 dwords */ 1627 bc->cf_last->ndw += 4; 1628 bc->ndw += 4; 1629 if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1630 bc->force_add_cf = 1; 1631 1632 bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1); 1633 bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1); 1634 1635 return 0; 1636 } 1637 1638 int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) 1639 { 1640 struct r600_bytecode_tex *ntex = r600_bytecode_tex(); 1641 int r; 1642 1643 if (ntex == NULL) 1644 return -ENOMEM; 1645 memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); 1646 1647 /* we can't fetch data und use it as texture lookup address in the same TEX clause */ 1648 if (bc->cf_last != NULL && 1649 bc->cf_last->inst == BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX)) { 1650 struct r600_bytecode_tex *ttex; 1651 LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { 1652 if (ttex->dst_gpr == ntex->src_gpr) { 1653 bc->force_add_cf = 1; 1654 break; 1655 } 1656 } 1657 /* slight hack to make gradients always go into same cf */ 1658 if (ntex->inst == SQ_TEX_INST_SET_GRADIENTS_H) 1659 bc->force_add_cf = 1; 1660 } 1661 1662 /* cf can contains only alu or only vtx or only tex */ 1663 if (bc->cf_last == NULL || 1664 bc->cf_last->inst != BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX) || 1665 bc->force_add_cf) { 1666 r = r600_bytecode_add_cf(bc); 1667 if (r) { 1668 free(ntex); 1669 return r; 1670 } 1671 bc->cf_last->inst = BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX); 1672 } 1673 if (ntex->src_gpr >= bc->ngpr) { 1674 bc->ngpr = ntex->src_gpr + 1; 1675 } 1676 if (ntex->dst_gpr >= bc->ngpr) { 1677 bc->ngpr = ntex->dst_gpr + 1; 1678 } 1679 LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); 1680 /* each texture fetch use 4 dwords */ 1681 bc->cf_last->ndw += 4; 1682 bc->ndw += 4; 1683 if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1684 bc->force_add_cf = 1; 1685 return 0; 1686 } 1687 1688 int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst) 1689 { 1690 int r; 1691 r = r600_bytecode_add_cf(bc); 1692 if (r) 1693 return r; 1694 1695 bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; 1696 bc->cf_last->inst = inst; 1697 return 0; 1698 } 1699 1700 int cm_bytecode_add_cf_end(struct r600_bytecode *bc) 1701 { 1702 return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); 1703 } 1704 1705 /* common to all 3 families */ 1706 static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) 1707 { 1708 bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | 1709 S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | 1710 S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | 1711 S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); 1712 if (bc->chip_class < CAYMAN) 1713 bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); 1714 id++; 1715 bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | 1716 S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | 1717 S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | 1718 S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | 1719 S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | 1720 S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | 1721 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | 1722 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | 1723 S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | 1724 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); 1725 bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| 1726 S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); 1727 if (bc->chip_class < CAYMAN) 1728 bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); 1729 id++; 1730 bc->bytecode[id++] = 0; 1731 return 0; 1732 } 1733 1734 /* common to all 3 families */ 1735 static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 1736 { 1737 bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | 1738 S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 1739 S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 1740 S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 1741 bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | 1742 S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | 1743 S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | 1744 S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | 1745 S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | 1746 S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | 1747 S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | 1748 S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | 1749 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | 1750 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | 1751 S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); 1752 bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | 1753 S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | 1754 S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | 1755 S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | 1756 S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | 1757 S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | 1758 S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | 1759 S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); 1760 bc->bytecode[id++] = 0; 1761 return 0; 1762 } 1763 1764 /* r600 only, r700/eg bits in r700_asm.c */ 1765 static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) 1766 { 1767 /* don't replace gpr by pv or ps for destination register */ 1768 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | 1769 S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | 1770 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | 1771 S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | 1772 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | 1773 S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | 1774 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | 1775 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | 1776 S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | 1777 S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) | 1778 S_SQ_ALU_WORD0_LAST(alu->last); 1779 1780 if (alu->is_op3) { 1781 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1782 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1783 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1784 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1785 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | 1786 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | 1787 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | 1788 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | 1789 S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | 1790 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); 1791 } else { 1792 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1793 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1794 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1795 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1796 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | 1797 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | 1798 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | 1799 S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | 1800 S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | 1801 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | 1802 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) | 1803 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred); 1804 } 1805 return 0; 1806 } 1807 1808 static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) 1809 { 1810 *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); 1811 *bytecode++ = cf->inst | 1812 S_SQ_CF_WORD1_BARRIER(1) | 1813 S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); 1814 } 1815 1816 /* common for r600/r700 - eg in eg_asm.c */ 1817 static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) 1818 { 1819 unsigned id = cf->id; 1820 1821 switch (cf->inst) { 1822 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1823 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1824 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1825 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1826 bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | 1827 S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | 1828 S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | 1829 S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); 1830 1831 bc->bytecode[id++] = cf->inst | 1832 S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | 1833 S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | 1834 S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | 1835 S_SQ_CF_ALU_WORD1_BARRIER(1) | 1836 S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | 1837 S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); 1838 break; 1839 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1840 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1841 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 1842 if (bc->chip_class == R700) 1843 r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1844 else 1845 r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1846 break; 1847 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1848 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1849 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1850 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1851 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1852 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); 1853 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1854 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | 1855 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | 1856 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 1857 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | 1858 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | 1859 cf->output.inst | 1860 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); 1861 break; 1862 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 1863 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 1864 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 1865 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 1866 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1867 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1868 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1869 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); 1870 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1871 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | 1872 cf->output.inst | 1873 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) | 1874 S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | 1875 S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); 1876 break; 1877 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1878 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1879 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 1880 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 1881 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 1882 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1883 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1884 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1885 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1886 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1887 bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); 1888 bc->bytecode[id++] = cf->inst | 1889 S_SQ_CF_WORD1_BARRIER(1) | 1890 S_SQ_CF_WORD1_COND(cf->cond) | 1891 S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); 1892 1893 break; 1894 default: 1895 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 1896 return -EINVAL; 1897 } 1898 return 0; 1899 } 1900 1901 int r600_bytecode_build(struct r600_bytecode *bc) 1902 { 1903 struct r600_bytecode_cf *cf; 1904 struct r600_bytecode_alu *alu; 1905 struct r600_bytecode_vtx *vtx; 1906 struct r600_bytecode_tex *tex; 1907 uint32_t literal[4]; 1908 unsigned nliteral; 1909 unsigned addr; 1910 int i, r; 1911 1912 if (bc->callstack[0].max > 0) 1913 bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; 1914 if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { 1915 bc->nstack = 1; 1916 } 1917 1918 /* first path compute addr of each CF block */ 1919 /* addr start after all the CF instructions */ 1920 addr = bc->cf_last->id + 2; 1921 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1922 if (bc->chip_class >= EVERGREEN) { 1923 switch (cf->inst) { 1924 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1925 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1926 /* fetch node need to be 16 bytes aligned*/ 1927 addr += 3; 1928 addr &= 0xFFFFFFFCUL; 1929 break; 1930 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1931 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1932 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1933 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1934 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1935 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1936 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 1937 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 1938 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 1939 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 1940 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 1941 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 1942 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 1943 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 1944 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 1945 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 1946 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 1947 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 1948 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 1949 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 1950 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 1951 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 1952 case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1953 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1954 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 1955 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 1956 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 1957 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1958 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1959 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1960 case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1961 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1962 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 1963 case CF_NATIVE: 1964 break; 1965 default: 1966 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 1967 return -EINVAL; 1968 } 1969 } else { 1970 switch (cf->inst) { 1971 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1972 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1973 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 1974 /* fetch node need to be 16 bytes aligned*/ 1975 addr += 3; 1976 addr &= 0xFFFFFFFCUL; 1977 break; 1978 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1979 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1980 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1981 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1982 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1983 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1984 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 1985 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 1986 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 1987 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 1988 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1989 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1990 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 1991 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 1992 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1993 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1994 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1995 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1996 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1997 break; 1998 default: 1999 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 2000 return -EINVAL; 2001 } 2002 } 2003 cf->addr = addr; 2004 addr += cf->ndw; 2005 bc->ndw = cf->addr + cf->ndw; 2006 } 2007 free(bc->bytecode); 2008 bc->bytecode = calloc(1, bc->ndw * 4); 2009 if (bc->bytecode == NULL) 2010 return -ENOMEM; 2011 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 2012 addr = cf->addr; 2013 if (bc->chip_class >= EVERGREEN) { 2014 r = eg_bytecode_cf_build(bc, cf); 2015 if (r) 2016 return r; 2017 2018 switch (cf->inst) { 2019 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2020 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2021 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2022 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2023 nliteral = 0; 2024 memset(literal, 0, sizeof(literal)); 2025 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2026 r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2027 if (r) 2028 return r; 2029 r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); 2030 r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache); 2031 2032 switch(bc->chip_class) { 2033 case EVERGREEN: /* eg alu is same encoding as r700 */ 2034 case CAYMAN: 2035 r = r700_bytecode_alu_build(bc, alu, addr); 2036 break; 2037 default: 2038 R600_ERR("unknown chip class %d.\n", bc->chip_class); 2039 return -EINVAL; 2040 } 2041 if (r) 2042 return r; 2043 addr += 2; 2044 if (alu->last) { 2045 for (i = 0; i < align(nliteral, 2); ++i) { 2046 bc->bytecode[addr++] = literal[i]; 2047 } 2048 nliteral = 0; 2049 memset(literal, 0, sizeof(literal)); 2050 } 2051 } 2052 break; 2053 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2054 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2055 r = r600_bytecode_vtx_build(bc, vtx, addr); 2056 if (r) 2057 return r; 2058 addr += 4; 2059 } 2060 break; 2061 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2062 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2063 assert(bc->chip_class >= EVERGREEN); 2064 r = r600_bytecode_vtx_build(bc, vtx, addr); 2065 if (r) 2066 return r; 2067 addr += 4; 2068 } 2069 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2070 r = r600_bytecode_tex_build(bc, tex, addr); 2071 if (r) 2072 return r; 2073 addr += 4; 2074 } 2075 break; 2076 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2077 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2078 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 2079 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 2080 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 2081 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 2082 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 2083 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 2084 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 2085 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 2086 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 2087 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 2088 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 2089 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 2090 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 2091 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 2092 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 2093 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 2094 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 2095 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2096 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2097 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2098 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2099 case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2100 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2101 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 2102 case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2103 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2104 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 2105 break; 2106 case CF_NATIVE: 2107 break; 2108 default: 2109 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 2110 return -EINVAL; 2111 } 2112 } else { 2113 r = r600_bytecode_cf_build(bc, cf); 2114 if (r) 2115 return r; 2116 2117 switch (cf->inst) { 2118 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2119 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2120 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2121 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2122 nliteral = 0; 2123 memset(literal, 0, sizeof(literal)); 2124 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2125 r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2126 if (r) 2127 return r; 2128 r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); 2129 r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache); 2130 2131 switch(bc->chip_class) { 2132 case R600: 2133 r = r600_bytecode_alu_build(bc, alu, addr); 2134 break; 2135 case R700: 2136 r = r700_bytecode_alu_build(bc, alu, addr); 2137 break; 2138 default: 2139 R600_ERR("unknown chip class %d.\n", bc->chip_class); 2140 return -EINVAL; 2141 } 2142 if (r) 2143 return r; 2144 addr += 2; 2145 if (alu->last) { 2146 for (i = 0; i < align(nliteral, 2); ++i) { 2147 bc->bytecode[addr++] = literal[i]; 2148 } 2149 nliteral = 0; 2150 memset(literal, 0, sizeof(literal)); 2151 } 2152 } 2153 break; 2154 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2155 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 2156 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2157 r = r600_bytecode_vtx_build(bc, vtx, addr); 2158 if (r) 2159 return r; 2160 addr += 4; 2161 } 2162 break; 2163 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2164 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2165 r = r600_bytecode_tex_build(bc, tex, addr); 2166 if (r) 2167 return r; 2168 addr += 4; 2169 } 2170 break; 2171 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2172 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2173 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 2174 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 2175 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 2176 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 2177 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2178 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 2179 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2180 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2181 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2182 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2183 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2184 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 2185 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2186 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2187 break; 2188 default: 2189 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 2190 return -EINVAL; 2191 } 2192 } 2193 } 2194 return 0; 2195 } 2196 2197 void r600_bytecode_clear(struct r600_bytecode *bc) 2198 { 2199 struct r600_bytecode_cf *cf = NULL, *next_cf; 2200 2201 free(bc->bytecode); 2202 bc->bytecode = NULL; 2203 2204 LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { 2205 struct r600_bytecode_alu *alu = NULL, *next_alu; 2206 struct r600_bytecode_tex *tex = NULL, *next_tex; 2207 struct r600_bytecode_tex *vtx = NULL, *next_vtx; 2208 2209 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { 2210 free(alu); 2211 } 2212 2213 LIST_INITHEAD(&cf->alu); 2214 2215 LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { 2216 free(tex); 2217 } 2218 2219 LIST_INITHEAD(&cf->tex); 2220 2221 LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { 2222 free(vtx); 2223 } 2224 2225 LIST_INITHEAD(&cf->vtx); 2226 2227 free(cf); 2228 } 2229 2230 LIST_INITHEAD(&cf->list); 2231 } 2232 2233 void r600_bytecode_dump(struct r600_bytecode *bc) 2234 { 2235 struct r600_bytecode_cf *cf = NULL; 2236 struct r600_bytecode_alu *alu = NULL; 2237 struct r600_bytecode_vtx *vtx = NULL; 2238 struct r600_bytecode_tex *tex = NULL; 2239 2240 unsigned i, id; 2241 uint32_t literal[4]; 2242 unsigned nliteral; 2243 char chip = '6'; 2244 2245 switch (bc->chip_class) { 2246 case R700: 2247 chip = '7'; 2248 break; 2249 case EVERGREEN: 2250 chip = 'E'; 2251 break; 2252 case CAYMAN: 2253 chip = 'C'; 2254 break; 2255 case R600: 2256 default: 2257 chip = '6'; 2258 break; 2259 } 2260 fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr); 2261 fprintf(stderr, " %c\n", chip); 2262 2263 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 2264 id = cf->id; 2265 2266 if (bc->chip_class >= EVERGREEN) { 2267 switch (cf->inst) { 2268 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2269 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2270 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2271 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2272 if (cf->eg_alu_extended) { 2273 fprintf(stderr, "%04d %08X ALU_EXT0 ", id, bc->bytecode[id]); 2274 fprintf(stderr, "KCACHE_BANK2:%X ", cf->kcache[2].bank); 2275 fprintf(stderr, "KCACHE_BANK3:%X ", cf->kcache[3].bank); 2276 fprintf(stderr, "KCACHE_MODE2:%X\n", cf->kcache[2].mode); 2277 id++; 2278 fprintf(stderr, "%04d %08X ALU_EXT1 ", id, bc->bytecode[id]); 2279 fprintf(stderr, "KCACHE_MODE3:%X ", cf->kcache[3].mode); 2280 fprintf(stderr, "KCACHE_ADDR2:%X ", cf->kcache[2].addr); 2281 fprintf(stderr, "KCACHE_ADDR3:%X\n", cf->kcache[3].addr); 2282 id++; 2283 } 2284 2285 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2286 fprintf(stderr, "ADDR:%d ", cf->addr); 2287 fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); 2288 fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); 2289 fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); 2290 id++; 2291 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2292 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst)); 2293 fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); 2294 fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); 2295 fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); 2296 fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); 2297 break; 2298 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2299 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2300 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2301 fprintf(stderr, "ADDR:%d\n", cf->addr); 2302 id++; 2303 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2304 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2305 fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); 2306 break; 2307 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2308 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2309 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2310 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2311 fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); 2312 fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); 2313 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2314 id++; 2315 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2316 fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); 2317 fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); 2318 fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); 2319 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); 2320 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2321 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst)); 2322 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2323 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2324 break; 2325 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 2326 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 2327 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 2328 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 2329 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 2330 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 2331 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 2332 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 2333 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 2334 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 2335 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 2336 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 2337 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 2338 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 2339 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 2340 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 2341 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id], 2342 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2343 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4, 2344 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2345 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4); 2346 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2347 fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size); 2348 fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base); 2349 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2350 id++; 2351 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id], 2352 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2353 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4, 2354 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2355 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4); 2356 fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size); 2357 fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask); 2358 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2359 fprintf(stderr, "INST:%d ", cf->output.inst); 2360 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2361 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2362 break; 2363 case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2364 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2365 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 2366 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2367 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 2368 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2369 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2370 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2371 case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2372 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2373 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 2374 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2375 fprintf(stderr, "ADDR:%d\n", cf->cf_addr); 2376 id++; 2377 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2378 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2379 fprintf(stderr, "COND:%X ", cf->cond); 2380 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); 2381 break; 2382 case CF_NATIVE: 2383 fprintf(stderr, "%04d %08X CF NATIVE\n", id, bc->bytecode[id]); 2384 fprintf(stderr, "%04d %08X CF NATIVE\n", id + 1, bc->bytecode[id + 1]); 2385 break; 2386 default: 2387 R600_ERR("Unknown instruction %0x\n", cf->inst); 2388 } 2389 } else { 2390 switch (cf->inst) { 2391 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2392 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2393 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2394 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2395 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2396 fprintf(stderr, "ADDR:%d ", cf->addr); 2397 fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); 2398 fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); 2399 fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); 2400 id++; 2401 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2402 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst)); 2403 fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); 2404 fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); 2405 fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); 2406 fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); 2407 break; 2408 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2409 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2410 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 2411 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2412 fprintf(stderr, "ADDR:%d\n", cf->addr); 2413 id++; 2414 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2415 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2416 fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); 2417 break; 2418 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2419 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2420 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2421 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2422 fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); 2423 fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); 2424 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2425 id++; 2426 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2427 fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); 2428 fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); 2429 fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); 2430 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); 2431 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2432 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst)); 2433 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2434 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2435 break; 2436 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 2437 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 2438 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 2439 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 2440 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id], 2441 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2442 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0)); 2443 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2444 fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size); 2445 fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base); 2446 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2447 id++; 2448 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id], 2449 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2450 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0)); 2451 fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size); 2452 fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask); 2453 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2454 fprintf(stderr, "INST:%d ", cf->output.inst); 2455 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2456 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2457 break; 2458 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2459 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2460 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 2461 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2462 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: 2463 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2464 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2465 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2466 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2467 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2468 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2469 fprintf(stderr, "ADDR:%d\n", cf->cf_addr); 2470 id++; 2471 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2472 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2473 fprintf(stderr, "COND:%X ", cf->cond); 2474 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); 2475 break; 2476 default: 2477 R600_ERR("Unknown instruction %0x\n", cf->inst); 2478 } 2479 } 2480 2481 id = cf->addr; 2482 nliteral = 0; 2483 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2484 r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2485 2486 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2487 fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); 2488 fprintf(stderr, "REL:%d ", alu->src[0].rel); 2489 fprintf(stderr, "CHAN:%d ", alu->src[0].chan); 2490 fprintf(stderr, "NEG:%d) ", alu->src[0].neg); 2491 fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); 2492 fprintf(stderr, "REL:%d ", alu->src[1].rel); 2493 fprintf(stderr, "CHAN:%d ", alu->src[1].chan); 2494 fprintf(stderr, "NEG:%d ", alu->src[1].neg); 2495 fprintf(stderr, "IM:%d) ", alu->index_mode); 2496 fprintf(stderr, "PRED_SEL:%d ", alu->pred_sel); 2497 fprintf(stderr, "LAST:%d)\n", alu->last); 2498 id++; 2499 fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' '); 2500 fprintf(stderr, "INST:0x%x ", alu->inst); 2501 fprintf(stderr, "DST(SEL:%d ", alu->dst.sel); 2502 fprintf(stderr, "CHAN:%d ", alu->dst.chan); 2503 fprintf(stderr, "REL:%d ", alu->dst.rel); 2504 fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp); 2505 fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle); 2506 if (alu->is_op3) { 2507 fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel); 2508 fprintf(stderr, "REL:%d ", alu->src[2].rel); 2509 fprintf(stderr, "CHAN:%d ", alu->src[2].chan); 2510 fprintf(stderr, "NEG:%d)\n", alu->src[2].neg); 2511 } else { 2512 fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); 2513 fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); 2514 fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); 2515 fprintf(stderr, "OMOD:%d ", alu->omod); 2516 fprintf(stderr, "EXECUTE_MASK:%d ", alu->execute_mask); 2517 fprintf(stderr, "UPDATE_PRED:%d\n", alu->update_pred); 2518 } 2519 2520 id++; 2521 if (alu->last) { 2522 for (i = 0; i < nliteral; i++, id++) { 2523 float *f = (float*)(bc->bytecode + id); 2524 fprintf(stderr, "%04d %08X\t%f (%d)\n", id, bc->bytecode[id], *f, 2525 *(bc->bytecode + id)); 2526 } 2527 id += nliteral & 1; 2528 nliteral = 0; 2529 } 2530 } 2531 2532 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2533 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2534 fprintf(stderr, "INST:0x%x ", tex->inst); 2535 fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); 2536 fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); 2537 fprintf(stderr, "REL:%d)\n", tex->src_rel); 2538 id++; 2539 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2540 fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); 2541 fprintf(stderr, "REL:%d ", tex->dst_rel); 2542 fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); 2543 fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); 2544 fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z); 2545 fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); 2546 fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); 2547 fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); 2548 fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); 2549 fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); 2550 fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); 2551 id++; 2552 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2553 fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); 2554 fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); 2555 fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); 2556 fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); 2557 fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); 2558 fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); 2559 fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); 2560 fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); 2561 id++; 2562 fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); 2563 id++; 2564 } 2565 2566 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2567 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2568 fprintf(stderr, "INST:%d ", vtx->inst); 2569 fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); 2570 fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); 2571 id++; 2572 /* This assumes that no semantic fetches exist */ 2573 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2574 fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); 2575 fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); 2576 if (bc->chip_class < CAYMAN) 2577 fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); 2578 else 2579 fprintf(stderr, "SEL_Y:%d) ", 0); 2580 fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); 2581 fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); 2582 fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); 2583 fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); 2584 fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); 2585 fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); 2586 fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); 2587 fprintf(stderr, "NUM:%d ", vtx->num_format_all); 2588 fprintf(stderr, "COMP:%d ", vtx->format_comp_all); 2589 fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); 2590 id++; 2591 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2592 fprintf(stderr, "ENDIAN:%d ", vtx->endian); 2593 fprintf(stderr, "OFFSET:%d\n", vtx->offset); 2594 /* XXX */ 2595 id++; 2596 fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); 2597 id++; 2598 } 2599 } 2600 2601 fprintf(stderr, "--------------------------------------\n"); 2602 } 2603 2604 static void r600_vertex_data_type(enum pipe_format pformat, 2605 unsigned *format, 2606 unsigned *num_format, unsigned *format_comp, unsigned *endian) 2607 { 2608 const struct util_format_description *desc; 2609 unsigned i; 2610 2611 *format = 0; 2612 *num_format = 0; 2613 *format_comp = 0; 2614 *endian = ENDIAN_NONE; 2615 2616 desc = util_format_description(pformat); 2617 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { 2618 goto out_unknown; 2619 } 2620 2621 /* Find the first non-VOID channel. */ 2622 for (i = 0; i < 4; i++) { 2623 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2624 break; 2625 } 2626 } 2627 2628 *endian = r600_endian_swap(desc->channel[i].size); 2629 2630 switch (desc->channel[i].type) { 2631 /* Half-floats, floats, ints */ 2632 case UTIL_FORMAT_TYPE_FLOAT: 2633 switch (desc->channel[i].size) { 2634 case 16: 2635 switch (desc->nr_channels) { 2636 case 1: 2637 *format = FMT_16_FLOAT; 2638 break; 2639 case 2: 2640 *format = FMT_16_16_FLOAT; 2641 break; 2642 case 3: 2643 case 4: 2644 *format = FMT_16_16_16_16_FLOAT; 2645 break; 2646 } 2647 break; 2648 case 32: 2649 switch (desc->nr_channels) { 2650 case 1: 2651 *format = FMT_32_FLOAT; 2652 break; 2653 case 2: 2654 *format = FMT_32_32_FLOAT; 2655 break; 2656 case 3: 2657 *format = FMT_32_32_32_FLOAT; 2658 break; 2659 case 4: 2660 *format = FMT_32_32_32_32_FLOAT; 2661 break; 2662 } 2663 break; 2664 default: 2665 goto out_unknown; 2666 } 2667 break; 2668 /* Unsigned ints */ 2669 case UTIL_FORMAT_TYPE_UNSIGNED: 2670 /* Signed ints */ 2671 case UTIL_FORMAT_TYPE_SIGNED: 2672 switch (desc->channel[i].size) { 2673 case 8: 2674 switch (desc->nr_channels) { 2675 case 1: 2676 *format = FMT_8; 2677 break; 2678 case 2: 2679 *format = FMT_8_8; 2680 break; 2681 case 3: 2682 case 4: 2683 *format = FMT_8_8_8_8; 2684 break; 2685 } 2686 break; 2687 case 10: 2688 if (desc->nr_channels != 4) 2689 goto out_unknown; 2690 2691 *format = FMT_2_10_10_10; 2692 break; 2693 case 16: 2694 switch (desc->nr_channels) { 2695 case 1: 2696 *format = FMT_16; 2697 break; 2698 case 2: 2699 *format = FMT_16_16; 2700 break; 2701 case 3: 2702 case 4: 2703 *format = FMT_16_16_16_16; 2704 break; 2705 } 2706 break; 2707 case 32: 2708 switch (desc->nr_channels) { 2709 case 1: 2710 *format = FMT_32; 2711 break; 2712 case 2: 2713 *format = FMT_32_32; 2714 break; 2715 case 3: 2716 *format = FMT_32_32_32; 2717 break; 2718 case 4: 2719 *format = FMT_32_32_32_32; 2720 break; 2721 } 2722 break; 2723 default: 2724 goto out_unknown; 2725 } 2726 break; 2727 default: 2728 goto out_unknown; 2729 } 2730 2731 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2732 *format_comp = 1; 2733 } 2734 2735 *num_format = 0; 2736 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || 2737 desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2738 if (!desc->channel[i].normalized) { 2739 if (desc->channel[i].pure_integer) 2740 *num_format = 1; 2741 else 2742 *num_format = 2; 2743 } 2744 } 2745 return; 2746 out_unknown: 2747 R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); 2748 } 2749 2750 int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r600_vertex_element *ve) 2751 { 2752 static int dump_shaders = -1; 2753 2754 struct r600_bytecode bc; 2755 struct r600_bytecode_vtx vtx; 2756 struct pipe_vertex_element *elements = ve->elements; 2757 const struct util_format_description *desc; 2758 unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; 2759 unsigned format, num_format, format_comp, endian; 2760 uint32_t *bytecode; 2761 int i, j, r; 2762 2763 memset(&bc, 0, sizeof(bc)); 2764 r600_bytecode_init(&bc, rctx->chip_class, rctx->family); 2765 2766 for (i = 0; i < ve->count; i++) { 2767 if (elements[i].instance_divisor > 1) { 2768 if (rctx->chip_class == CAYMAN) { 2769 for (j = 0; j < 4; j++) { 2770 struct r600_bytecode_alu alu; 2771 memset(&alu, 0, sizeof(alu)); 2772 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2773 alu.src[0].sel = 0; 2774 alu.src[0].chan = 3; 2775 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2776 alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2777 alu.dst.sel = i + 1; 2778 alu.dst.chan = j; 2779 alu.dst.write = j == 3; 2780 alu.last = j == 3; 2781 if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2782 r600_bytecode_clear(&bc); 2783 return r; 2784 } 2785 } 2786 } else { 2787 struct r600_bytecode_alu alu; 2788 memset(&alu, 0, sizeof(alu)); 2789 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2790 alu.src[0].sel = 0; 2791 alu.src[0].chan = 3; 2792 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2793 alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2794 alu.dst.sel = i + 1; 2795 alu.dst.chan = 3; 2796 alu.dst.write = 1; 2797 alu.last = 1; 2798 if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2799 r600_bytecode_clear(&bc); 2800 return r; 2801 } 2802 } 2803 } 2804 } 2805 2806 for (i = 0; i < ve->count; i++) { 2807 r600_vertex_data_type(ve->elements[i].src_format, 2808 &format, &num_format, &format_comp, &endian); 2809 2810 desc = util_format_description(ve->elements[i].src_format); 2811 if (desc == NULL) { 2812 r600_bytecode_clear(&bc); 2813 R600_ERR("unknown format %d\n", ve->elements[i].src_format); 2814 return -EINVAL; 2815 } 2816 2817 if (elements[i].src_offset > 65535) { 2818 r600_bytecode_clear(&bc); 2819 R600_ERR("too big src_offset: %u\n", elements[i].src_offset); 2820 return -EINVAL; 2821 } 2822 2823 memset(&vtx, 0, sizeof(vtx)); 2824 vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; 2825 vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; 2826 vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; 2827 vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; 2828 vtx.mega_fetch_count = 0x1F; 2829 vtx.dst_gpr = i + 1; 2830 vtx.dst_sel_x = desc->swizzle[0]; 2831 vtx.dst_sel_y = desc->swizzle[1]; 2832 vtx.dst_sel_z = desc->swizzle[2]; 2833 vtx.dst_sel_w = desc->swizzle[3]; 2834 vtx.data_format = format; 2835 vtx.num_format_all = num_format; 2836 vtx.format_comp_all = format_comp; 2837 vtx.srf_mode_all = 1; 2838 vtx.offset = elements[i].src_offset; 2839 vtx.endian = endian; 2840 2841 if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { 2842 r600_bytecode_clear(&bc); 2843 return r; 2844 } 2845 } 2846 2847 r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 2848 2849 if ((r = r600_bytecode_build(&bc))) { 2850 r600_bytecode_clear(&bc); 2851 return r; 2852 } 2853 2854 if (dump_shaders == -1) 2855 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 2856 2857 if (dump_shaders) { 2858 fprintf(stderr, "--------------------------------------------------------------\n"); 2859 r600_bytecode_dump(&bc); 2860 fprintf(stderr, "______________________________________________________________\n"); 2861 } 2862 2863 ve->fs_size = bc.ndw*4; 2864 2865 ve->fetch_shader = (struct r600_resource*) 2866 pipe_buffer_create(rctx->context.screen, 2867 PIPE_BIND_CUSTOM, 2868 PIPE_USAGE_IMMUTABLE, ve->fs_size); 2869 if (ve->fetch_shader == NULL) { 2870 r600_bytecode_clear(&bc); 2871 return -ENOMEM; 2872 } 2873 2874 bytecode = rctx->ws->buffer_map(ve->fetch_shader->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 2875 if (bytecode == NULL) { 2876 r600_bytecode_clear(&bc); 2877 pipe_resource_reference((struct pipe_resource**)&ve->fetch_shader, NULL); 2878 return -ENOMEM; 2879 } 2880 2881 if (R600_BIG_ENDIAN) { 2882 for (i = 0; i < ve->fs_size / 4; ++i) { 2883 bytecode[i] = bswap_32(bc.bytecode[i]); 2884 } 2885 } else { 2886 memcpy(bytecode, bc.bytecode, ve->fs_size); 2887 } 2888 2889 rctx->ws->buffer_unmap(ve->fetch_shader->cs_buf); 2890 r600_bytecode_clear(&bc); 2891 2892 if (rctx->chip_class >= EVERGREEN) 2893 evergreen_fetch_shader(&rctx->context, ve); 2894 else 2895 r600_fetch_shader(&rctx->context, ve); 2896 2897 return 0; 2898 } 2899