1 /* 2 * Copyright (c) 2012 Rob Clark <robdclark (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #include "ir-a2xx.h" 25 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 #include <assert.h> 30 31 #include "freedreno_util.h" 32 #include "instr-a2xx.h" 33 34 #define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) 35 #define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) 36 #define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__) 37 38 #define REG_MASK 0x3f 39 40 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr); 41 42 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, 43 uint32_t idx, struct ir2_shader_info *info); 44 45 static void reg_update_stats(struct ir2_register *reg, 46 struct ir2_shader_info *info, bool dest); 47 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n); 48 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg); 49 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg); 50 static uint32_t reg_alu_src_swiz(struct ir2_register *reg); 51 52 /* simple allocator to carve allocations out of an up-front allocated heap, 53 * so that we can free everything easily in one shot. 54 */ 55 static void * ir2_alloc(struct ir2_shader *shader, int sz) 56 { 57 void *ptr = &shader->heap[shader->heap_idx]; 58 shader->heap_idx += align(sz, 4); 59 return ptr; 60 } 61 62 static char * ir2_strdup(struct ir2_shader *shader, const char *str) 63 { 64 char *ptr = NULL; 65 if (str) { 66 int len = strlen(str); 67 ptr = ir2_alloc(shader, len+1); 68 memcpy(ptr, str, len); 69 ptr[len] = '\0'; 70 } 71 return ptr; 72 } 73 74 struct ir2_shader * ir2_shader_create(void) 75 { 76 DEBUG_MSG(""); 77 return calloc(1, sizeof(struct ir2_shader)); 78 } 79 80 void ir2_shader_destroy(struct ir2_shader *shader) 81 { 82 DEBUG_MSG(""); 83 free(shader); 84 } 85 86 /* resolve addr/cnt/sequence fields in the individual CF's */ 87 static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info) 88 { 89 uint32_t addr; 90 unsigned i; 91 int j; 92 93 addr = shader->cfs_count / 2; 94 for (i = 0; i < shader->cfs_count; i++) { 95 struct ir2_cf *cf = shader->cfs[i]; 96 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { 97 uint32_t sequence = 0; 98 99 if (cf->exec.addr && (cf->exec.addr != addr)) 100 WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); 101 if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) 102 WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); 103 104 for (j = cf->exec.instrs_count - 1; j >= 0; j--) { 105 struct ir2_instruction *instr = cf->exec.instrs[j]; 106 sequence <<= 2; 107 if (instr->instr_type == IR2_FETCH) 108 sequence |= 0x1; 109 if (instr->sync) 110 sequence |= 0x2; 111 } 112 113 cf->exec.addr = addr; 114 cf->exec.cnt = cf->exec.instrs_count; 115 cf->exec.sequence = sequence; 116 117 addr += cf->exec.instrs_count; 118 } 119 } 120 121 info->sizedwords = 3 * addr; 122 123 return 0; 124 } 125 126 void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info) 127 { 128 uint32_t i, j; 129 uint32_t *ptr, *dwords = NULL; 130 uint32_t idx = 0; 131 int ret; 132 133 info->sizedwords = 0; 134 info->max_reg = -1; 135 info->max_input_reg = 0; 136 info->regs_written = 0; 137 138 /* we need an even # of CF's.. insert a NOP if needed */ 139 if (shader->cfs_count != align(shader->cfs_count, 2)) 140 ir2_cf_create(shader, NOP); 141 142 /* first pass, resolve sizes and addresses: */ 143 ret = shader_resolve(shader, info); 144 if (ret) { 145 ERROR_MSG("resolve failed: %d", ret); 146 goto fail; 147 } 148 149 ptr = dwords = calloc(4, info->sizedwords); 150 151 /* second pass, emit CF program in pairs: */ 152 for (i = 0; i < shader->cfs_count; i += 2) { 153 instr_cf_t *cfs = (instr_cf_t *)ptr; 154 ret = cf_emit(shader->cfs[i], &cfs[0]); 155 if (ret) { 156 ERROR_MSG("CF emit failed: %d\n", ret); 157 goto fail; 158 } 159 ret = cf_emit(shader->cfs[i+1], &cfs[1]); 160 if (ret) { 161 ERROR_MSG("CF emit failed: %d\n", ret); 162 goto fail; 163 } 164 ptr += 3; 165 assert((ptr - dwords) <= info->sizedwords); 166 } 167 168 /* third pass, emit ALU/FETCH: */ 169 for (i = 0; i < shader->cfs_count; i++) { 170 struct ir2_cf *cf = shader->cfs[i]; 171 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { 172 for (j = 0; j < cf->exec.instrs_count; j++) { 173 ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); 174 if (ret) { 175 ERROR_MSG("instruction emit failed: %d", ret); 176 goto fail; 177 } 178 ptr += 3; 179 assert((ptr - dwords) <= info->sizedwords); 180 } 181 } 182 } 183 184 return dwords; 185 186 fail: 187 free(dwords); 188 return NULL; 189 } 190 191 192 struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type) 193 { 194 struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf)); 195 DEBUG_MSG("%d", cf_type); 196 cf->shader = shader; 197 cf->cf_type = cf_type; 198 assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); 199 shader->cfs[shader->cfs_count++] = cf; 200 return cf; 201 } 202 203 204 /* 205 * CF instructions: 206 */ 207 208 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr) 209 { 210 memset(instr, 0, sizeof(*instr)); 211 212 instr->opc = cf->cf_type; 213 214 switch (cf->cf_type) { 215 case NOP: 216 break; 217 case EXEC: 218 case EXEC_END: 219 assert(cf->exec.addr <= 0x1ff); 220 assert(cf->exec.cnt <= 0x6); 221 assert(cf->exec.sequence <= 0xfff); 222 instr->exec.address = cf->exec.addr; 223 instr->exec.count = cf->exec.cnt; 224 instr->exec.serialize = cf->exec.sequence; 225 break; 226 case ALLOC: 227 assert(cf->alloc.size <= 0xf); 228 instr->alloc.size = cf->alloc.size; 229 switch (cf->alloc.type) { 230 case SQ_POSITION: 231 case SQ_PARAMETER_PIXEL: 232 instr->alloc.buffer_select = cf->alloc.type; 233 break; 234 default: 235 ERROR_MSG("invalid alloc type: %d", cf->alloc.type); 236 return -1; 237 } 238 break; 239 case COND_EXEC: 240 case COND_EXEC_END: 241 case COND_PRED_EXEC: 242 case COND_PRED_EXEC_END: 243 case LOOP_START: 244 case LOOP_END: 245 case COND_CALL: 246 case RETURN: 247 case COND_JMP: 248 case COND_EXEC_PRED_CLEAN: 249 case COND_EXEC_PRED_CLEAN_END: 250 case MARK_VS_FETCH_DONE: 251 ERROR_MSG("TODO"); 252 return -1; 253 } 254 255 return 0; 256 } 257 258 259 struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type) 260 { 261 struct ir2_instruction *instr = 262 ir2_alloc(cf->shader, sizeof(struct ir2_instruction)); 263 DEBUG_MSG("%d", instr_type); 264 instr->shader = cf->shader; 265 instr->pred = cf->shader->pred; 266 instr->instr_type = instr_type; 267 assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); 268 cf->exec.instrs[cf->exec.instrs_count++] = instr; 269 return instr; 270 } 271 272 273 /* 274 * FETCH instructions: 275 */ 276 277 static int instr_emit_fetch(struct ir2_instruction *instr, 278 uint32_t *dwords, uint32_t idx, 279 struct ir2_shader_info *info) 280 { 281 instr_fetch_t *fetch = (instr_fetch_t *)dwords; 282 int reg = 0; 283 struct ir2_register *dst_reg = instr->regs[reg++]; 284 struct ir2_register *src_reg = instr->regs[reg++]; 285 286 memset(fetch, 0, sizeof(*fetch)); 287 288 reg_update_stats(dst_reg, info, true); 289 reg_update_stats(src_reg, info, false); 290 291 fetch->opc = instr->fetch.opc; 292 293 if (instr->fetch.opc == VTX_FETCH) { 294 instr_fetch_vtx_t *vtx = &fetch->vtx; 295 296 assert(instr->fetch.stride <= 0xff); 297 assert(instr->fetch.fmt <= 0x3f); 298 assert(instr->fetch.const_idx <= 0x1f); 299 assert(instr->fetch.const_idx_sel <= 0x3); 300 301 vtx->src_reg = src_reg->num; 302 vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); 303 vtx->dst_reg = dst_reg->num; 304 vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); 305 vtx->must_be_one = 1; 306 vtx->const_index = instr->fetch.const_idx; 307 vtx->const_index_sel = instr->fetch.const_idx_sel; 308 vtx->format_comp_all = !!instr->fetch.is_signed; 309 vtx->num_format_all = !instr->fetch.is_normalized; 310 vtx->format = instr->fetch.fmt; 311 vtx->stride = instr->fetch.stride; 312 vtx->offset = instr->fetch.offset; 313 314 if (instr->pred != IR2_PRED_NONE) { 315 vtx->pred_select = 1; 316 vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; 317 } 318 319 /* XXX seems like every FETCH but the first has 320 * this bit set: 321 */ 322 vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; 323 vtx->reserved0 = (idx > 0) ? 0x2 : 0x3; 324 } else if (instr->fetch.opc == TEX_FETCH) { 325 instr_fetch_tex_t *tex = &fetch->tex; 326 327 assert(instr->fetch.const_idx <= 0x1f); 328 329 tex->src_reg = src_reg->num; 330 tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); 331 tex->dst_reg = dst_reg->num; 332 tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); 333 tex->const_idx = instr->fetch.const_idx; 334 tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; 335 tex->min_filter = TEX_FILTER_USE_FETCH_CONST; 336 tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; 337 tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; 338 tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; 339 tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; 340 tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; 341 tex->use_comp_lod = 1; 342 tex->use_reg_lod = !instr->fetch.is_cube; 343 tex->sample_location = SAMPLE_CENTER; 344 345 if (instr->pred != IR2_PRED_NONE) { 346 tex->pred_select = 1; 347 tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; 348 } 349 350 } else { 351 ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); 352 return -1; 353 } 354 355 return 0; 356 } 357 358 /* 359 * ALU instructions: 360 */ 361 362 static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, 363 struct ir2_shader_info *info) 364 { 365 int reg = 0; 366 instr_alu_t *alu = (instr_alu_t *)dwords; 367 struct ir2_register *dst_reg = instr->regs[reg++]; 368 struct ir2_register *src1_reg; 369 struct ir2_register *src2_reg; 370 struct ir2_register *src3_reg; 371 372 memset(alu, 0, sizeof(*alu)); 373 374 /* handle instructions w/ 3 src operands: */ 375 switch (instr->alu.vector_opc) { 376 case MULADDv: 377 case CNDEv: 378 case CNDGTEv: 379 case CNDGTv: 380 case DOT2ADDv: 381 /* note: disassembler lists 3rd src first, ie: 382 * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) 383 * which is the reason for this strange ordering. 384 */ 385 src3_reg = instr->regs[reg++]; 386 break; 387 default: 388 src3_reg = NULL; 389 break; 390 } 391 392 src1_reg = instr->regs[reg++]; 393 src2_reg = instr->regs[reg++]; 394 395 reg_update_stats(dst_reg, info, true); 396 reg_update_stats(src1_reg, info, false); 397 reg_update_stats(src2_reg, info, false); 398 399 assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0); 400 assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); 401 assert((src1_reg->flags & IR2_REG_EXPORT) == 0); 402 assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); 403 assert((src2_reg->flags & IR2_REG_EXPORT) == 0); 404 assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); 405 406 if (instr->alu.vector_opc == (instr_vector_opc_t)~0) { 407 alu->vector_opc = MAXv; 408 alu->vector_write_mask = 0; 409 } else { 410 alu->vector_opc = instr->alu.vector_opc; 411 alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); 412 } 413 414 alu->vector_dest = dst_reg->num; 415 alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT); 416 417 // TODO predicate case/condition.. need to add to parser 418 419 alu->src2_reg = src2_reg->num; 420 alu->src2_swiz = reg_alu_src_swiz(src2_reg); 421 alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); 422 alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); 423 alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); 424 425 alu->src1_reg = src1_reg->num; 426 alu->src1_swiz = reg_alu_src_swiz(src1_reg); 427 alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); 428 alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); 429 alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); 430 431 alu->vector_clamp = instr->alu.vector_clamp; 432 alu->scalar_clamp = instr->alu.scalar_clamp; 433 434 if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) { 435 struct ir2_register *sdst_reg = instr->regs[reg++]; 436 437 reg_update_stats(sdst_reg, info, true); 438 439 assert(sdst_reg->flags == dst_reg->flags); 440 441 if (src3_reg) { 442 assert(src3_reg == instr->regs[reg]); 443 reg++; 444 } else { 445 src3_reg = instr->regs[reg++]; 446 } 447 448 alu->scalar_dest = sdst_reg->num; 449 alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); 450 alu->scalar_opc = instr->alu.scalar_opc; 451 } else { 452 /* not sure if this is required, but adreno compiler seems 453 * to always set scalar opc to MAXs if it is not used: 454 */ 455 alu->scalar_opc = MAXs; 456 } 457 458 if (src3_reg) { 459 reg_update_stats(src3_reg, info, false); 460 461 alu->src3_reg = src3_reg->num; 462 alu->src3_swiz = reg_alu_src_swiz(src3_reg); 463 alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); 464 alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); 465 alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); 466 } else { 467 /* not sure if this is required, but adreno compiler seems 468 * to always set register bank for 3rd src if unused: 469 */ 470 alu->src3_sel = 1; 471 } 472 473 if (instr->pred != IR2_PRED_NONE) { 474 alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2; 475 } 476 477 return 0; 478 } 479 480 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, 481 uint32_t idx, struct ir2_shader_info *info) 482 { 483 switch (instr->instr_type) { 484 case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); 485 case IR2_ALU: return instr_emit_alu(instr, dwords, info); 486 } 487 return -1; 488 } 489 490 491 struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, 492 int num, const char *swizzle, int flags) 493 { 494 struct ir2_register *reg = 495 ir2_alloc(instr->shader, sizeof(struct ir2_register)); 496 DEBUG_MSG("%x, %d, %s", flags, num, swizzle); 497 assert(num <= REG_MASK); 498 reg->flags = flags; 499 reg->num = num; 500 reg->swizzle = ir2_strdup(instr->shader, swizzle); 501 assert(instr->regs_count < ARRAY_SIZE(instr->regs)); 502 instr->regs[instr->regs_count++] = reg; 503 return reg; 504 } 505 506 static void reg_update_stats(struct ir2_register *reg, 507 struct ir2_shader_info *info, bool dest) 508 { 509 if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) { 510 info->max_reg = MAX2(info->max_reg, reg->num); 511 512 if (dest) { 513 info->regs_written |= (1 << reg->num); 514 } else if (!(info->regs_written & (1 << reg->num))) { 515 /* for registers that haven't been written, they must be an 516 * input register that the thread scheduler (presumably?) 517 * needs to know about: 518 */ 519 info->max_input_reg = MAX2(info->max_input_reg, reg->num); 520 } 521 } 522 } 523 524 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n) 525 { 526 uint32_t swiz = 0; 527 int i; 528 529 assert(reg->flags == 0); 530 assert(reg->swizzle); 531 532 DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); 533 534 for (i = n-1; i >= 0; i--) { 535 swiz <<= 2; 536 switch (reg->swizzle[i]) { 537 default: 538 ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); 539 case 'x': swiz |= 0x0; break; 540 case 'y': swiz |= 0x1; break; 541 case 'z': swiz |= 0x2; break; 542 case 'w': swiz |= 0x3; break; 543 } 544 } 545 546 return swiz; 547 } 548 549 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg) 550 { 551 uint32_t swiz = 0; 552 int i; 553 554 assert(reg->flags == 0); 555 assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); 556 557 DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); 558 559 if (reg->swizzle) { 560 for (i = 3; i >= 0; i--) { 561 swiz <<= 3; 562 switch (reg->swizzle[i]) { 563 default: 564 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); 565 case 'x': swiz |= 0x0; break; 566 case 'y': swiz |= 0x1; break; 567 case 'z': swiz |= 0x2; break; 568 case 'w': swiz |= 0x3; break; 569 case '0': swiz |= 0x4; break; 570 case '1': swiz |= 0x5; break; 571 case '_': swiz |= 0x7; break; 572 } 573 } 574 } else { 575 swiz = 0x688; 576 } 577 578 return swiz; 579 } 580 581 /* actually, a write-mask */ 582 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg) 583 { 584 uint32_t swiz = 0; 585 int i; 586 587 assert((reg->flags & ~IR2_REG_EXPORT) == 0); 588 assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); 589 590 DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); 591 592 if (reg->swizzle) { 593 for (i = 3; i >= 0; i--) { 594 swiz <<= 1; 595 if (reg->swizzle[i] == "xyzw"[i]) { 596 swiz |= 0x1; 597 } else if (reg->swizzle[i] != '_') { 598 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); 599 break; 600 } 601 } 602 } else { 603 swiz = 0xf; 604 } 605 606 return swiz; 607 } 608 609 static uint32_t reg_alu_src_swiz(struct ir2_register *reg) 610 { 611 uint32_t swiz = 0; 612 int i; 613 614 assert((reg->flags & IR2_REG_EXPORT) == 0); 615 assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); 616 617 DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); 618 619 if (reg->swizzle) { 620 for (i = 3; i >= 0; i--) { 621 swiz <<= 2; 622 switch (reg->swizzle[i]) { 623 default: 624 ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); 625 case 'x': swiz |= (0x0 - i) & 0x3; break; 626 case 'y': swiz |= (0x1 - i) & 0x3; break; 627 case 'z': swiz |= (0x2 - i) & 0x3; break; 628 case 'w': swiz |= (0x3 - i) & 0x3; break; 629 } 630 } 631 } else { 632 swiz = 0x0; 633 } 634 635 return swiz; 636 } 637