1 /* 2 * Copyright (c) 2012-2015 Etnaviv Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Wladimir J. van der Laan <laanwj (at) gmail.com> 25 */ 26 27 /* TGSI->Vivante shader ISA conversion */ 28 29 /* What does the compiler return (see etna_shader_object)? 30 * 1) instruction data 31 * 2) input-to-temporary mapping (fixed for ps) 32 * *) in case of ps, semantic -> varying id mapping 33 * *) for each varying: number of components used (r, rg, rgb, rgba) 34 * 3) temporary-to-output mapping (in case of vs, fixed for ps) 35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...) 36 * 5) immediates base offset, immediates data 37 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to 38 * configure the hw, but useful for error checking 39 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips 40 * (output reg id is enough) 41 * 42 * Empty shaders are not allowed, should always at least generate a NOP. Also 43 * if there is a label at the end of the shader, an extra NOP should be 44 * generated as jump target. 45 * 46 * TODO 47 * * Use an instruction scheduler 48 * * Indirect access to uniforms / temporaries using amode 49 */ 50 51 #include "etnaviv_compiler.h" 52 53 #include "etnaviv_asm.h" 54 #include "etnaviv_context.h" 55 #include "etnaviv_debug.h" 56 #include "etnaviv_disasm.h" 57 #include "etnaviv_uniforms.h" 58 #include "etnaviv_util.h" 59 60 #include "pipe/p_shader_tokens.h" 61 #include "tgsi/tgsi_info.h" 62 #include "tgsi/tgsi_iterate.h" 63 #include "tgsi/tgsi_lowering.h" 64 #include "tgsi/tgsi_strings.h" 65 #include "tgsi/tgsi_util.h" 66 #include "util/u_math.h" 67 #include "util/u_memory.h" 68 69 #include <fcntl.h> 70 #include <stdio.h> 71 #include <sys/stat.h> 72 #include <sys/types.h> 73 74 #define ETNA_MAX_INNER_TEMPS 2 75 76 static const float sincos_const[2][4] = { 77 { 78 2., -1., 4., -4., 79 }, 80 { 81 1. / (2. * M_PI), 0.75, 0.5, 0.0, 82 }, 83 }; 84 85 /* Native register description structure */ 86 struct etna_native_reg { 87 unsigned valid : 1; 88 unsigned is_tex : 1; /* is texture unit, overrides rgroup */ 89 unsigned rgroup : 3; 90 unsigned id : 9; 91 }; 92 93 /* Register description */ 94 struct etna_reg_desc { 95 enum tgsi_file_type file; /* IN, OUT, TEMP, ... */ 96 int idx; /* index into file */ 97 bool active; /* used in program */ 98 int first_use; /* instruction id of first use (scope begin) */ 99 int last_use; /* instruction id of last use (scope end, inclusive) */ 100 101 struct etna_native_reg native; /* native register to map to */ 102 unsigned usage_mask : 4; /* usage, per channel */ 103 bool has_semantic; /* register has associated TGSI semantic */ 104 struct tgsi_declaration_semantic semantic; /* TGSI semantic */ 105 struct tgsi_declaration_interp interp; /* Interpolation type */ 106 }; 107 108 /* Label information structure */ 109 struct etna_compile_label { 110 int inst_idx; /* Instruction id that label points to */ 111 }; 112 113 enum etna_compile_frame_type { 114 ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */ 115 ETNA_COMPILE_FRAME_LOOP, 116 }; 117 118 /* nesting scope frame (LOOP, IF, ...) during compilation 119 */ 120 struct etna_compile_frame { 121 enum etna_compile_frame_type type; 122 struct etna_compile_label *lbl_else; 123 struct etna_compile_label *lbl_endif; 124 struct etna_compile_label *lbl_loop_bgn; 125 struct etna_compile_label *lbl_loop_end; 126 }; 127 128 struct etna_compile_file { 129 /* Number of registers in each TGSI file (max register+1) */ 130 size_t reg_size; 131 /* Register descriptions, per register index */ 132 struct etna_reg_desc *reg; 133 }; 134 135 #define array_insert(arr, val) \ 136 do { \ 137 if (arr##_count == arr##_sz) { \ 138 arr##_sz = MAX2(2 * arr##_sz, 16); \ 139 arr = realloc(arr, arr##_sz * sizeof(arr[0])); \ 140 } \ 141 arr[arr##_count++] = val; \ 142 } while (0) 143 144 145 /* scratch area for compiling shader, freed after compilation finishes */ 146 struct etna_compile { 147 const struct tgsi_token *tokens; 148 bool free_tokens; 149 150 struct tgsi_shader_info info; 151 152 /* Register descriptions, per TGSI file, per register index */ 153 struct etna_compile_file file[TGSI_FILE_COUNT]; 154 155 /* Keep track of TGSI register declarations */ 156 struct etna_reg_desc decl[ETNA_MAX_DECL]; 157 uint total_decls; 158 159 /* Bitmap of dead instructions which are removed in a separate pass */ 160 bool dead_inst[ETNA_MAX_TOKENS]; 161 162 /* Immediate data */ 163 enum etna_immediate_contents imm_contents[ETNA_MAX_IMM]; 164 uint32_t imm_data[ETNA_MAX_IMM]; 165 uint32_t imm_base; /* base of immediates (in 32 bit units) */ 166 uint32_t imm_size; /* size of immediates (in 32 bit units) */ 167 168 /* Next free native register, for register allocation */ 169 uint32_t next_free_native; 170 171 /* Temporary register for use within translated TGSI instruction, 172 * only allocated when needed. 173 */ 174 int inner_temps; /* number of inner temps used; only up to one available at 175 this point */ 176 struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS]; 177 178 /* Fields for handling nested conditionals */ 179 struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH]; 180 int frame_sp; 181 struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS]; 182 183 unsigned labels_count, labels_sz; 184 struct etna_compile_label *labels; 185 186 /* Code generation */ 187 int inst_ptr; /* current instruction pointer */ 188 uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE]; 189 190 /* I/O */ 191 192 /* Number of varyings (PS only) */ 193 int num_varyings; 194 195 /* GPU hardware specs */ 196 const struct etna_specs *specs; 197 }; 198 199 static struct etna_reg_desc * 200 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst) 201 { 202 return &c->file[dst.File].reg[dst.Index]; 203 } 204 205 static struct etna_reg_desc * 206 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src) 207 { 208 return &c->file[src.File].reg[src.Index]; 209 } 210 211 static struct etna_native_reg 212 etna_native_temp(unsigned reg) 213 { 214 return (struct etna_native_reg) { 215 .valid = 1, 216 .rgroup = INST_RGROUP_TEMP, 217 .id = reg 218 }; 219 } 220 221 /** Register allocation **/ 222 enum reg_sort_order { 223 FIRST_USE_ASC, 224 FIRST_USE_DESC, 225 LAST_USE_ASC, 226 LAST_USE_DESC 227 }; 228 229 /* Augmented register description for sorting */ 230 struct sort_rec { 231 struct etna_reg_desc *ptr; 232 int key; 233 }; 234 235 static int 236 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b) 237 { 238 if (a->key < b->key) 239 return -1; 240 241 if (a->key > b->key) 242 return 1; 243 244 return 0; 245 } 246 247 /* create an index on a register set based on certain criteria. */ 248 static int 249 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file, 250 enum reg_sort_order so) 251 { 252 struct etna_reg_desc *regs = file->reg; 253 int ptr = 0; 254 255 /* pre-populate keys from active registers */ 256 for (int idx = 0; idx < file->reg_size; ++idx) { 257 /* only interested in active registers now; will only assign inactive ones 258 * if no space in active ones */ 259 if (regs[idx].active) { 260 sorted[ptr].ptr = ®s[idx]; 261 262 switch (so) { 263 case FIRST_USE_ASC: 264 sorted[ptr].key = regs[idx].first_use; 265 break; 266 case LAST_USE_ASC: 267 sorted[ptr].key = regs[idx].last_use; 268 break; 269 case FIRST_USE_DESC: 270 sorted[ptr].key = -regs[idx].first_use; 271 break; 272 case LAST_USE_DESC: 273 sorted[ptr].key = -regs[idx].last_use; 274 break; 275 } 276 ptr++; 277 } 278 } 279 280 /* sort index by key */ 281 qsort(sorted, ptr, sizeof(struct sort_rec), 282 (int (*)(const void *, const void *))sort_rec_compar); 283 284 return ptr; 285 } 286 287 /* Allocate a new, unused, native temp register */ 288 static struct etna_native_reg 289 alloc_new_native_reg(struct etna_compile *c) 290 { 291 assert(c->next_free_native < ETNA_MAX_TEMPS); 292 return etna_native_temp(c->next_free_native++); 293 } 294 295 /* assign TEMPs to native registers */ 296 static void 297 assign_temporaries_to_native(struct etna_compile *c, 298 struct etna_compile_file *file) 299 { 300 struct etna_reg_desc *temps = file->reg; 301 302 for (int idx = 0; idx < file->reg_size; ++idx) 303 temps[idx].native = alloc_new_native_reg(c); 304 } 305 306 /* assign inputs and outputs to temporaries 307 * Gallium assumes that the hardware has separate registers for taking input and 308 * output, however Vivante GPUs use temporaries both for passing in inputs and 309 * passing back outputs. 310 * Try to re-use temporary registers where possible. */ 311 static void 312 assign_inouts_to_temporaries(struct etna_compile *c, uint file) 313 { 314 bool mode_inputs = (file == TGSI_FILE_INPUT); 315 int inout_ptr = 0, num_inouts; 316 int temp_ptr = 0, num_temps; 317 struct sort_rec inout_order[ETNA_MAX_TEMPS]; 318 struct sort_rec temps_order[ETNA_MAX_TEMPS]; 319 num_inouts = sort_registers(inout_order, &c->file[file], 320 mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC); 321 num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY], 322 mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC); 323 324 while (inout_ptr < num_inouts && temp_ptr < num_temps) { 325 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 326 struct etna_reg_desc *temp = temps_order[temp_ptr].ptr; 327 328 if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */ 329 inout_ptr++; 330 continue; 331 } 332 333 /* last usage of this input is before or in same instruction of first use 334 * of temporary? */ 335 if (mode_inputs ? (inout->last_use <= temp->first_use) 336 : (inout->first_use >= temp->last_use)) { 337 /* assign it and advance to next input */ 338 inout->native = temp->native; 339 inout_ptr++; 340 } 341 342 temp_ptr++; 343 } 344 345 /* if we couldn't reuse current ones, allocate new temporaries */ 346 for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) { 347 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 348 349 if (inout->active && !inout->native.valid) 350 inout->native = alloc_new_native_reg(c); 351 } 352 } 353 354 /* Allocate an immediate with a certain value and return the index. If 355 * there is already an immediate with that value, return that. 356 */ 357 static struct etna_inst_src 358 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, 359 uint32_t value) 360 { 361 int idx; 362 363 /* Could use a hash table to speed this up */ 364 for (idx = 0; idx < c->imm_size; ++idx) { 365 if (c->imm_contents[idx] == contents && c->imm_data[idx] == value) 366 break; 367 } 368 369 /* look if there is an unused slot */ 370 if (idx == c->imm_size) { 371 for (idx = 0; idx < c->imm_size; ++idx) { 372 if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED) 373 break; 374 } 375 } 376 377 /* allocate new immediate */ 378 if (idx == c->imm_size) { 379 assert(c->imm_size < ETNA_MAX_IMM); 380 idx = c->imm_size++; 381 c->imm_data[idx] = value; 382 c->imm_contents[idx] = contents; 383 } 384 385 /* swizzle so that component with value is returned in all components */ 386 idx += c->imm_base; 387 struct etna_inst_src imm_src = { 388 .use = 1, 389 .rgroup = INST_RGROUP_UNIFORM_0, 390 .reg = idx / 4, 391 .swiz = INST_SWIZ_BROADCAST(idx & 3) 392 }; 393 394 return imm_src; 395 } 396 397 static struct etna_inst_src 398 alloc_imm_u32(struct etna_compile *c, uint32_t value) 399 { 400 return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value); 401 } 402 403 static struct etna_inst_src 404 alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents, 405 const uint32_t *values) 406 { 407 struct etna_inst_src imm_src = { }; 408 int idx, i; 409 410 for (idx = 0; idx + 3 < c->imm_size; idx += 4) { 411 /* What if we can use a uniform with a different swizzle? */ 412 for (i = 0; i < 4; i++) 413 if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i]) 414 break; 415 if (i == 4) 416 break; 417 } 418 419 if (idx + 3 >= c->imm_size) { 420 idx = align(c->imm_size, 4); 421 assert(idx + 4 <= ETNA_MAX_IMM); 422 423 for (i = 0; i < 4; i++) { 424 c->imm_data[idx + i] = values[i]; 425 c->imm_contents[idx + i] = contents; 426 } 427 428 c->imm_size = idx + 4; 429 } 430 431 assert((c->imm_base & 3) == 0); 432 idx += c->imm_base; 433 imm_src.use = 1; 434 imm_src.rgroup = INST_RGROUP_UNIFORM_0; 435 imm_src.reg = idx / 4; 436 imm_src.swiz = INST_SWIZ_IDENTITY; 437 438 return imm_src; 439 } 440 441 static uint32_t 442 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm, 443 unsigned swiz_idx) 444 { 445 assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0); 446 unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3); 447 448 return c->imm_data[idx]; 449 } 450 451 /* Allocate immediate with a certain float value. If there is already an 452 * immediate with that value, return that. 453 */ 454 static struct etna_inst_src 455 alloc_imm_f32(struct etna_compile *c, float value) 456 { 457 return alloc_imm_u32(c, fui(value)); 458 } 459 460 static struct etna_inst_src 461 etna_imm_vec4f(struct etna_compile *c, const float *vec4) 462 { 463 uint32_t val[4]; 464 465 for (int i = 0; i < 4; i++) 466 val[i] = fui(vec4[i]); 467 468 return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val); 469 } 470 471 /* Pass -- check register file declarations and immediates */ 472 static void 473 etna_compile_parse_declarations(struct etna_compile *c) 474 { 475 struct tgsi_parse_context ctx = { }; 476 unsigned status = TGSI_PARSE_OK; 477 status = tgsi_parse_init(&ctx, c->tokens); 478 assert(status == TGSI_PARSE_OK); 479 480 while (!tgsi_parse_end_of_tokens(&ctx)) { 481 tgsi_parse_token(&ctx); 482 483 switch (ctx.FullToken.Token.Type) { 484 case TGSI_TOKEN_TYPE_IMMEDIATE: { 485 /* immediates are handled differently from other files; they are 486 * not declared explicitly, and always add four components */ 487 const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate; 488 assert(c->imm_size <= (ETNA_MAX_IMM - 4)); 489 490 for (int i = 0; i < 4; ++i) { 491 unsigned idx = c->imm_size++; 492 493 c->imm_data[idx] = imm->u[i].Uint; 494 c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT; 495 } 496 } 497 break; 498 } 499 } 500 501 tgsi_parse_free(&ctx); 502 } 503 504 /* Allocate register declarations for the registers in all register files */ 505 static void 506 etna_allocate_decls(struct etna_compile *c) 507 { 508 uint idx = 0; 509 510 for (int x = 0; x < TGSI_FILE_COUNT; ++x) { 511 c->file[x].reg = &c->decl[idx]; 512 c->file[x].reg_size = c->info.file_max[x] + 1; 513 514 for (int sub = 0; sub < c->file[x].reg_size; ++sub) { 515 c->decl[idx].file = x; 516 c->decl[idx].idx = sub; 517 idx++; 518 } 519 } 520 521 c->total_decls = idx; 522 } 523 524 /* Pass -- check and record usage of temporaries, inputs, outputs */ 525 static void 526 etna_compile_pass_check_usage(struct etna_compile *c) 527 { 528 struct tgsi_parse_context ctx = { }; 529 unsigned status = TGSI_PARSE_OK; 530 status = tgsi_parse_init(&ctx, c->tokens); 531 assert(status == TGSI_PARSE_OK); 532 533 for (int idx = 0; idx < c->total_decls; ++idx) { 534 c->decl[idx].active = false; 535 c->decl[idx].first_use = c->decl[idx].last_use = -1; 536 } 537 538 int inst_idx = 0; 539 while (!tgsi_parse_end_of_tokens(&ctx)) { 540 tgsi_parse_token(&ctx); 541 /* find out max register #s used 542 * For every register mark first and last instruction index where it's 543 * used this allows finding ranges where the temporary can be borrowed 544 * as input and/or output register 545 * 546 * XXX in the case of loops this needs special care, or even be completely 547 * disabled, as 548 * the last usage of a register inside a loop means it can still be used 549 * on next loop 550 * iteration (execution is no longer * chronological). The register can 551 * only be 552 * declared "free" after the loop finishes. 553 * 554 * Same for inputs: the first usage of a register inside a loop doesn't 555 * mean that the register 556 * won't have been overwritten in previous iteration. The register can 557 * only be declared free before the loop 558 * starts. 559 * The proper way would be to do full dominator / post-dominator analysis 560 * (especially with more complicated 561 * control flow such as direct branch instructions) but not for now... 562 */ 563 switch (ctx.FullToken.Token.Type) { 564 case TGSI_TOKEN_TYPE_DECLARATION: { 565 /* Declaration: fill in file details */ 566 const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; 567 struct etna_compile_file *file = &c->file[decl->Declaration.File]; 568 569 for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) { 570 file->reg[idx].usage_mask = 0; // we'll compute this ourselves 571 file->reg[idx].has_semantic = decl->Declaration.Semantic; 572 file->reg[idx].semantic = decl->Semantic; 573 file->reg[idx].interp = decl->Interp; 574 } 575 } break; 576 case TGSI_TOKEN_TYPE_INSTRUCTION: { 577 /* Instruction: iterate over operands of instruction */ 578 const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 579 580 /* iterate over destination registers */ 581 for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) { 582 struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index]; 583 584 if (reg_desc->first_use == -1) 585 reg_desc->first_use = inst_idx; 586 587 reg_desc->last_use = inst_idx; 588 reg_desc->active = true; 589 } 590 591 /* iterate over source registers */ 592 for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) { 593 struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index]; 594 595 if (reg_desc->first_use == -1) 596 reg_desc->first_use = inst_idx; 597 598 reg_desc->last_use = inst_idx; 599 reg_desc->active = true; 600 /* accumulate usage mask for register, this is used to determine how 601 * many slots for varyings 602 * should be allocated */ 603 reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx); 604 } 605 inst_idx += 1; 606 } break; 607 default: 608 break; 609 } 610 } 611 612 tgsi_parse_free(&ctx); 613 } 614 615 /* assign inputs that need to be assigned to specific registers */ 616 static void 617 assign_special_inputs(struct etna_compile *c) 618 { 619 if (c->info.processor == PIPE_SHADER_FRAGMENT) { 620 /* never assign t0 as it is the position output, start assigning at t1 */ 621 c->next_free_native = 1; 622 623 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */ 624 for (int idx = 0; idx < c->total_decls; ++idx) { 625 struct etna_reg_desc *reg = &c->decl[idx]; 626 627 if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION) 628 reg->native = etna_native_temp(0); 629 } 630 } 631 } 632 633 /* Check that a move instruction does not swizzle any of the components 634 * that it writes. 635 */ 636 static bool 637 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst, 638 const struct tgsi_src_register src) 639 { 640 return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) && 641 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) && 642 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) && 643 (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W); 644 } 645 646 /* Pass -- optimize outputs 647 * Mesa tends to generate code like this at the end if their shaders 648 * MOV OUT[1], TEMP[2] 649 * MOV OUT[0], TEMP[0] 650 * MOV OUT[2], TEMP[1] 651 * Recognize if 652 * a) there is only a single assignment to an output register and 653 * b) the temporary is not used after that 654 * Also recognize direct assignment of IN to OUT (passthrough) 655 **/ 656 static void 657 etna_compile_pass_optimize_outputs(struct etna_compile *c) 658 { 659 struct tgsi_parse_context ctx = { }; 660 int inst_idx = 0; 661 unsigned status = TGSI_PARSE_OK; 662 status = tgsi_parse_init(&ctx, c->tokens); 663 assert(status == TGSI_PARSE_OK); 664 665 while (!tgsi_parse_end_of_tokens(&ctx)) { 666 tgsi_parse_token(&ctx); 667 668 switch (ctx.FullToken.Token.Type) { 669 case TGSI_TOKEN_TYPE_INSTRUCTION: { 670 const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 671 672 /* iterate over operands */ 673 switch (inst->Instruction.Opcode) { 674 case TGSI_OPCODE_MOV: { 675 /* We are only interested in eliminating MOVs which write to 676 * the shader outputs. Test for this early. */ 677 if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 678 break; 679 /* Elimination of a MOV must have no visible effect on the 680 * resulting shader: this means the MOV must not swizzle or 681 * saturate, and its source must not have the negate or 682 * absolute modifiers. */ 683 if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) || 684 inst->Instruction.Saturate || inst->Src[0].Register.Negate || 685 inst->Src[0].Register.Absolute) 686 break; 687 688 uint out_idx = inst->Dst[0].Register.Index; 689 uint in_idx = inst->Src[0].Register.Index; 690 /* assignment of temporary to output -- 691 * and the output doesn't yet have a native register assigned 692 * and the last use of the temporary is this instruction 693 * and the MOV does not do a swizzle 694 */ 695 if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY && 696 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 697 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) { 698 c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 699 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native; 700 /* prevent temp from being re-used for the rest of the shader */ 701 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS; 702 /* mark this MOV instruction as a no-op */ 703 c->dead_inst[inst_idx] = true; 704 } 705 /* direct assignment of input to output -- 706 * and the input or output doesn't yet have a native register 707 * assigned 708 * and the output is only used in this instruction, 709 * allocate a new register, and associate both input and output to 710 * it 711 * and the MOV does not do a swizzle 712 */ 713 if (inst->Src[0].Register.File == TGSI_FILE_INPUT && 714 !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid && 715 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 716 c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx && 717 c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) { 718 c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 719 c->file[TGSI_FILE_INPUT].reg[in_idx].native = 720 alloc_new_native_reg(c); 721 /* mark this MOV instruction as a no-op */ 722 c->dead_inst[inst_idx] = true; 723 } 724 } break; 725 default:; 726 } 727 inst_idx += 1; 728 } break; 729 } 730 } 731 732 tgsi_parse_free(&ctx); 733 } 734 735 /* Get a temporary to be used within one TGSI instruction. 736 * The first time that this function is called the temporary will be allocated. 737 * Each call to this function will return the same temporary. 738 */ 739 static struct etna_native_reg 740 etna_compile_get_inner_temp(struct etna_compile *c) 741 { 742 int inner_temp = c->inner_temps; 743 744 if (inner_temp < ETNA_MAX_INNER_TEMPS) { 745 if (!c->inner_temp[inner_temp].valid) 746 c->inner_temp[inner_temp] = alloc_new_native_reg(c); 747 748 /* alloc_new_native_reg() handles lack of registers */ 749 c->inner_temps += 1; 750 } else { 751 BUG("Too many inner temporaries (%i) requested in one instruction", 752 inner_temp + 1); 753 } 754 755 return c->inner_temp[inner_temp]; 756 } 757 758 static struct etna_inst_dst 759 etna_native_to_dst(struct etna_native_reg native, unsigned comps) 760 { 761 /* Can only assign to temporaries */ 762 assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP); 763 764 struct etna_inst_dst rv = { 765 .comps = comps, 766 .use = 1, 767 .reg = native.id, 768 }; 769 770 return rv; 771 } 772 773 static struct etna_inst_src 774 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle) 775 { 776 assert(native.valid && !native.is_tex); 777 778 struct etna_inst_src rv = { 779 .use = 1, 780 .swiz = swizzle, 781 .rgroup = native.rgroup, 782 .reg = native.id, 783 .amode = INST_AMODE_DIRECT, 784 }; 785 786 return rv; 787 } 788 789 static inline struct etna_inst_src 790 negate(struct etna_inst_src src) 791 { 792 src.neg = !src.neg; 793 794 return src; 795 } 796 797 static inline struct etna_inst_src 798 absolute(struct etna_inst_src src) 799 { 800 src.abs = 1; 801 802 return src; 803 } 804 805 static inline struct etna_inst_src 806 swizzle(struct etna_inst_src src, unsigned swizzle) 807 { 808 src.swiz = inst_swiz_compose(src.swiz, swizzle); 809 810 return src; 811 } 812 813 /* Emit instruction and append it to program */ 814 static void 815 emit_inst(struct etna_compile *c, struct etna_inst *inst) 816 { 817 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS); 818 819 /* Check for uniform conflicts (each instruction can only access one 820 * uniform), 821 * if detected, use an intermediate temporary */ 822 unsigned uni_rgroup = -1; 823 unsigned uni_reg = -1; 824 825 for (int src = 0; src < ETNA_NUM_SRC; ++src) { 826 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) { 827 if (uni_reg == -1) { /* first unique uniform used */ 828 uni_rgroup = inst->src[src].rgroup; 829 uni_reg = inst->src[src].reg; 830 } else { /* second or later; check that it is a re-use */ 831 if (uni_rgroup != inst->src[src].rgroup || 832 uni_reg != inst->src[src].reg) { 833 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that " 834 "accesses different uniforms, " 835 "need to generate extra MOV"); 836 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 837 838 /* Generate move instruction to temporary */ 839 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) { 840 .opcode = INST_OPCODE_MOV, 841 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 842 INST_COMPS_Z | INST_COMPS_W), 843 .src[2] = inst->src[src] 844 }); 845 846 c->inst_ptr++; 847 848 /* Modify instruction to use temp register instead of uniform */ 849 inst->src[src].use = 1; 850 inst->src[src].rgroup = INST_RGROUP_TEMP; 851 inst->src[src].reg = inner_temp.id; 852 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 853 inst->src[src].neg = 0; /* negation happens on MOV */ 854 inst->src[src].abs = 0; /* abs happens on MOV */ 855 inst->src[src].amode = 0; /* amode effects happen on MOV */ 856 } 857 } 858 } 859 } 860 861 /* Finally assemble the actual instruction */ 862 etna_assemble(&c->code[c->inst_ptr * 4], inst); 863 c->inst_ptr++; 864 } 865 866 static unsigned int 867 etna_amode(struct tgsi_ind_register indirect) 868 { 869 assert(indirect.File == TGSI_FILE_ADDRESS); 870 assert(indirect.Index == 0); 871 872 switch (indirect.Swizzle) { 873 case TGSI_SWIZZLE_X: 874 return INST_AMODE_ADD_A_X; 875 case TGSI_SWIZZLE_Y: 876 return INST_AMODE_ADD_A_Y; 877 case TGSI_SWIZZLE_Z: 878 return INST_AMODE_ADD_A_Z; 879 case TGSI_SWIZZLE_W: 880 return INST_AMODE_ADD_A_W; 881 default: 882 assert(!"Invalid swizzle"); 883 } 884 } 885 886 /* convert destination operand */ 887 static struct etna_inst_dst 888 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in) 889 { 890 struct etna_inst_dst rv = { 891 /// XXX .amode 892 .comps = in->Register.WriteMask, 893 }; 894 895 if (in->Register.File == TGSI_FILE_ADDRESS) { 896 assert(in->Register.Index == 0); 897 rv.reg = in->Register.Index; 898 rv.use = 0; 899 } else { 900 rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native, 901 in->Register.WriteMask); 902 } 903 904 if (in->Register.Indirect) 905 rv.amode = etna_amode(in->Indirect); 906 907 return rv; 908 } 909 910 /* convert texture operand */ 911 static struct etna_inst_tex 912 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in, 913 const struct tgsi_instruction_texture *tex) 914 { 915 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native; 916 struct etna_inst_tex rv = { 917 // XXX .amode (to allow for an array of samplers?) 918 .swiz = INST_SWIZ_IDENTITY 919 }; 920 921 assert(native_reg.is_tex && native_reg.valid); 922 rv.id = native_reg.id; 923 924 return rv; 925 } 926 927 /* convert source operand */ 928 static struct etna_inst_src 929 etna_create_src(const struct tgsi_full_src_register *tgsi, 930 const struct etna_native_reg *native) 931 { 932 const struct tgsi_src_register *reg = &tgsi->Register; 933 struct etna_inst_src rv = { 934 .use = 1, 935 .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW), 936 .neg = reg->Negate, 937 .abs = reg->Absolute, 938 .rgroup = native->rgroup, 939 .reg = native->id, 940 .amode = INST_AMODE_DIRECT, 941 }; 942 943 assert(native->valid && !native->is_tex); 944 945 if (reg->Indirect) 946 rv.amode = etna_amode(tgsi->Indirect); 947 948 return rv; 949 } 950 951 static struct etna_inst_src 952 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src, 953 struct etna_native_reg temp) 954 { 955 struct etna_inst mov = { }; 956 957 mov.opcode = INST_OPCODE_MOV; 958 mov.sat = 0; 959 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 960 INST_COMPS_Z | INST_COMPS_W); 961 mov.src[2] = src; 962 emit_inst(c, &mov); 963 964 src.swiz = INST_SWIZ_IDENTITY; 965 src.neg = src.abs = 0; 966 src.rgroup = temp.rgroup; 967 src.reg = temp.id; 968 969 return src; 970 } 971 972 static struct etna_inst_src 973 etna_mov_src(struct etna_compile *c, struct etna_inst_src src) 974 { 975 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 976 977 return etna_mov_src_to_temp(c, src, temp); 978 } 979 980 static bool 981 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b) 982 { 983 return etna_rgroup_is_uniform(a.rgroup) && 984 etna_rgroup_is_uniform(b.rgroup) && 985 (a.rgroup != b.rgroup || a.reg != b.reg); 986 } 987 988 /* create a new label */ 989 static struct etna_compile_label * 990 alloc_new_label(struct etna_compile *c) 991 { 992 struct etna_compile_label label = { 993 .inst_idx = -1, /* start by point to no specific instruction */ 994 }; 995 996 array_insert(c->labels, label); 997 998 return &c->labels[c->labels_count - 1]; 999 } 1000 1001 /* place label at current instruction pointer */ 1002 static void 1003 label_place(struct etna_compile *c, struct etna_compile_label *label) 1004 { 1005 label->inst_idx = c->inst_ptr; 1006 } 1007 1008 /* mark label use at current instruction. 1009 * target of the label will be filled in in the marked instruction's src2.imm 1010 * slot as soon 1011 * as the value becomes known. 1012 */ 1013 static void 1014 label_mark_use(struct etna_compile *c, struct etna_compile_label *label) 1015 { 1016 assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS); 1017 c->lbl_usage[c->inst_ptr] = label; 1018 } 1019 1020 /* walk the frame stack and return first frame with matching type */ 1021 static struct etna_compile_frame * 1022 find_frame(struct etna_compile *c, enum etna_compile_frame_type type) 1023 { 1024 for (int sp = c->frame_sp; sp >= 0; sp--) 1025 if (c->frame_stack[sp].type == type) 1026 return &c->frame_stack[sp]; 1027 1028 assert(0); 1029 return NULL; 1030 } 1031 1032 struct instr_translater { 1033 void (*fxn)(const struct instr_translater *t, struct etna_compile *c, 1034 const struct tgsi_full_instruction *inst, 1035 struct etna_inst_src *src); 1036 unsigned tgsi_opc; 1037 uint8_t opc; 1038 1039 /* tgsi src -> etna src swizzle */ 1040 int src[3]; 1041 1042 unsigned cond; 1043 }; 1044 1045 static void 1046 trans_instr(const struct instr_translater *t, struct etna_compile *c, 1047 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1048 { 1049 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode); 1050 struct etna_inst instr = { }; 1051 1052 instr.opcode = t->opc; 1053 instr.cond = t->cond; 1054 instr.sat = inst->Instruction.Saturate; 1055 1056 assert(info->num_dst <= 1); 1057 if (info->num_dst) 1058 instr.dst = convert_dst(c, &inst->Dst[0]); 1059 1060 assert(info->num_src <= ETNA_NUM_SRC); 1061 1062 for (unsigned i = 0; i < info->num_src; i++) { 1063 int swizzle = t->src[i]; 1064 1065 assert(swizzle != -1); 1066 instr.src[swizzle] = src[i]; 1067 } 1068 1069 emit_inst(c, &instr); 1070 } 1071 1072 static void 1073 trans_min_max(const struct instr_translater *t, struct etna_compile *c, 1074 const struct tgsi_full_instruction *inst, 1075 struct etna_inst_src *src) 1076 { 1077 emit_inst(c, &(struct etna_inst) { 1078 .opcode = INST_OPCODE_SELECT, 1079 .cond = t->cond, 1080 .sat = inst->Instruction.Saturate, 1081 .dst = convert_dst(c, &inst->Dst[0]), 1082 .src[0] = src[0], 1083 .src[1] = src[1], 1084 .src[2] = src[0], 1085 }); 1086 } 1087 1088 static void 1089 trans_if(const struct instr_translater *t, struct etna_compile *c, 1090 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1091 { 1092 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++]; 1093 struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f); 1094 1095 /* push IF to stack */ 1096 f->type = ETNA_COMPILE_FRAME_IF; 1097 /* create "else" label */ 1098 f->lbl_else = alloc_new_label(c); 1099 f->lbl_endif = NULL; 1100 1101 /* We need to avoid the emit_inst() below becoming two instructions */ 1102 if (etna_src_uniforms_conflict(src[0], imm_0)) 1103 src[0] = etna_mov_src(c, src[0]); 1104 1105 /* mark position in instruction stream of label reference so that it can be 1106 * filled in in next pass */ 1107 label_mark_use(c, f->lbl_else); 1108 1109 /* create conditional branch to label if src0 EQ 0 */ 1110 emit_inst(c, &(struct etna_inst){ 1111 .opcode = INST_OPCODE_BRANCH, 1112 .cond = INST_CONDITION_EQ, 1113 .src[0] = src[0], 1114 .src[1] = imm_0, 1115 /* imm is filled in later */ 1116 }); 1117 } 1118 1119 static void 1120 trans_else(const struct instr_translater *t, struct etna_compile *c, 1121 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1122 { 1123 assert(c->frame_sp > 0); 1124 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1]; 1125 assert(f->type == ETNA_COMPILE_FRAME_IF); 1126 1127 /* create "endif" label, and branch to endif label */ 1128 f->lbl_endif = alloc_new_label(c); 1129 label_mark_use(c, f->lbl_endif); 1130 emit_inst(c, &(struct etna_inst) { 1131 .opcode = INST_OPCODE_BRANCH, 1132 .cond = INST_CONDITION_TRUE, 1133 /* imm is filled in later */ 1134 }); 1135 1136 /* mark "else" label at this position in instruction stream */ 1137 label_place(c, f->lbl_else); 1138 } 1139 1140 static void 1141 trans_endif(const struct instr_translater *t, struct etna_compile *c, 1142 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1143 { 1144 assert(c->frame_sp > 0); 1145 struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp]; 1146 assert(f->type == ETNA_COMPILE_FRAME_IF); 1147 1148 /* assign "endif" or "else" (if no ELSE) label to current position in 1149 * instruction stream, pop IF */ 1150 if (f->lbl_endif != NULL) 1151 label_place(c, f->lbl_endif); 1152 else 1153 label_place(c, f->lbl_else); 1154 } 1155 1156 static void 1157 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c, 1158 const struct tgsi_full_instruction *inst, 1159 struct etna_inst_src *src) 1160 { 1161 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++]; 1162 1163 /* push LOOP to stack */ 1164 f->type = ETNA_COMPILE_FRAME_LOOP; 1165 f->lbl_loop_bgn = alloc_new_label(c); 1166 f->lbl_loop_end = alloc_new_label(c); 1167 1168 label_place(c, f->lbl_loop_bgn); 1169 } 1170 1171 static void 1172 trans_loop_end(const struct instr_translater *t, struct etna_compile *c, 1173 const struct tgsi_full_instruction *inst, 1174 struct etna_inst_src *src) 1175 { 1176 assert(c->frame_sp > 0); 1177 struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp]; 1178 assert(f->type == ETNA_COMPILE_FRAME_LOOP); 1179 1180 /* mark position in instruction stream of label reference so that it can be 1181 * filled in in next pass */ 1182 label_mark_use(c, f->lbl_loop_bgn); 1183 1184 /* create branch to loop_bgn label */ 1185 emit_inst(c, &(struct etna_inst) { 1186 .opcode = INST_OPCODE_BRANCH, 1187 .cond = INST_CONDITION_TRUE, 1188 .src[0] = src[0], 1189 /* imm is filled in later */ 1190 }); 1191 1192 label_place(c, f->lbl_loop_end); 1193 } 1194 1195 static void 1196 trans_brk(const struct instr_translater *t, struct etna_compile *c, 1197 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1198 { 1199 assert(c->frame_sp > 0); 1200 struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP); 1201 1202 /* mark position in instruction stream of label reference so that it can be 1203 * filled in in next pass */ 1204 label_mark_use(c, f->lbl_loop_end); 1205 1206 /* create branch to loop_end label */ 1207 emit_inst(c, &(struct etna_inst) { 1208 .opcode = INST_OPCODE_BRANCH, 1209 .cond = INST_CONDITION_TRUE, 1210 .src[0] = src[0], 1211 /* imm is filled in later */ 1212 }); 1213 } 1214 1215 static void 1216 trans_cont(const struct instr_translater *t, struct etna_compile *c, 1217 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1218 { 1219 assert(c->frame_sp > 0); 1220 struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP); 1221 1222 /* mark position in instruction stream of label reference so that it can be 1223 * filled in in next pass */ 1224 label_mark_use(c, f->lbl_loop_bgn); 1225 1226 /* create branch to loop_end label */ 1227 emit_inst(c, &(struct etna_inst) { 1228 .opcode = INST_OPCODE_BRANCH, 1229 .cond = INST_CONDITION_TRUE, 1230 .src[0] = src[0], 1231 /* imm is filled in later */ 1232 }); 1233 } 1234 1235 static void 1236 trans_deriv(const struct instr_translater *t, struct etna_compile *c, 1237 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1238 { 1239 emit_inst(c, &(struct etna_inst) { 1240 .opcode = t->opc, 1241 .sat = inst->Instruction.Saturate, 1242 .dst = convert_dst(c, &inst->Dst[0]), 1243 .src[0] = src[0], 1244 .src[2] = src[0], 1245 }); 1246 } 1247 1248 static void 1249 trans_arl(const struct instr_translater *t, struct etna_compile *c, 1250 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1251 { 1252 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1253 struct etna_inst arl = { }; 1254 struct etna_inst_dst dst; 1255 1256 dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | 1257 INST_COMPS_W); 1258 1259 if (c->specs->has_sign_floor_ceil) { 1260 struct etna_inst floor = { }; 1261 1262 floor.opcode = INST_OPCODE_FLOOR; 1263 floor.src[2] = src[0]; 1264 floor.dst = dst; 1265 1266 emit_inst(c, &floor); 1267 } else { 1268 struct etna_inst floor[2] = { }; 1269 1270 floor[0].opcode = INST_OPCODE_FRC; 1271 floor[0].sat = inst->Instruction.Saturate; 1272 floor[0].dst = dst; 1273 floor[0].src[2] = src[0]; 1274 1275 floor[1].opcode = INST_OPCODE_ADD; 1276 floor[1].sat = inst->Instruction.Saturate; 1277 floor[1].dst = dst; 1278 floor[1].src[0] = src[0]; 1279 floor[1].src[2].use = 1; 1280 floor[1].src[2].swiz = INST_SWIZ_IDENTITY; 1281 floor[1].src[2].neg = 1; 1282 floor[1].src[2].rgroup = temp.rgroup; 1283 floor[1].src[2].reg = temp.id; 1284 1285 emit_inst(c, &floor[0]); 1286 emit_inst(c, &floor[1]); 1287 } 1288 1289 arl.opcode = INST_OPCODE_MOVAR; 1290 arl.sat = inst->Instruction.Saturate; 1291 arl.dst = convert_dst(c, &inst->Dst[0]); 1292 arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1293 1294 emit_inst(c, &arl); 1295 } 1296 1297 static void 1298 trans_lrp(const struct instr_translater *t, struct etna_compile *c, 1299 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1300 { 1301 /* dst = src0 * src1 + (1 - src0) * src2 1302 * => src0 * src1 - (src0 - 1) * src2 1303 * => src0 * src1 - (src0 * src2 - src2) 1304 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw 1305 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw 1306 */ 1307 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1308 if (etna_src_uniforms_conflict(src[0], src[1]) || 1309 etna_src_uniforms_conflict(src[0], src[2])) { 1310 src[0] = etna_mov_src(c, src[0]); 1311 } 1312 1313 struct etna_inst mad[2] = { }; 1314 mad[0].opcode = INST_OPCODE_MAD; 1315 mad[0].sat = 0; 1316 mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1317 INST_COMPS_Z | INST_COMPS_W); 1318 mad[0].src[0] = src[0]; 1319 mad[0].src[1] = src[2]; 1320 mad[0].src[2] = negate(src[2]); 1321 mad[1].opcode = INST_OPCODE_MAD; 1322 mad[1].sat = inst->Instruction.Saturate; 1323 mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0]; 1324 mad[1].src[1] = src[1]; 1325 mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY)); 1326 1327 emit_inst(c, &mad[0]); 1328 emit_inst(c, &mad[1]); 1329 } 1330 1331 static void 1332 trans_lit(const struct instr_translater *t, struct etna_compile *c, 1333 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1334 { 1335 /* SELECT.LT tmp._y__, 0, src.yyyy, 0 1336 * - can be eliminated if src.y is a uniform and >= 0 1337 * SELECT.GT tmp.___w, 128, src.wwww, 128 1338 * SELECT.LT tmp.___w, -128, tmp.wwww, -128 1339 * - can be eliminated if src.w is a uniform and fits clamp 1340 * LOG tmp.x, void, void, tmp.yyyy 1341 * MUL tmp.x, tmp.xxxx, tmp.wwww, void 1342 * LITP dst, undef, src.xxxx, tmp.xxxx 1343 */ 1344 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 1345 struct etna_inst_src src_y = { }; 1346 1347 if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1348 src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)); 1349 1350 struct etna_inst ins = { }; 1351 ins.opcode = INST_OPCODE_SELECT; 1352 ins.cond = INST_CONDITION_LT; 1353 ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y); 1354 ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0); 1355 ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1356 emit_inst(c, &ins); 1357 } else if (uif(get_imm_u32(c, &src[0], 1)) < 0) 1358 src_y = alloc_imm_f32(c, 0.0); 1359 else 1360 src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1361 1362 struct etna_inst_src src_w = { }; 1363 1364 if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1365 src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W)); 1366 1367 struct etna_inst ins = { }; 1368 ins.opcode = INST_OPCODE_SELECT; 1369 ins.cond = INST_CONDITION_GT; 1370 ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W); 1371 ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.); 1372 ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W)); 1373 emit_inst(c, &ins); 1374 ins.cond = INST_CONDITION_LT; 1375 ins.src[0].neg = !ins.src[0].neg; 1376 ins.src[2].neg = !ins.src[2].neg; 1377 ins.src[1] = src_w; 1378 emit_inst(c, &ins); 1379 } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.) 1380 src_w = alloc_imm_f32(c, -128.); 1381 else if (uif(get_imm_u32(c, &src[0], 3)) > 128.) 1382 src_w = alloc_imm_f32(c, 128.); 1383 else 1384 src_w = swizzle(src[0], SWIZZLE(W, W, W, W)); 1385 1386 struct etna_inst ins[3] = { }; 1387 ins[0].opcode = INST_OPCODE_LOG; 1388 ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X); 1389 ins[0].src[2] = src_y; 1390 1391 emit_inst(c, &ins[0]); 1392 emit_inst(c, &(struct etna_inst) { 1393 .opcode = INST_OPCODE_MUL, 1394 .sat = 0, 1395 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X), 1396 .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)), 1397 .src[1] = src_w, 1398 }); 1399 emit_inst(c, &(struct etna_inst) { 1400 .opcode = INST_OPCODE_LITP, 1401 .sat = 0, 1402 .dst = convert_dst(c, &inst->Dst[0]), 1403 .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)), 1404 .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)), 1405 .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)), 1406 }); 1407 } 1408 1409 static void 1410 trans_ssg(const struct instr_translater *t, struct etna_compile *c, 1411 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1412 { 1413 if (c->specs->has_sign_floor_ceil) { 1414 emit_inst(c, &(struct etna_inst){ 1415 .opcode = INST_OPCODE_SIGN, 1416 .sat = inst->Instruction.Saturate, 1417 .dst = convert_dst(c, &inst->Dst[0]), 1418 .src[2] = src[0], 1419 }); 1420 } else { 1421 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1422 struct etna_inst ins[2] = { }; 1423 1424 ins[0].opcode = INST_OPCODE_SET; 1425 ins[0].cond = INST_CONDITION_NZ; 1426 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1427 INST_COMPS_Z | INST_COMPS_W); 1428 ins[0].src[0] = src[0]; 1429 1430 ins[1].opcode = INST_OPCODE_SELECT; 1431 ins[1].cond = INST_CONDITION_LZ; 1432 ins[1].sat = inst->Instruction.Saturate; 1433 ins[1].dst = convert_dst(c, &inst->Dst[0]); 1434 ins[1].src[0] = src[0]; 1435 ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1436 ins[1].src[1] = negate(ins[1].src[2]); 1437 1438 emit_inst(c, &ins[0]); 1439 emit_inst(c, &ins[1]); 1440 } 1441 } 1442 1443 static void 1444 trans_trig(const struct instr_translater *t, struct etna_compile *c, 1445 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1446 { 1447 if (c->specs->has_new_sin_cos) { /* Alternative SIN/COS */ 1448 /* On newer chips alternative SIN/COS instructions are implemented, 1449 * which: 1450 * - Need their input scaled by 1/pi instead of 2/pi 1451 * - Output an x and y component, which need to be multiplied to 1452 * get the result 1453 */ 1454 /* TGSI lowering should deal with SCS */ 1455 assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS); 1456 1457 struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */ 1458 emit_inst(c, &(struct etna_inst) { 1459 .opcode = INST_OPCODE_MUL, 1460 .sat = 0, 1461 .dst = etna_native_to_dst(temp, INST_COMPS_Z), 1462 .src[0] = src[0], /* any swizzling happens here */ 1463 .src[1] = alloc_imm_f32(c, 1.0f / M_PI), 1464 }); 1465 emit_inst(c, &(struct etna_inst) { 1466 .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS 1467 ? INST_OPCODE_COS 1468 : INST_OPCODE_SIN, 1469 .sat = 0, 1470 .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y), 1471 .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)), 1472 .tex = { .amode=1 }, /* Unknown bit needs to be set */ 1473 }); 1474 emit_inst(c, &(struct etna_inst) { 1475 .opcode = INST_OPCODE_MUL, 1476 .sat = inst->Instruction.Saturate, 1477 .dst = convert_dst(c, &inst->Dst[0]), 1478 .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)), 1479 .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)), 1480 }); 1481 1482 } else if (c->specs->has_sin_cos_sqrt) { 1483 /* TGSI lowering should deal with SCS */ 1484 assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS); 1485 1486 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1487 /* add divide by PI/2, using a temp register. GC2000 1488 * fails with src==dst for the trig instruction. */ 1489 emit_inst(c, &(struct etna_inst) { 1490 .opcode = INST_OPCODE_MUL, 1491 .sat = 0, 1492 .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1493 INST_COMPS_Z | INST_COMPS_W), 1494 .src[0] = src[0], /* any swizzling happens here */ 1495 .src[1] = alloc_imm_f32(c, 2.0f / M_PI), 1496 }); 1497 emit_inst(c, &(struct etna_inst) { 1498 .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS 1499 ? INST_OPCODE_COS 1500 : INST_OPCODE_SIN, 1501 .sat = inst->Instruction.Saturate, 1502 .dst = convert_dst(c, &inst->Dst[0]), 1503 .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), 1504 }); 1505 } else { 1506 /* Implement Nick's fast sine/cosine. Taken from: 1507 * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648 1508 * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X) 1509 * MAD t.x_zw, src.xxxx, A, B 1510 * FRC t.x_z_, void, void, t.xwzw 1511 * MAD t.x_z_, t.xwzw, 2, -1 1512 * MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs) 1513 * DP3 t.x_z_, t.zyww, C, void (for sin) 1514 * DP3 t.__z_, t.zyww, C, void (for scs) 1515 * MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs) 1516 * DP3 t.x_z_, t.xyww, C, void (for cos) 1517 * DP3 t.x___, t.xyww, C, void (for scs) 1518 * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz 1519 * MAD dst, t.ywyw, .2225, t.xzxz 1520 * 1521 * TODO: we don't set dst.zw correctly for SCS. 1522 */ 1523 struct etna_inst *p, ins[9] = { }; 1524 struct etna_native_reg t0 = etna_compile_get_inner_temp(c); 1525 struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY); 1526 struct etna_inst_src sincos[3], in = src[0]; 1527 sincos[0] = etna_imm_vec4f(c, sincos_const[0]); 1528 sincos[1] = etna_imm_vec4f(c, sincos_const[1]); 1529 1530 /* A uniform source will cause the inner temp limit to 1531 * be exceeded. Explicitly deal with that scenario. 1532 */ 1533 if (etna_rgroup_is_uniform(src[0].rgroup)) { 1534 struct etna_inst ins = { }; 1535 ins.opcode = INST_OPCODE_MOV; 1536 ins.dst = etna_native_to_dst(t0, INST_COMPS_X); 1537 ins.src[2] = in; 1538 emit_inst(c, &ins); 1539 in = t0s; 1540 } 1541 1542 ins[0].opcode = INST_OPCODE_MAD; 1543 ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W); 1544 ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X)); 1545 ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */ 1546 ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */ 1547 1548 ins[1].opcode = INST_OPCODE_FRC; 1549 ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z); 1550 ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W)); 1551 1552 ins[2].opcode = INST_OPCODE_MAD; 1553 ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z); 1554 ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W)); 1555 ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */ 1556 ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */ 1557 1558 unsigned mul_swiz, dp3_swiz; 1559 if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) { 1560 mul_swiz = SWIZZLE(W, Z, W, W); 1561 dp3_swiz = SWIZZLE(Z, Y, W, W); 1562 } else { 1563 mul_swiz = SWIZZLE(W, X, W, W); 1564 dp3_swiz = SWIZZLE(X, Y, W, W); 1565 } 1566 1567 ins[3].opcode = INST_OPCODE_MUL; 1568 ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y); 1569 ins[3].src[0] = swizzle(t0s, mul_swiz); 1570 ins[3].src[1] = absolute(ins[3].src[0]); 1571 1572 ins[4].opcode = INST_OPCODE_DP3; 1573 ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z); 1574 ins[4].src[0] = swizzle(t0s, dp3_swiz); 1575 ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W)); 1576 1577 if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) { 1578 ins[5] = ins[3]; 1579 ins[6] = ins[4]; 1580 ins[4].dst.comps = INST_COMPS_X; 1581 ins[6].dst.comps = INST_COMPS_Z; 1582 ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W)); 1583 ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W)); 1584 ins[5].src[1] = absolute(ins[5].src[0]); 1585 p = &ins[7]; 1586 } else { 1587 p = &ins[5]; 1588 } 1589 1590 p->opcode = INST_OPCODE_MAD; 1591 p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W); 1592 p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z)); 1593 p->src[1] = absolute(p->src[0]); 1594 p->src[2] = negate(p->src[0]); 1595 1596 p++; 1597 p->opcode = INST_OPCODE_MAD; 1598 p->sat = inst->Instruction.Saturate; 1599 p->dst = convert_dst(c, &inst->Dst[0]), 1600 p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W)); 1601 p->src[1] = alloc_imm_f32(c, 0.2225); 1602 p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z)); 1603 1604 for (int i = 0; &ins[i] <= p; i++) 1605 emit_inst(c, &ins[i]); 1606 } 1607 } 1608 1609 static void 1610 trans_dph(const struct instr_translater *t, struct etna_compile *c, 1611 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1612 { 1613 /* 1614 DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void 1615 ADD dst.xyzw, tmp.xyzw, void, src1.wwww 1616 */ 1617 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1618 struct etna_inst ins[2] = { }; 1619 1620 ins[0].opcode = INST_OPCODE_DP3; 1621 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1622 INST_COMPS_Z | INST_COMPS_W); 1623 ins[0].src[0] = src[0]; 1624 ins[0].src[1] = src[1]; 1625 1626 ins[1].opcode = INST_OPCODE_ADD; 1627 ins[1].sat = inst->Instruction.Saturate; 1628 ins[1].dst = convert_dst(c, &inst->Dst[0]); 1629 ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1630 ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W)); 1631 1632 emit_inst(c, &ins[0]); 1633 emit_inst(c, &ins[1]); 1634 } 1635 1636 static void 1637 trans_sampler(const struct instr_translater *t, struct etna_compile *c, 1638 const struct tgsi_full_instruction *inst, 1639 struct etna_inst_src *src) 1640 { 1641 /* There is no native support for GL texture rectangle coordinates, so 1642 * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */ 1643 if (inst->Texture.Texture == TGSI_TEXTURE_RECT) { 1644 uint32_t unit = inst->Src[1].Register.Index; 1645 struct etna_inst ins[2] = { }; 1646 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1647 1648 ins[0].opcode = INST_OPCODE_MUL; 1649 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X); 1650 ins[0].src[0] = src[0]; 1651 ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit); 1652 1653 ins[1].opcode = INST_OPCODE_MUL; 1654 ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y); 1655 ins[1].src[0] = src[0]; 1656 ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit); 1657 1658 emit_inst(c, &ins[0]); 1659 emit_inst(c, &ins[1]); 1660 1661 src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */ 1662 } 1663 1664 switch (inst->Instruction.Opcode) { 1665 case TGSI_OPCODE_TEX: 1666 emit_inst(c, &(struct etna_inst) { 1667 .opcode = INST_OPCODE_TEXLD, 1668 .sat = 0, 1669 .dst = convert_dst(c, &inst->Dst[0]), 1670 .tex = convert_tex(c, &inst->Src[1], &inst->Texture), 1671 .src[0] = src[0], 1672 }); 1673 break; 1674 1675 case TGSI_OPCODE_TXB: 1676 emit_inst(c, &(struct etna_inst) { 1677 .opcode = INST_OPCODE_TEXLDB, 1678 .sat = 0, 1679 .dst = convert_dst(c, &inst->Dst[0]), 1680 .tex = convert_tex(c, &inst->Src[1], &inst->Texture), 1681 .src[0] = src[0], 1682 }); 1683 break; 1684 1685 case TGSI_OPCODE_TXL: 1686 emit_inst(c, &(struct etna_inst) { 1687 .opcode = INST_OPCODE_TEXLDL, 1688 .sat = 0, 1689 .dst = convert_dst(c, &inst->Dst[0]), 1690 .tex = convert_tex(c, &inst->Src[1], &inst->Texture), 1691 .src[0] = src[0], 1692 }); 1693 break; 1694 1695 case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */ 1696 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1697 1698 emit_inst(c, &(struct etna_inst) { 1699 .opcode = INST_OPCODE_RCP, 1700 .sat = 0, 1701 .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */ 1702 .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)), 1703 }); 1704 emit_inst(c, &(struct etna_inst) { 1705 .opcode = INST_OPCODE_MUL, 1706 .sat = 0, 1707 .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1708 INST_COMPS_Z), /* tmp.xyz */ 1709 .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)), 1710 .src[1] = src[0], /* src.xyzw */ 1711 }); 1712 emit_inst(c, &(struct etna_inst) { 1713 .opcode = INST_OPCODE_TEXLD, 1714 .sat = 0, 1715 .dst = convert_dst(c, &inst->Dst[0]), 1716 .tex = convert_tex(c, &inst->Src[1], &inst->Texture), 1717 .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */ 1718 }); 1719 } break; 1720 1721 default: 1722 BUG("Unhandled instruction %s", 1723 tgsi_get_opcode_name(inst->Instruction.Opcode)); 1724 assert(0); 1725 break; 1726 } 1727 } 1728 1729 static void 1730 trans_dummy(const struct instr_translater *t, struct etna_compile *c, 1731 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1732 { 1733 /* nothing to do */ 1734 } 1735 1736 static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { 1737 #define INSTR(n, f, ...) \ 1738 [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__} 1739 1740 INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}), 1741 INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}), 1742 INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}), 1743 INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}), 1744 INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}), 1745 INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}), 1746 INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}), 1747 INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}), 1748 INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}), 1749 INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}), 1750 INSTR(LG2, trans_instr, .opc = INST_OPCODE_LOG, .src = {2, -1, -1}), 1751 INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}), 1752 INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}), 1753 INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}), 1754 INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}), 1755 INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ), 1756 1757 INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL), 1758 INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ), 1759 1760 INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX), 1761 INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY), 1762 1763 INSTR(IF, trans_if), 1764 INSTR(ELSE, trans_else), 1765 INSTR(ENDIF, trans_endif), 1766 1767 INSTR(BGNLOOP, trans_loop_bgn), 1768 INSTR(ENDLOOP, trans_loop_end), 1769 INSTR(BRK, trans_brk), 1770 INSTR(CONT, trans_cont), 1771 1772 INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT), 1773 INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT), 1774 1775 INSTR(ARL, trans_arl), 1776 INSTR(LRP, trans_lrp), 1777 INSTR(LIT, trans_lit), 1778 INSTR(SSG, trans_ssg), 1779 INSTR(DPH, trans_dph), 1780 1781 INSTR(SIN, trans_trig), 1782 INSTR(COS, trans_trig), 1783 INSTR(SCS, trans_trig), 1784 1785 INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT), 1786 INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE), 1787 INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ), 1788 INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT), 1789 INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE), 1790 INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE), 1791 1792 INSTR(TEX, trans_sampler), 1793 INSTR(TXB, trans_sampler), 1794 INSTR(TXL, trans_sampler), 1795 INSTR(TXP, trans_sampler), 1796 1797 INSTR(NOP, trans_dummy), 1798 INSTR(END, trans_dummy), 1799 }; 1800 1801 /* Pass -- compile instructions */ 1802 static void 1803 etna_compile_pass_generate_code(struct etna_compile *c) 1804 { 1805 struct tgsi_parse_context ctx = { }; 1806 unsigned status = tgsi_parse_init(&ctx, c->tokens); 1807 assert(status == TGSI_PARSE_OK); 1808 1809 int inst_idx = 0; 1810 while (!tgsi_parse_end_of_tokens(&ctx)) { 1811 const struct tgsi_full_instruction *inst = 0; 1812 1813 /* No inner temps used yet for this instruction, clear counter */ 1814 c->inner_temps = 0; 1815 1816 tgsi_parse_token(&ctx); 1817 1818 switch (ctx.FullToken.Token.Type) { 1819 case TGSI_TOKEN_TYPE_INSTRUCTION: 1820 /* iterate over operands */ 1821 inst = &ctx.FullToken.FullInstruction; 1822 if (c->dead_inst[inst_idx]) { /* skip dead instructions */ 1823 inst_idx++; 1824 continue; 1825 } 1826 1827 /* Lookup the TGSI information and generate the source arguments */ 1828 struct etna_inst_src src[ETNA_NUM_SRC]; 1829 memset(src, 0, sizeof(src)); 1830 1831 const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode); 1832 1833 for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) { 1834 const struct tgsi_full_src_register *reg = &inst->Src[i]; 1835 const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native; 1836 1837 if (!n->valid || n->is_tex) 1838 continue; 1839 1840 src[i] = etna_create_src(reg, n); 1841 } 1842 1843 const unsigned opc = inst->Instruction.Opcode; 1844 const struct instr_translater *t = &translaters[opc]; 1845 1846 if (t->fxn) { 1847 t->fxn(t, c, inst, src); 1848 1849 inst_idx += 1; 1850 } else { 1851 BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc)); 1852 assert(0); 1853 } 1854 break; 1855 } 1856 } 1857 tgsi_parse_free(&ctx); 1858 } 1859 1860 /* Look up register by semantic */ 1861 static struct etna_reg_desc * 1862 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index) 1863 { 1864 for (int idx = 0; idx < c->file[file].reg_size; ++idx) { 1865 struct etna_reg_desc *reg = &c->file[file].reg[idx]; 1866 1867 if (reg->semantic.Name == name && reg->semantic.Index == index) 1868 return reg; 1869 } 1870 1871 return NULL; /* not found */ 1872 } 1873 1874 /** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed: 1875 * - this is a vertex shader 1876 * - and this is an older GPU 1877 */ 1878 static void 1879 etna_compile_add_z_div_if_needed(struct etna_compile *c) 1880 { 1881 if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) { 1882 /* find position out */ 1883 struct etna_reg_desc *pos_reg = 1884 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0); 1885 1886 if (pos_reg != NULL) { 1887 /* 1888 * ADD tX.__z_, tX.zzzz, void, tX.wwww 1889 * MUL tX.__z_, tX.zzzz, 0.5, void 1890 */ 1891 emit_inst(c, &(struct etna_inst) { 1892 .opcode = INST_OPCODE_ADD, 1893 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1894 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1895 .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)), 1896 }); 1897 emit_inst(c, &(struct etna_inst) { 1898 .opcode = INST_OPCODE_MUL, 1899 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1900 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1901 .src[1] = alloc_imm_f32(c, 0.5f), 1902 }); 1903 } 1904 } 1905 } 1906 1907 /** add a NOP to the shader if 1908 * a) the shader is empty 1909 * or 1910 * b) there is a label at the end of the shader 1911 */ 1912 static void 1913 etna_compile_add_nop_if_needed(struct etna_compile *c) 1914 { 1915 bool label_at_last_inst = false; 1916 1917 for (int idx = 0; idx < c->labels_count; ++idx) { 1918 if (c->labels[idx].inst_idx == c->inst_ptr) 1919 label_at_last_inst = true; 1920 1921 } 1922 1923 if (c->inst_ptr == 0 || label_at_last_inst) 1924 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP}); 1925 } 1926 1927 static void 1928 assign_uniforms(struct etna_compile_file *file, unsigned base) 1929 { 1930 for (int idx = 0; idx < file->reg_size; ++idx) { 1931 file->reg[idx].native.valid = 1; 1932 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0; 1933 file->reg[idx].native.id = base + idx; 1934 } 1935 } 1936 1937 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x). 1938 * CONST must be consecutive as const buffers are supposed to be consecutive, 1939 * and before IMM, as this is 1940 * more convenient because is possible for the compilation process itself to 1941 * generate extra 1942 * immediates for constants such as pi, one, zero. 1943 */ 1944 static void 1945 assign_constants_and_immediates(struct etna_compile *c) 1946 { 1947 assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0); 1948 /* immediates start after the constants */ 1949 c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4; 1950 assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4); 1951 DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base, 1952 c->imm_size); 1953 } 1954 1955 /* Assign declared samplers to native texture units */ 1956 static void 1957 assign_texture_units(struct etna_compile *c) 1958 { 1959 uint tex_base = 0; 1960 1961 if (c->info.processor == PIPE_SHADER_VERTEX) 1962 tex_base = c->specs->vertex_sampler_offset; 1963 1964 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) { 1965 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1; 1966 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup 1967 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx; 1968 } 1969 } 1970 1971 /* Additional pass to fill in branch targets. This pass should be last 1972 * as no instruction reordering or removing/addition can be done anymore 1973 * once the branch targets are computed. 1974 */ 1975 static void 1976 etna_compile_fill_in_labels(struct etna_compile *c) 1977 { 1978 for (int idx = 0; idx < c->inst_ptr; ++idx) { 1979 if (c->lbl_usage[idx]) 1980 etna_assemble_set_imm(&c->code[idx * 4], c->lbl_usage[idx]->inst_idx); 1981 } 1982 } 1983 1984 /* compare two etna_native_reg structures, return true if equal */ 1985 static bool 1986 cmp_etna_native_reg(const struct etna_native_reg to, 1987 const struct etna_native_reg from) 1988 { 1989 return to.valid == from.valid && to.is_tex == from.is_tex && 1990 to.rgroup == from.rgroup && to.id == from.id; 1991 } 1992 1993 /* go through all declarations and swap native registers *to* and *from* */ 1994 static void 1995 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to, 1996 const struct etna_native_reg from) 1997 { 1998 if (cmp_etna_native_reg(from, to)) 1999 return; /* Nothing to do */ 2000 2001 for (int idx = 0; idx < c->total_decls; ++idx) { 2002 if (cmp_etna_native_reg(c->decl[idx].native, from)) { 2003 c->decl[idx].native = to; 2004 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) { 2005 c->decl[idx].native = from; 2006 } 2007 } 2008 } 2009 2010 /* For PS we need to permute so that inputs are always in temporary 0..N-1. 2011 * Semantic POS is always t0. If that semantic is not used, avoid t0. 2012 */ 2013 static void 2014 permute_ps_inputs(struct etna_compile *c) 2015 { 2016 /* Special inputs: 2017 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION 2018 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD 2019 */ 2020 uint native_idx = 1; 2021 2022 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2023 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2024 uint input_id; 2025 assert(reg->has_semantic); 2026 2027 if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION) 2028 continue; 2029 2030 input_id = native_idx++; 2031 swap_native_registers(c, etna_native_temp(input_id), 2032 c->file[TGSI_FILE_INPUT].reg[idx].native); 2033 } 2034 2035 c->num_varyings = native_idx - 1; 2036 2037 if (native_idx > c->next_free_native) 2038 c->next_free_native = native_idx; 2039 } 2040 2041 /* fill in ps inputs into shader object */ 2042 static void 2043 fill_in_ps_inputs(struct etna_shader *sobj, struct etna_compile *c) 2044 { 2045 struct etna_shader_io_file *sf = &sobj->infile; 2046 2047 sf->num_reg = 0; 2048 2049 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2050 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2051 2052 if (reg->native.id > 0) { 2053 assert(sf->num_reg < ETNA_NUM_INPUTS); 2054 sf->reg[sf->num_reg].reg = reg->native.id; 2055 sf->reg[sf->num_reg].semantic = reg->semantic; 2056 /* convert usage mask to number of components (*=wildcard) 2057 * .r (0..1) -> 1 component 2058 * .*g (2..3) -> 2 component 2059 * .**b (4..7) -> 3 components 2060 * .***a (8..15) -> 4 components 2061 */ 2062 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask); 2063 sf->num_reg++; 2064 } 2065 } 2066 2067 assert(sf->num_reg == c->num_varyings); 2068 sobj->input_count_unk8 = 31; /* XXX what is this */ 2069 } 2070 2071 /* fill in output mapping for ps into shader object */ 2072 static void 2073 fill_in_ps_outputs(struct etna_shader *sobj, struct etna_compile *c) 2074 { 2075 sobj->outfile.num_reg = 0; 2076 2077 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) { 2078 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx]; 2079 2080 switch (reg->semantic.Name) { 2081 case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */ 2082 sobj->ps_color_out_reg = reg->native.id; 2083 break; 2084 case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */ 2085 sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */ 2086 break; 2087 default: 2088 assert(0); /* only outputs supported are COLOR and POSITION at the moment */ 2089 } 2090 } 2091 } 2092 2093 /* fill in inputs for vs into shader object */ 2094 static void 2095 fill_in_vs_inputs(struct etna_shader *sobj, struct etna_compile *c) 2096 { 2097 struct etna_shader_io_file *sf = &sobj->infile; 2098 2099 sf->num_reg = 0; 2100 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) { 2101 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx]; 2102 assert(sf->num_reg < ETNA_NUM_INPUTS); 2103 /* XXX exclude inputs with special semantics such as gl_frontFacing */ 2104 sf->reg[sf->num_reg].reg = reg->native.id; 2105 sf->reg[sf->num_reg].semantic = reg->semantic; 2106 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask); 2107 sf->num_reg++; 2108 } 2109 2110 sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */ 2111 } 2112 2113 /* build two-level output index [Semantic][Index] for fast linking */ 2114 static void 2115 build_output_index(struct etna_shader *sobj) 2116 { 2117 int total = 0; 2118 int offset = 0; 2119 2120 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) 2121 total += sobj->output_count_per_semantic[name]; 2122 2123 sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *)); 2124 2125 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) { 2126 sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset]; 2127 offset += sobj->output_count_per_semantic[name]; 2128 } 2129 2130 for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) { 2131 sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name] 2132 [sobj->outfile.reg[idx].semantic.Index] = 2133 &sobj->outfile.reg[idx]; 2134 } 2135 } 2136 2137 /* fill in outputs for vs into shader object */ 2138 static void 2139 fill_in_vs_outputs(struct etna_shader *sobj, struct etna_compile *c) 2140 { 2141 struct etna_shader_io_file *sf = &sobj->outfile; 2142 2143 sf->num_reg = 0; 2144 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) { 2145 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx]; 2146 assert(sf->num_reg < ETNA_NUM_INPUTS); 2147 2148 switch (reg->semantic.Name) { 2149 case TGSI_SEMANTIC_POSITION: 2150 sobj->vs_pos_out_reg = reg->native.id; 2151 break; 2152 case TGSI_SEMANTIC_PSIZE: 2153 sobj->vs_pointsize_out_reg = reg->native.id; 2154 break; 2155 default: 2156 sf->reg[sf->num_reg].reg = reg->native.id; 2157 sf->reg[sf->num_reg].semantic = reg->semantic; 2158 sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components; 2159 sf->num_reg++; 2160 sobj->output_count_per_semantic[reg->semantic.Name] = 2161 MAX2(reg->semantic.Index + 1, 2162 sobj->output_count_per_semantic[reg->semantic.Name]); 2163 } 2164 } 2165 2166 /* build two-level index for linking */ 2167 build_output_index(sobj); 2168 2169 /* fill in "mystery meat" load balancing value. This value determines how 2170 * work is scheduled between VS and PS 2171 * in the unified shader architecture. More precisely, it is determined from 2172 * the number of VS outputs, as well as chip-specific 2173 * vertex output buffer size, vertex cache size, and the number of shader 2174 * cores. 2175 * 2176 * XXX this is a conservative estimate, the "optimal" value is only known for 2177 * sure at link time because some 2178 * outputs may be unused and thus unmapped. Then again, in the general use 2179 * case with GLSL the vertex and fragment 2180 * shaders are linked already before submitting to Gallium, thus all outputs 2181 * are used. 2182 */ 2183 int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2; 2184 assert(half_out); 2185 2186 uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size - 2187 2 * half_out * c->specs->vertex_cache_size)) + 2188 9) / 2189 10; 2190 uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2; 2191 sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | 2192 VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | 2193 VIVS_VS_LOAD_BALANCING_C(0x3f) | 2194 VIVS_VS_LOAD_BALANCING_D(0x0f); 2195 } 2196 2197 static bool 2198 etna_compile_check_limits(struct etna_compile *c) 2199 { 2200 int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX) 2201 ? c->specs->max_vs_uniforms 2202 : c->specs->max_ps_uniforms; 2203 /* round up number of uniforms, including immediates, in units of four */ 2204 int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4; 2205 2206 if (c->inst_ptr > c->specs->max_instructions) { 2207 DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr, 2208 c->specs->max_instructions); 2209 return false; 2210 } 2211 2212 if (c->next_free_native > c->specs->max_registers) { 2213 DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native, 2214 c->specs->max_registers); 2215 return false; 2216 } 2217 2218 if (num_uniforms > max_uniforms) { 2219 DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms, 2220 max_uniforms); 2221 return false; 2222 } 2223 2224 if (c->num_varyings > c->specs->max_varyings) { 2225 DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings, 2226 c->specs->max_varyings); 2227 return false; 2228 } 2229 2230 if (c->imm_base > c->specs->num_constants) { 2231 DBG("Number of constants (%d) exceeds maximum %d", c->imm_base, 2232 c->specs->num_constants); 2233 } 2234 2235 return true; 2236 } 2237 2238 static void 2239 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader *sobj) 2240 { 2241 uint32_t count = c->imm_size; 2242 struct etna_shader_uniform_info *uinfo = &sobj->uniforms; 2243 2244 uinfo->const_count = c->imm_base; 2245 uinfo->imm_count = count; 2246 uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data)); 2247 uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents)); 2248 2249 etna_set_shader_uniforms_dirty_flags(sobj); 2250 } 2251 2252 struct etna_shader * 2253 etna_compile_shader(const struct etna_specs *specs, 2254 const struct tgsi_token *tokens) 2255 { 2256 /* Create scratch space that may be too large to fit on stack 2257 */ 2258 bool ret; 2259 struct etna_compile *c; 2260 struct etna_shader *shader; 2261 2262 struct tgsi_lowering_config lconfig = { 2263 .lower_SCS = specs->has_sin_cos_sqrt, 2264 .lower_FLR = !specs->has_sign_floor_ceil, 2265 .lower_CEIL = !specs->has_sign_floor_ceil, 2266 .lower_POW = true, 2267 .lower_EXP = true, 2268 .lower_LOG = true, 2269 .lower_DP2 = true, 2270 .lower_DP2A = true, 2271 .lower_TRUNC = true, 2272 .lower_XPD = true 2273 }; 2274 2275 c = CALLOC_STRUCT(etna_compile); 2276 if (!c) 2277 return NULL; 2278 2279 shader = CALLOC_STRUCT(etna_shader); 2280 if (!shader) 2281 goto out; 2282 2283 c->specs = specs; 2284 c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info); 2285 c->free_tokens = !!c->tokens; 2286 if (!c->tokens) { 2287 /* no lowering */ 2288 c->tokens = tokens; 2289 } 2290 2291 /* Build a map from gallium register to native registers for files 2292 * CONST, SAMP, IMM, OUT, IN, TEMP. 2293 * SAMP will map as-is for fragment shaders, there will be a +8 offset for 2294 * vertex shaders. 2295 */ 2296 /* Pass one -- check register file declarations and immediates */ 2297 etna_compile_parse_declarations(c); 2298 2299 etna_allocate_decls(c); 2300 2301 /* Pass two -- check usage of temporaries, inputs, outputs */ 2302 etna_compile_pass_check_usage(c); 2303 2304 assign_special_inputs(c); 2305 2306 /* Assign native temp register to TEMPs */ 2307 assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]); 2308 2309 /* optimize outputs */ 2310 etna_compile_pass_optimize_outputs(c); 2311 2312 /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE) 2313 * this is part of RGROUP_INTERNAL 2314 */ 2315 2316 /* assign inputs: last usage of input should be <= first usage of temp */ 2317 /* potential optimization case: 2318 * if single MOV TEMP[y], IN[x] before which temp y is not used, and 2319 * after which IN[x] 2320 * is not read, temp[y] can be used as input register as-is 2321 */ 2322 /* sort temporaries by first use 2323 * sort inputs by last usage 2324 * iterate over inputs, temporaries 2325 * if last usage of input <= first usage of temp: 2326 * assign input to temp 2327 * advance input, temporary pointer 2328 * else 2329 * advance temporary pointer 2330 * 2331 * potential problem: instruction with multiple inputs of which one is the 2332 * temp and the other is the input; 2333 * however, as the temp is not used before this, how would this make 2334 * sense? uninitialized temporaries have an undefined 2335 * value, so this would be ok 2336 */ 2337 assign_inouts_to_temporaries(c, TGSI_FILE_INPUT); 2338 2339 /* assign outputs: first usage of output should be >= last usage of temp */ 2340 /* potential optimization case: 2341 * if single MOV OUT[x], TEMP[y] (with full write mask, or at least 2342 * writing all components that are used in 2343 * the shader) after which temp y is no longer used temp[y] can be 2344 * used as output register as-is 2345 * 2346 * potential problem: instruction with multiple outputs of which one is the 2347 * temp and the other is the output; 2348 * however, as the temp is not used after this, how would this make 2349 * sense? could just discard the output value 2350 */ 2351 /* sort temporaries by last use 2352 * sort outputs by first usage 2353 * iterate over outputs, temporaries 2354 * if first usage of output >= last usage of temp: 2355 * assign output to temp 2356 * advance output, temporary pointer 2357 * else 2358 * advance temporary pointer 2359 */ 2360 assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT); 2361 2362 assign_constants_and_immediates(c); 2363 assign_texture_units(c); 2364 2365 /* list declarations */ 2366 for (int x = 0; x < c->total_decls; ++x) { 2367 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2368 "last_use=%i native=%i usage_mask=%x " 2369 "has_semantic=%i", 2370 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2371 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 2372 c->decl[x].native.valid ? c->decl[x].native.id : -1, 2373 c->decl[x].usage_mask, c->decl[x].has_semantic); 2374 if (c->decl[x].has_semantic) 2375 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2376 tgsi_semantic_names[c->decl[x].semantic.Name], 2377 c->decl[x].semantic.Index); 2378 } 2379 /* XXX for PS we need to permute so that inputs are always in temporary 2380 * 0..N-1. 2381 * There is no "switchboard" for varyings (AFAIK!). The output color, 2382 * however, can be routed 2383 * from an arbitrary temporary. 2384 */ 2385 if (c->info.processor == PIPE_SHADER_FRAGMENT) 2386 permute_ps_inputs(c); 2387 2388 2389 /* list declarations */ 2390 for (int x = 0; x < c->total_decls; ++x) { 2391 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i " 2392 "last_use=%i native=%i usage_mask=%x " 2393 "has_semantic=%i", 2394 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx, 2395 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use, 2396 c->decl[x].native.valid ? c->decl[x].native.id : -1, 2397 c->decl[x].usage_mask, c->decl[x].has_semantic); 2398 if (c->decl[x].has_semantic) 2399 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", 2400 tgsi_semantic_names[c->decl[x].semantic.Name], 2401 c->decl[x].semantic.Index); 2402 } 2403 2404 /* pass 3: generate instructions */ 2405 etna_compile_pass_generate_code(c); 2406 etna_compile_add_z_div_if_needed(c); 2407 etna_compile_add_nop_if_needed(c); 2408 etna_compile_fill_in_labels(c); 2409 2410 ret = etna_compile_check_limits(c); 2411 if (!ret) { 2412 FREE(shader); 2413 shader = NULL; 2414 goto out; 2415 } 2416 2417 /* fill in output structure */ 2418 shader->processor = c->info.processor; 2419 shader->code_size = c->inst_ptr * 4; 2420 shader->code = mem_dup(c->code, c->inst_ptr * 16); 2421 shader->num_temps = c->next_free_native; 2422 shader->vs_pos_out_reg = -1; 2423 shader->vs_pointsize_out_reg = -1; 2424 shader->ps_color_out_reg = -1; 2425 shader->ps_depth_out_reg = -1; 2426 copy_uniform_state_to_shader(c, shader); 2427 2428 if (c->info.processor == PIPE_SHADER_VERTEX) { 2429 fill_in_vs_inputs(shader, c); 2430 fill_in_vs_outputs(shader, c); 2431 } else if (c->info.processor == PIPE_SHADER_FRAGMENT) { 2432 fill_in_ps_inputs(shader, c); 2433 fill_in_ps_outputs(shader, c); 2434 } 2435 2436 out: 2437 if (c->free_tokens) 2438 FREE((void *)c->tokens); 2439 2440 FREE(c->labels); 2441 FREE(c); 2442 2443 return shader; 2444 } 2445 2446 extern const char *tgsi_swizzle_names[]; 2447 void 2448 etna_dump_shader(const struct etna_shader *shader) 2449 { 2450 if (shader->processor == PIPE_SHADER_VERTEX) 2451 printf("VERT\n"); 2452 else 2453 printf("FRAG\n"); 2454 2455 2456 etna_disasm(shader->code, shader->code_size, PRINT_RAW); 2457 2458 printf("num temps: %i\n", shader->num_temps); 2459 printf("num const: %i\n", shader->uniforms.const_count); 2460 printf("immediates:\n"); 2461 for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) { 2462 printf(" [%i].%s = %f (0x%08x)\n", 2463 (idx + shader->uniforms.const_count) / 4, 2464 tgsi_swizzle_names[idx % 4], 2465 *((float *)&shader->uniforms.imm_data[idx]), 2466 shader->uniforms.imm_data[idx]); 2467 } 2468 printf("inputs:\n"); 2469 for (int idx = 0; idx < shader->infile.num_reg; ++idx) { 2470 printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg, 2471 tgsi_semantic_names[shader->infile.reg[idx].semantic.Name], 2472 shader->infile.reg[idx].semantic.Index, 2473 shader->infile.reg[idx].num_components); 2474 } 2475 printf("outputs:\n"); 2476 for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { 2477 printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg, 2478 tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name], 2479 shader->outfile.reg[idx].semantic.Index, 2480 shader->outfile.reg[idx].num_components); 2481 } 2482 printf("special:\n"); 2483 if (shader->processor == PIPE_SHADER_VERTEX) { 2484 printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); 2485 printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); 2486 printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); 2487 } else { 2488 printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); 2489 printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); 2490 } 2491 printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); 2492 } 2493 2494 void 2495 etna_destroy_shader(struct etna_shader *shader) 2496 { 2497 assert(shader); 2498 2499 FREE(shader->code); 2500 FREE(shader->uniforms.imm_data); 2501 FREE(shader->uniforms.imm_contents); 2502 FREE(shader->output_per_semantic_list); 2503 FREE(shader); 2504 } 2505 2506 static const struct etna_shader_inout * 2507 etna_shader_vs_lookup(const struct etna_shader *sobj, 2508 const struct etna_shader_inout *in) 2509 { 2510 if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name]) 2511 return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index]; 2512 2513 return NULL; 2514 } 2515 2516 bool 2517 etna_link_shader(struct etna_shader_link_info *info, 2518 const struct etna_shader *vs, const struct etna_shader *fs) 2519 { 2520 /* For each fragment input we need to find the associated vertex shader 2521 * output, which can be found by matching on semantic name and index. A 2522 * binary search could be used because the vs outputs are sorted by their 2523 * semantic index and grouped by semantic type by fill_in_vs_outputs. 2524 */ 2525 assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 2526 2527 for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 2528 const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 2529 const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 2530 struct etna_varying *varying; 2531 2532 assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 2533 2534 if (fsio->reg > info->num_varyings) 2535 info->num_varyings = fsio->reg; 2536 2537 varying = &info->varyings[fsio->reg - 1]; 2538 varying->num_components = fsio->num_components; 2539 2540 if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */ 2541 varying->pa_attributes = 0x200; 2542 else /* texture coord or other bypasses flat shading */ 2543 varying->pa_attributes = 0x2f1; 2544 2545 if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) { 2546 varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X; 2547 varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; 2548 varying->use[2] = VARYING_COMPONENT_USE_USED; 2549 varying->use[3] = VARYING_COMPONENT_USE_USED; 2550 varying->reg = 0; /* replaced by point coord -- doesn't matter */ 2551 continue; 2552 } 2553 2554 if (vsio == NULL) 2555 return true; /* not found -- link error */ 2556 2557 varying->use[0] = VARYING_COMPONENT_USE_USED; 2558 varying->use[1] = VARYING_COMPONENT_USE_USED; 2559 varying->use[2] = VARYING_COMPONENT_USE_USED; 2560 varying->use[3] = VARYING_COMPONENT_USE_USED; 2561 varying->reg = vsio->reg; 2562 } 2563 2564 assert(info->num_varyings == fs->infile.num_reg); 2565 2566 return false; 2567 } 2568