/*
 * Copyright (c) 2012-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj (at) gmail.com>
 */

/* TGSI->Vivante shader ISA conversion */

/* What does the compiler return (see etna_shader_object)?
 *  1) instruction data
 *  2) input-to-temporary mapping (fixed for ps)
 *      *) in case of ps, semantic -> varying id mapping
 *      *) for each varying: number of components used (r, rg, rgb, rgba)
 *  3) temporary-to-output mapping (in case of vs, fixed for ps)
 *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
 *  5) immediates base offset, immediates data
 *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
 *     configure the hw, but useful for error checking
 *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
 *     (output reg id is enough)
 *
 * Empty shaders are not allowed, should always at least generate a NOP. Also
 * if there is a label at the end of the shader, an extra NOP should be
 * generated as jump target.
 *
 * TODO
 * * Use an instruction scheduler
 * * Indirect access to uniforms / temporaries using amode
 */

#include "etnaviv_compiler.h"

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_disasm.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_iterate.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Maximum number of scratch temporaries a single translated TGSI
 * instruction may request (see etna_compile_get_inner_temp). */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant table; presumably operands for the SIN/COS lowering sequence
 * (usage is outside this chunk) -- TODO confirm against the emitter. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};

/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;  /* this description has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* hardware register group (INST_RGROUP_*) */
   unsigned id : 9;     /* register index within the group */
};

/* Register description, one per TGSI register */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx;                  /* index into file */
   bool active;              /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use;  /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4;       /* usage, per channel */
   bool has_semantic;             /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp;     /* Interpolation type */
};

/* Label information structure */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};

/* nesting scope frame (LOOP, IF, ...) during compilation
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;     /* valid for IF frames */
   int lbl_endif_idx;    /* valid for IF frames; -1 until ELSE seen */
   int lbl_loop_bgn_idx; /* valid for LOOP frames */
   int lbl_loop_end_idx; /* valid for LOOP frames */
};

struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};

/* Append 'val' to dynamic array 'arr' (with companion variables
 * arr_count/arr_sz), doubling capacity starting at 16 when full.
 * NOTE(review): the realloc() result is not checked; on OOM the
 * following store dereferences NULL. */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)


/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* tokens are owned by us and must be freed */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};

/* Look up the register description for a TGSI destination operand. */
static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
{
   return &c->file[dst.File].reg[dst.Index];
}

/* Look up the register description for a TGSI source operand. */
static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
{
   return &c->file[src.File].reg[src.Index];
}

/* Build a valid native TEMP register descriptor with the given id. */
static struct etna_native_reg
etna_native_temp(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup = INST_RGROUP_TEMP,
      .id = reg
   };
}

/** Register allocation **/
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
230 LAST_USE_DESC 231 }; 232 233 /* Augmented register description for sorting */ 234 struct sort_rec { 235 struct etna_reg_desc *ptr; 236 int key; 237 }; 238 239 static int 240 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b) 241 { 242 if (a->key < b->key) 243 return -1; 244 245 if (a->key > b->key) 246 return 1; 247 248 return 0; 249 } 250 251 /* create an index on a register set based on certain criteria. */ 252 static int 253 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file, 254 enum reg_sort_order so) 255 { 256 struct etna_reg_desc *regs = file->reg; 257 int ptr = 0; 258 259 /* pre-populate keys from active registers */ 260 for (int idx = 0; idx < file->reg_size; ++idx) { 261 /* only interested in active registers now; will only assign inactive ones 262 * if no space in active ones */ 263 if (regs[idx].active) { 264 sorted[ptr].ptr = ®s[idx]; 265 266 switch (so) { 267 case FIRST_USE_ASC: 268 sorted[ptr].key = regs[idx].first_use; 269 break; 270 case LAST_USE_ASC: 271 sorted[ptr].key = regs[idx].last_use; 272 break; 273 case FIRST_USE_DESC: 274 sorted[ptr].key = -regs[idx].first_use; 275 break; 276 case LAST_USE_DESC: 277 sorted[ptr].key = -regs[idx].last_use; 278 break; 279 } 280 ptr++; 281 } 282 } 283 284 /* sort index by key */ 285 qsort(sorted, ptr, sizeof(struct sort_rec), 286 (int (*)(const void *, const void *))sort_rec_compar); 287 288 return ptr; 289 } 290 291 /* Allocate a new, unused, native temp register */ 292 static struct etna_native_reg 293 alloc_new_native_reg(struct etna_compile *c) 294 { 295 assert(c->next_free_native < ETNA_MAX_TEMPS); 296 return etna_native_temp(c->next_free_native++); 297 } 298 299 /* assign TEMPs to native registers */ 300 static void 301 assign_temporaries_to_native(struct etna_compile *c, 302 struct etna_compile_file *file) 303 { 304 struct etna_reg_desc *temps = file->reg; 305 306 for (int idx = 0; idx < file->reg_size; ++idx) 307 temps[idx].native = alloc_new_native_reg(c); 308 
} 309 310 /* assign inputs and outputs to temporaries 311 * Gallium assumes that the hardware has separate registers for taking input and 312 * output, however Vivante GPUs use temporaries both for passing in inputs and 313 * passing back outputs. 314 * Try to re-use temporary registers where possible. */ 315 static void 316 assign_inouts_to_temporaries(struct etna_compile *c, uint file) 317 { 318 bool mode_inputs = (file == TGSI_FILE_INPUT); 319 int inout_ptr = 0, num_inouts; 320 int temp_ptr = 0, num_temps; 321 struct sort_rec inout_order[ETNA_MAX_TEMPS]; 322 struct sort_rec temps_order[ETNA_MAX_TEMPS]; 323 num_inouts = sort_registers(inout_order, &c->file[file], 324 mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC); 325 num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY], 326 mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC); 327 328 while (inout_ptr < num_inouts && temp_ptr < num_temps) { 329 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 330 struct etna_reg_desc *temp = temps_order[temp_ptr].ptr; 331 332 if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */ 333 inout_ptr++; 334 continue; 335 } 336 337 /* last usage of this input is before or in same instruction of first use 338 * of temporary? */ 339 if (mode_inputs ? (inout->last_use <= temp->first_use) 340 : (inout->first_use >= temp->last_use)) { 341 /* assign it and advance to next input */ 342 inout->native = temp->native; 343 inout_ptr++; 344 } 345 346 temp_ptr++; 347 } 348 349 /* if we couldn't reuse current ones, allocate new temporaries */ 350 for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) { 351 struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; 352 353 if (inout->active && !inout->native.valid) 354 inout->native = alloc_new_native_reg(c); 355 } 356 } 357 358 /* Allocate an immediate with a certain value and return the index. If 359 * there is already an immediate with that value, return that. 
360 */ 361 static struct etna_inst_src 362 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, 363 uint32_t value) 364 { 365 int idx; 366 367 /* Could use a hash table to speed this up */ 368 for (idx = 0; idx < c->imm_size; ++idx) { 369 if (c->imm_contents[idx] == contents && c->imm_data[idx] == value) 370 break; 371 } 372 373 /* look if there is an unused slot */ 374 if (idx == c->imm_size) { 375 for (idx = 0; idx < c->imm_size; ++idx) { 376 if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED) 377 break; 378 } 379 } 380 381 /* allocate new immediate */ 382 if (idx == c->imm_size) { 383 assert(c->imm_size < ETNA_MAX_IMM); 384 idx = c->imm_size++; 385 c->imm_data[idx] = value; 386 c->imm_contents[idx] = contents; 387 } 388 389 /* swizzle so that component with value is returned in all components */ 390 idx += c->imm_base; 391 struct etna_inst_src imm_src = { 392 .use = 1, 393 .rgroup = INST_RGROUP_UNIFORM_0, 394 .reg = idx / 4, 395 .swiz = INST_SWIZ_BROADCAST(idx & 3) 396 }; 397 398 return imm_src; 399 } 400 401 static struct etna_inst_src 402 alloc_imm_u32(struct etna_compile *c, uint32_t value) 403 { 404 return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value); 405 } 406 407 static struct etna_inst_src 408 alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents, 409 const uint32_t *values) 410 { 411 struct etna_inst_src imm_src = { }; 412 int idx, i; 413 414 for (idx = 0; idx + 3 < c->imm_size; idx += 4) { 415 /* What if we can use a uniform with a different swizzle? 
*/ 416 for (i = 0; i < 4; i++) 417 if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i]) 418 break; 419 if (i == 4) 420 break; 421 } 422 423 if (idx + 3 >= c->imm_size) { 424 idx = align(c->imm_size, 4); 425 assert(idx + 4 <= ETNA_MAX_IMM); 426 427 for (i = 0; i < 4; i++) { 428 c->imm_data[idx + i] = values[i]; 429 c->imm_contents[idx + i] = contents; 430 } 431 432 c->imm_size = idx + 4; 433 } 434 435 assert((c->imm_base & 3) == 0); 436 idx += c->imm_base; 437 imm_src.use = 1; 438 imm_src.rgroup = INST_RGROUP_UNIFORM_0; 439 imm_src.reg = idx / 4; 440 imm_src.swiz = INST_SWIZ_IDENTITY; 441 442 return imm_src; 443 } 444 445 static uint32_t 446 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm, 447 unsigned swiz_idx) 448 { 449 assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0); 450 unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3); 451 452 return c->imm_data[idx]; 453 } 454 455 /* Allocate immediate with a certain float value. If there is already an 456 * immediate with that value, return that. 
457 */ 458 static struct etna_inst_src 459 alloc_imm_f32(struct etna_compile *c, float value) 460 { 461 return alloc_imm_u32(c, fui(value)); 462 } 463 464 static struct etna_inst_src 465 etna_imm_vec4f(struct etna_compile *c, const float *vec4) 466 { 467 uint32_t val[4]; 468 469 for (int i = 0; i < 4; i++) 470 val[i] = fui(vec4[i]); 471 472 return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val); 473 } 474 475 /* Pass -- check register file declarations and immediates */ 476 static void 477 etna_compile_parse_declarations(struct etna_compile *c) 478 { 479 struct tgsi_parse_context ctx = { }; 480 unsigned status = TGSI_PARSE_OK; 481 status = tgsi_parse_init(&ctx, c->tokens); 482 assert(status == TGSI_PARSE_OK); 483 484 while (!tgsi_parse_end_of_tokens(&ctx)) { 485 tgsi_parse_token(&ctx); 486 487 switch (ctx.FullToken.Token.Type) { 488 case TGSI_TOKEN_TYPE_IMMEDIATE: { 489 /* immediates are handled differently from other files; they are 490 * not declared explicitly, and always add four components */ 491 const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate; 492 assert(c->imm_size <= (ETNA_MAX_IMM - 4)); 493 494 for (int i = 0; i < 4; ++i) { 495 unsigned idx = c->imm_size++; 496 497 c->imm_data[idx] = imm->u[i].Uint; 498 c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT; 499 } 500 } 501 break; 502 } 503 } 504 505 tgsi_parse_free(&ctx); 506 } 507 508 /* Allocate register declarations for the registers in all register files */ 509 static void 510 etna_allocate_decls(struct etna_compile *c) 511 { 512 uint idx = 0; 513 514 for (int x = 0; x < TGSI_FILE_COUNT; ++x) { 515 c->file[x].reg = &c->decl[idx]; 516 c->file[x].reg_size = c->info.file_max[x] + 1; 517 518 for (int sub = 0; sub < c->file[x].reg_size; ++sub) { 519 c->decl[idx].file = x; 520 c->decl[idx].idx = sub; 521 idx++; 522 } 523 } 524 525 c->total_decls = idx; 526 } 527 528 /* Pass -- check and record usage of temporaries, inputs, outputs */ 529 static void 530 etna_compile_pass_check_usage(struct 
etna_compile *c) 531 { 532 struct tgsi_parse_context ctx = { }; 533 unsigned status = TGSI_PARSE_OK; 534 status = tgsi_parse_init(&ctx, c->tokens); 535 assert(status == TGSI_PARSE_OK); 536 537 for (int idx = 0; idx < c->total_decls; ++idx) { 538 c->decl[idx].active = false; 539 c->decl[idx].first_use = c->decl[idx].last_use = -1; 540 } 541 542 int inst_idx = 0; 543 while (!tgsi_parse_end_of_tokens(&ctx)) { 544 tgsi_parse_token(&ctx); 545 /* find out max register #s used 546 * For every register mark first and last instruction index where it's 547 * used this allows finding ranges where the temporary can be borrowed 548 * as input and/or output register 549 * 550 * XXX in the case of loops this needs special care, or even be completely 551 * disabled, as 552 * the last usage of a register inside a loop means it can still be used 553 * on next loop 554 * iteration (execution is no longer * chronological). The register can 555 * only be 556 * declared "free" after the loop finishes. 557 * 558 * Same for inputs: the first usage of a register inside a loop doesn't 559 * mean that the register 560 * won't have been overwritten in previous iteration. The register can 561 * only be declared free before the loop 562 * starts. 563 * The proper way would be to do full dominator / post-dominator analysis 564 * (especially with more complicated 565 * control flow such as direct branch instructions) but not for now... 
566 */ 567 switch (ctx.FullToken.Token.Type) { 568 case TGSI_TOKEN_TYPE_DECLARATION: { 569 /* Declaration: fill in file details */ 570 const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; 571 struct etna_compile_file *file = &c->file[decl->Declaration.File]; 572 573 for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) { 574 file->reg[idx].usage_mask = 0; // we'll compute this ourselves 575 file->reg[idx].has_semantic = decl->Declaration.Semantic; 576 file->reg[idx].semantic = decl->Semantic; 577 file->reg[idx].interp = decl->Interp; 578 } 579 } break; 580 case TGSI_TOKEN_TYPE_INSTRUCTION: { 581 /* Instruction: iterate over operands of instruction */ 582 const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 583 584 /* iterate over destination registers */ 585 for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) { 586 struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index]; 587 588 if (reg_desc->first_use == -1) 589 reg_desc->first_use = inst_idx; 590 591 reg_desc->last_use = inst_idx; 592 reg_desc->active = true; 593 } 594 595 /* iterate over source registers */ 596 for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) { 597 struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index]; 598 599 if (reg_desc->first_use == -1) 600 reg_desc->first_use = inst_idx; 601 602 reg_desc->last_use = inst_idx; 603 reg_desc->active = true; 604 /* accumulate usage mask for register, this is used to determine how 605 * many slots for varyings 606 * should be allocated */ 607 reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx); 608 } 609 inst_idx += 1; 610 } break; 611 default: 612 break; 613 } 614 } 615 616 tgsi_parse_free(&ctx); 617 } 618 619 /* assign inputs that need to be assigned to specific registers */ 620 static void 621 assign_special_inputs(struct etna_compile *c) 622 { 623 if 
(c->info.processor == PIPE_SHADER_FRAGMENT) { 624 /* never assign t0 as it is the position output, start assigning at t1 */ 625 c->next_free_native = 1; 626 627 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */ 628 for (int idx = 0; idx < c->total_decls; ++idx) { 629 struct etna_reg_desc *reg = &c->decl[idx]; 630 631 if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION) 632 reg->native = etna_native_temp(0); 633 } 634 } 635 } 636 637 /* Check that a move instruction does not swizzle any of the components 638 * that it writes. 639 */ 640 static bool 641 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst, 642 const struct tgsi_src_register src) 643 { 644 return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) && 645 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) && 646 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) && 647 (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W); 648 } 649 650 /* Pass -- optimize outputs 651 * Mesa tends to generate code like this at the end if their shaders 652 * MOV OUT[1], TEMP[2] 653 * MOV OUT[0], TEMP[0] 654 * MOV OUT[2], TEMP[1] 655 * Recognize if 656 * a) there is only a single assignment to an output register and 657 * b) the temporary is not used after that 658 * Also recognize direct assignment of IN to OUT (passthrough) 659 **/ 660 static void 661 etna_compile_pass_optimize_outputs(struct etna_compile *c) 662 { 663 struct tgsi_parse_context ctx = { }; 664 int inst_idx = 0; 665 unsigned status = TGSI_PARSE_OK; 666 status = tgsi_parse_init(&ctx, c->tokens); 667 assert(status == TGSI_PARSE_OK); 668 669 while (!tgsi_parse_end_of_tokens(&ctx)) { 670 tgsi_parse_token(&ctx); 671 672 switch (ctx.FullToken.Token.Type) { 673 case TGSI_TOKEN_TYPE_INSTRUCTION: { 674 const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; 675 676 /* iterate over operands */ 677 switch (inst->Instruction.Opcode) { 
678 case TGSI_OPCODE_MOV: { 679 /* We are only interested in eliminating MOVs which write to 680 * the shader outputs. Test for this early. */ 681 if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 682 break; 683 /* Elimination of a MOV must have no visible effect on the 684 * resulting shader: this means the MOV must not swizzle or 685 * saturate, and its source must not have the negate or 686 * absolute modifiers. */ 687 if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) || 688 inst->Instruction.Saturate || inst->Src[0].Register.Negate || 689 inst->Src[0].Register.Absolute) 690 break; 691 692 uint out_idx = inst->Dst[0].Register.Index; 693 uint in_idx = inst->Src[0].Register.Index; 694 /* assignment of temporary to output -- 695 * and the output doesn't yet have a native register assigned 696 * and the last use of the temporary is this instruction 697 * and the MOV does not do a swizzle 698 */ 699 if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY && 700 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 701 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) { 702 c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 703 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native; 704 /* prevent temp from being re-used for the rest of the shader */ 705 c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS; 706 /* mark this MOV instruction as a no-op */ 707 c->dead_inst[inst_idx] = true; 708 } 709 /* direct assignment of input to output -- 710 * and the input or output doesn't yet have a native register 711 * assigned 712 * and the output is only used in this instruction, 713 * allocate a new register, and associate both input and output to 714 * it 715 * and the MOV does not do a swizzle 716 */ 717 if (inst->Src[0].Register.File == TGSI_FILE_INPUT && 718 !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid && 719 !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid && 720 c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == 
inst_idx && 721 c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) { 722 c->file[TGSI_FILE_OUTPUT].reg[out_idx].native = 723 c->file[TGSI_FILE_INPUT].reg[in_idx].native = 724 alloc_new_native_reg(c); 725 /* mark this MOV instruction as a no-op */ 726 c->dead_inst[inst_idx] = true; 727 } 728 } break; 729 default:; 730 } 731 inst_idx += 1; 732 } break; 733 } 734 } 735 736 tgsi_parse_free(&ctx); 737 } 738 739 /* Get a temporary to be used within one TGSI instruction. 740 * The first time that this function is called the temporary will be allocated. 741 * Each call to this function will return the same temporary. 742 */ 743 static struct etna_native_reg 744 etna_compile_get_inner_temp(struct etna_compile *c) 745 { 746 int inner_temp = c->inner_temps; 747 748 if (inner_temp < ETNA_MAX_INNER_TEMPS) { 749 if (!c->inner_temp[inner_temp].valid) 750 c->inner_temp[inner_temp] = alloc_new_native_reg(c); 751 752 /* alloc_new_native_reg() handles lack of registers */ 753 c->inner_temps += 1; 754 } else { 755 BUG("Too many inner temporaries (%i) requested in one instruction", 756 inner_temp + 1); 757 } 758 759 return c->inner_temp[inner_temp]; 760 } 761 762 static struct etna_inst_dst 763 etna_native_to_dst(struct etna_native_reg native, unsigned comps) 764 { 765 /* Can only assign to temporaries */ 766 assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP); 767 768 struct etna_inst_dst rv = { 769 .comps = comps, 770 .use = 1, 771 .reg = native.id, 772 }; 773 774 return rv; 775 } 776 777 static struct etna_inst_src 778 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle) 779 { 780 assert(native.valid && !native.is_tex); 781 782 struct etna_inst_src rv = { 783 .use = 1, 784 .swiz = swizzle, 785 .rgroup = native.rgroup, 786 .reg = native.id, 787 .amode = INST_AMODE_DIRECT, 788 }; 789 790 return rv; 791 } 792 793 static inline struct etna_inst_src 794 negate(struct etna_inst_src src) 795 { 796 src.neg = !src.neg; 797 798 return src; 799 
} 800 801 static inline struct etna_inst_src 802 absolute(struct etna_inst_src src) 803 { 804 src.abs = 1; 805 806 return src; 807 } 808 809 static inline struct etna_inst_src 810 swizzle(struct etna_inst_src src, unsigned swizzle) 811 { 812 src.swiz = inst_swiz_compose(src.swiz, swizzle); 813 814 return src; 815 } 816 817 /* Emit instruction and append it to program */ 818 static void 819 emit_inst(struct etna_compile *c, struct etna_inst *inst) 820 { 821 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS); 822 823 /* Check for uniform conflicts (each instruction can only access one 824 * uniform), 825 * if detected, use an intermediate temporary */ 826 unsigned uni_rgroup = -1; 827 unsigned uni_reg = -1; 828 829 for (int src = 0; src < ETNA_NUM_SRC; ++src) { 830 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) { 831 if (uni_reg == -1) { /* first unique uniform used */ 832 uni_rgroup = inst->src[src].rgroup; 833 uni_reg = inst->src[src].reg; 834 } else { /* second or later; check that it is a re-use */ 835 if (uni_rgroup != inst->src[src].rgroup || 836 uni_reg != inst->src[src].reg) { 837 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that " 838 "accesses different uniforms, " 839 "need to generate extra MOV"); 840 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 841 842 /* Generate move instruction to temporary */ 843 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) { 844 .opcode = INST_OPCODE_MOV, 845 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y | 846 INST_COMPS_Z | INST_COMPS_W), 847 .src[2] = inst->src[src] 848 }); 849 850 c->inst_ptr++; 851 852 /* Modify instruction to use temp register instead of uniform */ 853 inst->src[src].use = 1; 854 inst->src[src].rgroup = INST_RGROUP_TEMP; 855 inst->src[src].reg = inner_temp.id; 856 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */ 857 inst->src[src].neg = 0; /* negation happens on MOV */ 858 inst->src[src].abs = 0; /* abs happens on MOV 
*/ 859 inst->src[src].amode = 0; /* amode effects happen on MOV */ 860 } 861 } 862 } 863 } 864 865 /* Finally assemble the actual instruction */ 866 etna_assemble(&c->code[c->inst_ptr * 4], inst); 867 c->inst_ptr++; 868 } 869 870 static unsigned int 871 etna_amode(struct tgsi_ind_register indirect) 872 { 873 assert(indirect.File == TGSI_FILE_ADDRESS); 874 assert(indirect.Index == 0); 875 876 switch (indirect.Swizzle) { 877 case TGSI_SWIZZLE_X: 878 return INST_AMODE_ADD_A_X; 879 case TGSI_SWIZZLE_Y: 880 return INST_AMODE_ADD_A_Y; 881 case TGSI_SWIZZLE_Z: 882 return INST_AMODE_ADD_A_Z; 883 case TGSI_SWIZZLE_W: 884 return INST_AMODE_ADD_A_W; 885 default: 886 assert(!"Invalid swizzle"); 887 } 888 889 unreachable("bad swizzle"); 890 } 891 892 /* convert destination operand */ 893 static struct etna_inst_dst 894 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in) 895 { 896 struct etna_inst_dst rv = { 897 /// XXX .amode 898 .comps = in->Register.WriteMask, 899 }; 900 901 if (in->Register.File == TGSI_FILE_ADDRESS) { 902 assert(in->Register.Index == 0); 903 rv.reg = in->Register.Index; 904 rv.use = 0; 905 } else { 906 rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native, 907 in->Register.WriteMask); 908 } 909 910 if (in->Register.Indirect) 911 rv.amode = etna_amode(in->Indirect); 912 913 return rv; 914 } 915 916 /* convert texture operand */ 917 static struct etna_inst_tex 918 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in, 919 const struct tgsi_instruction_texture *tex) 920 { 921 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native; 922 struct etna_inst_tex rv = { 923 // XXX .amode (to allow for an array of samplers?) 
924 .swiz = INST_SWIZ_IDENTITY 925 }; 926 927 assert(native_reg.is_tex && native_reg.valid); 928 rv.id = native_reg.id; 929 930 return rv; 931 } 932 933 /* convert source operand */ 934 static struct etna_inst_src 935 etna_create_src(const struct tgsi_full_src_register *tgsi, 936 const struct etna_native_reg *native) 937 { 938 const struct tgsi_src_register *reg = &tgsi->Register; 939 struct etna_inst_src rv = { 940 .use = 1, 941 .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW), 942 .neg = reg->Negate, 943 .abs = reg->Absolute, 944 .rgroup = native->rgroup, 945 .reg = native->id, 946 .amode = INST_AMODE_DIRECT, 947 }; 948 949 assert(native->valid && !native->is_tex); 950 951 if (reg->Indirect) 952 rv.amode = etna_amode(tgsi->Indirect); 953 954 return rv; 955 } 956 957 static struct etna_inst_src 958 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src, 959 struct etna_native_reg temp) 960 { 961 struct etna_inst mov = { }; 962 963 mov.opcode = INST_OPCODE_MOV; 964 mov.sat = 0; 965 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 966 INST_COMPS_Z | INST_COMPS_W); 967 mov.src[2] = src; 968 emit_inst(c, &mov); 969 970 src.swiz = INST_SWIZ_IDENTITY; 971 src.neg = src.abs = 0; 972 src.rgroup = temp.rgroup; 973 src.reg = temp.id; 974 975 return src; 976 } 977 978 static struct etna_inst_src 979 etna_mov_src(struct etna_compile *c, struct etna_inst_src src) 980 { 981 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 982 983 return etna_mov_src_to_temp(c, src, temp); 984 } 985 986 static bool 987 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b) 988 { 989 return etna_rgroup_is_uniform(a.rgroup) && 990 etna_rgroup_is_uniform(b.rgroup) && 991 (a.rgroup != b.rgroup || a.reg != b.reg); 992 } 993 994 /* create a new label */ 995 static unsigned int 996 alloc_new_label(struct etna_compile *c) 997 { 998 struct etna_compile_label label = { 999 .inst_idx = -1, /* start by point to no 
specific instruction */
   };

   array_insert(c->labels, label);

   return c->labels_count - 1;
}

/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}

/* Mark a label use at the current instruction.
 * The target of the label will be filled in to the marked instruction's
 * src2.imm slot as soon as the value becomes known
 * (see etna_compile_fill_in_labels).
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}

/* walk the frame stack and return first frame with matching type;
 * asserts (and returns NULL) if no such frame is on the stack */
static struct etna_compile_frame *
find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
{
   for (int sp = c->frame_sp; sp >= 0; sp--)
      if (c->frame_stack[sp].type == type)
         return &c->frame_stack[sp];

   assert(0);
   return NULL;
}

/* One entry of the TGSI -> native translation table (see translaters[]) */
struct instr_translater {
   /* callback that emits the native instruction(s) for this TGSI opcode */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* the TGSI opcode this entry handles */
   uint8_t opc;       /* native (Vivante) opcode, when trans_instr is used */

   /* tgsi src -> etna src swizzle: src[i] is the native source slot that
    * TGSI operand i is routed into; -1 means the slot must not be used */
   int src[3];

   unsigned cond; /* native condition code, where relevant */
};

/* Generic one-to-one translation: copy condition/saturate from the table
 * and instruction, convert the destination, and route each TGSI source
 * operand into the native source slot given by t->src[]. */
static void
trans_instr(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
   struct etna_inst instr = { };

   instr.opcode = t->opc;
   instr.cond = t->cond;
   instr.sat = inst->Instruction.Saturate;

   assert(info->num_dst <= 1);
   if (info->num_dst)
      instr.dst = convert_dst(c, &inst->Dst[0]);

   assert(info->num_src <= ETNA_NUM_SRC);

   for (unsigned i = 0; i < info->num_src; i++) {
      int swizzle = t->src[i];

      assert(swizzle != -1);
      instr.src[swizzle] = src[i];
   }

   emit_inst(c, &instr);
}

/* MIN/MAX implemented with a conditional SELECT; the condition (GT for
 * MIN, LT for MAX) comes from the translation table entry. */
static void
trans_min_max(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_SELECT,
      .cond = t->cond,
      .sat = inst->Instruction.Saturate,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = src[0],
      .src[1] = src[1],
      .src[2] = src[0],
   });
}

/* IF: push an IF frame and emit a conditional branch to the (not yet
 * placed) "else" label, taken when src0 == 0. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else_idx = alloc_new_label(c);
   f->lbl_endif_idx = -1;

   /* We need to avoid the emit_inst() below becoming two instructions */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else_idx);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}

/* ELSE: branch unconditionally to the (future) "endif" label, then place
 * the "else" label here. */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type ==
ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and branch to endif label */
   f->lbl_endif_idx = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif_idx);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, &c->labels[f->lbl_else_idx]);
}

/* ENDIF: pop the IF frame and place the pending label here. */
static void
trans_endif(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* assign "endif" or "else" (if no ELSE) label to current position in
    * instruction stream, pop IF */
   if (f->lbl_endif_idx != -1)
      label_place(c, &c->labels[f->lbl_endif_idx]);
   else
      label_place(c, &c->labels[f->lbl_else_idx]);
}

/* BGNLOOP: push a LOOP frame, allocate begin/end labels and place the
 * begin label at the current position. */
static void
trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];

   /* push LOOP to stack */
   f->type = ETNA_COMPILE_FRAME_LOOP;
   f->lbl_loop_bgn_idx = alloc_new_label(c);
   f->lbl_loop_end_idx = alloc_new_label(c);

   label_place(c, &c->labels[f->lbl_loop_bgn_idx]);

   c->num_loops++;
}

/* ENDLOOP: pop the LOOP frame, branch back to loop_bgn and place the
 * loop_end label here. */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0], /* NOTE(review): ENDLOOP carries no TGSI source, so
                         * this is presumably a zeroed operand -- confirm */
      /* imm is filled in later */
   });

   label_place(c, &c->labels[f->lbl_loop_end_idx]);
}

/* BRK: unconditional branch to the loop_end label of the innermost LOOP. */
static void
trans_brk(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_end_idx);

   /* create branch to loop_end label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* CONT: unconditional branch back to the loop_bgn label of the innermost
 * LOOP. */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label (the previous comment said loop_end,
    * but the label marked above is the loop beginning) */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}

/* DDX/DDY: screen-space derivative; the operand is duplicated into src0
 * and src2. */
static void
trans_deriv(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = t->opc,
      .sat = inst->Instruction.Saturate,
      .dst =
convert_dst(c, &inst->Dst[0]), 1251 .src[0] = src[0], 1252 .src[2] = src[0], 1253 }); 1254 } 1255 1256 static void 1257 trans_arl(const struct instr_translater *t, struct etna_compile *c, 1258 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1259 { 1260 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1261 struct etna_inst arl = { }; 1262 struct etna_inst_dst dst; 1263 1264 dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | 1265 INST_COMPS_W); 1266 1267 if (c->specs->has_sign_floor_ceil) { 1268 struct etna_inst floor = { }; 1269 1270 floor.opcode = INST_OPCODE_FLOOR; 1271 floor.src[2] = src[0]; 1272 floor.dst = dst; 1273 1274 emit_inst(c, &floor); 1275 } else { 1276 struct etna_inst floor[2] = { }; 1277 1278 floor[0].opcode = INST_OPCODE_FRC; 1279 floor[0].sat = inst->Instruction.Saturate; 1280 floor[0].dst = dst; 1281 floor[0].src[2] = src[0]; 1282 1283 floor[1].opcode = INST_OPCODE_ADD; 1284 floor[1].sat = inst->Instruction.Saturate; 1285 floor[1].dst = dst; 1286 floor[1].src[0] = src[0]; 1287 floor[1].src[2].use = 1; 1288 floor[1].src[2].swiz = INST_SWIZ_IDENTITY; 1289 floor[1].src[2].neg = 1; 1290 floor[1].src[2].rgroup = temp.rgroup; 1291 floor[1].src[2].reg = temp.id; 1292 1293 emit_inst(c, &floor[0]); 1294 emit_inst(c, &floor[1]); 1295 } 1296 1297 arl.opcode = INST_OPCODE_MOVAR; 1298 arl.sat = inst->Instruction.Saturate; 1299 arl.dst = convert_dst(c, &inst->Dst[0]); 1300 arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1301 1302 emit_inst(c, &arl); 1303 } 1304 1305 static void 1306 trans_lrp(const struct instr_translater *t, struct etna_compile *c, 1307 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1308 { 1309 /* dst = src0 * src1 + (1 - src0) * src2 1310 * => src0 * src1 - (src0 - 1) * src2 1311 * => src0 * src1 - (src0 * src2 - src2) 1312 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw 1313 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw 1314 */ 1315 
struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1316 if (etna_src_uniforms_conflict(src[0], src[1]) || 1317 etna_src_uniforms_conflict(src[0], src[2])) { 1318 src[0] = etna_mov_src(c, src[0]); 1319 } 1320 1321 struct etna_inst mad[2] = { }; 1322 mad[0].opcode = INST_OPCODE_MAD; 1323 mad[0].sat = 0; 1324 mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1325 INST_COMPS_Z | INST_COMPS_W); 1326 mad[0].src[0] = src[0]; 1327 mad[0].src[1] = src[2]; 1328 mad[0].src[2] = negate(src[2]); 1329 mad[1].opcode = INST_OPCODE_MAD; 1330 mad[1].sat = inst->Instruction.Saturate; 1331 mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0]; 1332 mad[1].src[1] = src[1]; 1333 mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY)); 1334 1335 emit_inst(c, &mad[0]); 1336 emit_inst(c, &mad[1]); 1337 } 1338 1339 static void 1340 trans_lit(const struct instr_translater *t, struct etna_compile *c, 1341 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1342 { 1343 /* SELECT.LT tmp._y__, 0, src.yyyy, 0 1344 * - can be eliminated if src.y is a uniform and >= 0 1345 * SELECT.GT tmp.___w, 128, src.wwww, 128 1346 * SELECT.LT tmp.___w, -128, tmp.wwww, -128 1347 * - can be eliminated if src.w is a uniform and fits clamp 1348 * LOG tmp.x, void, void, tmp.yyyy 1349 * MUL tmp.x, tmp.xxxx, tmp.wwww, void 1350 * LITP dst, undef, src.xxxx, tmp.xxxx 1351 */ 1352 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c); 1353 struct etna_inst_src src_y = { }; 1354 1355 if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1356 src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)); 1357 1358 struct etna_inst ins = { }; 1359 ins.opcode = INST_OPCODE_SELECT; 1360 ins.cond = INST_CONDITION_LT; 1361 ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y); 1362 ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0); 1363 ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1364 emit_inst(c, &ins); 1365 } else if (uif(get_imm_u32(c, &src[0], 
1)) < 0) 1366 src_y = alloc_imm_f32(c, 0.0); 1367 else 1368 src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y)); 1369 1370 struct etna_inst_src src_w = { }; 1371 1372 if (!etna_rgroup_is_uniform(src[0].rgroup)) { 1373 src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W)); 1374 1375 struct etna_inst ins = { }; 1376 ins.opcode = INST_OPCODE_SELECT; 1377 ins.cond = INST_CONDITION_GT; 1378 ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W); 1379 ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.); 1380 ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W)); 1381 emit_inst(c, &ins); 1382 ins.cond = INST_CONDITION_LT; 1383 ins.src[0].neg = !ins.src[0].neg; 1384 ins.src[2].neg = !ins.src[2].neg; 1385 ins.src[1] = src_w; 1386 emit_inst(c, &ins); 1387 } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.) 1388 src_w = alloc_imm_f32(c, -128.); 1389 else if (uif(get_imm_u32(c, &src[0], 3)) > 128.) 1390 src_w = alloc_imm_f32(c, 128.); 1391 else 1392 src_w = swizzle(src[0], SWIZZLE(W, W, W, W)); 1393 1394 if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */ 1395 emit_inst(c, &(struct etna_inst) { 1396 .opcode = INST_OPCODE_LOG, 1397 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y), 1398 .src[2] = src_y, 1399 .tex = { .amode=1 }, /* Unknown bit needs to be set */ 1400 }); 1401 emit_inst(c, &(struct etna_inst) { 1402 .opcode = INST_OPCODE_MUL, 1403 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X), 1404 .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)), 1405 .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)), 1406 }); 1407 } else { 1408 struct etna_inst ins[3] = { }; 1409 ins[0].opcode = INST_OPCODE_LOG; 1410 ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X); 1411 ins[0].src[2] = src_y; 1412 1413 emit_inst(c, &ins[0]); 1414 } 1415 emit_inst(c, &(struct etna_inst) { 1416 .opcode = INST_OPCODE_MUL, 1417 .sat = 0, 1418 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X), 1419 .src[0] = etna_native_to_src(inner_temp, 
SWIZZLE(X, X, X, X)), 1420 .src[1] = src_w, 1421 }); 1422 emit_inst(c, &(struct etna_inst) { 1423 .opcode = INST_OPCODE_LITP, 1424 .sat = 0, 1425 .dst = convert_dst(c, &inst->Dst[0]), 1426 .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)), 1427 .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)), 1428 .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)), 1429 }); 1430 } 1431 1432 static void 1433 trans_ssg(const struct instr_translater *t, struct etna_compile *c, 1434 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1435 { 1436 if (c->specs->has_sign_floor_ceil) { 1437 emit_inst(c, &(struct etna_inst){ 1438 .opcode = INST_OPCODE_SIGN, 1439 .sat = inst->Instruction.Saturate, 1440 .dst = convert_dst(c, &inst->Dst[0]), 1441 .src[2] = src[0], 1442 }); 1443 } else { 1444 struct etna_native_reg temp = etna_compile_get_inner_temp(c); 1445 struct etna_inst ins[2] = { }; 1446 1447 ins[0].opcode = INST_OPCODE_SET; 1448 ins[0].cond = INST_CONDITION_NZ; 1449 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | 1450 INST_COMPS_Z | INST_COMPS_W); 1451 ins[0].src[0] = src[0]; 1452 1453 ins[1].opcode = INST_OPCODE_SELECT; 1454 ins[1].cond = INST_CONDITION_LZ; 1455 ins[1].sat = inst->Instruction.Saturate; 1456 ins[1].dst = convert_dst(c, &inst->Dst[0]); 1457 ins[1].src[0] = src[0]; 1458 ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); 1459 ins[1].src[1] = negate(ins[1].src[2]); 1460 1461 emit_inst(c, &ins[0]); 1462 emit_inst(c, &ins[1]); 1463 } 1464 } 1465 1466 static void 1467 trans_trig(const struct instr_translater *t, struct etna_compile *c, 1468 const struct tgsi_full_instruction *inst, struct etna_inst_src *src) 1469 { 1470 if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */ 1471 /* On newer chips alternative SIN/COS instructions are implemented, 1472 * which: 1473 * - Need their input scaled by 1/pi instead of 2/pi 1474 * - Output an x and y component, which need to be multiplied to 1475 * get the result 1476 */ 
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      /* scale input by 1/pi into tmp.z */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* the result is the product of the two output components */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded.  Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS share the code below; only the swizzles differ */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      /* NOTE: comma operator, not a semicolon -- behaves the same here */
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit everything from ins[0] up to and including the last
       * instruction filled in via p */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}

/* LG2: base-2 logarithm. */
static void
trans_lg2(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) {
      /* On newer chips alternative LOG instruction is implemented,
       * which outputs an x and y component, which need to be multiplied to
       * get the result.
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src[0],
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   }
}

/* TEX/TXB/TXL/TXP: texture sampling. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]).
    */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* scale x and y by per-unit 1/width and 1/height immediates */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* tmp.w = 1 / src.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      /* tmp.xyz = src.xyz * tmp.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}

/* used for TGSI opcodes that need no code generation (NOP, END) */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}

/* Translation table, indexed by TGSI opcode.  The .src initializer maps
 * TGSI operand i to the native source slot it must occupy (-1 = unused). */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};

/* Pass -- compile instructions */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* inst_idx counts INSTRUCTION tokens only; it indexes c->dead_inst[] */
   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct
tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;

            /* only ALU-addressable registers become source operands;
             * texture units are passed via the .tex field instead */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}

/* Look up register by semantic; returns NULL when no declaration in the
 * given file carries that semantic name/index. */
static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
{
   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[file].reg[idx];

      if (reg->semantic.Name == name && reg->semantic.Index == index)
         return reg;
   }

   return NULL; /* not found */
}

/** Add ADD and MUL instructions to bring Z from -1..1 to 0..1 (z=(z+w)/2,
 * see the note at the top of this file) when:
 * - this is a vertex shader
 * - and this is an older GPU that needs the z division
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /*
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
.src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)), 1905 }); 1906 emit_inst(c, &(struct etna_inst) { 1907 .opcode = INST_OPCODE_MUL, 1908 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z), 1909 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)), 1910 .src[1] = alloc_imm_f32(c, 0.5f), 1911 }); 1912 } 1913 } 1914 } 1915 1916 static void 1917 etna_compile_frag_rb_swap(struct etna_compile *c) 1918 { 1919 if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) { 1920 /* find color out */ 1921 struct etna_reg_desc *color_reg = 1922 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0); 1923 1924 emit_inst(c, &(struct etna_inst) { 1925 .opcode = INST_OPCODE_MOV, 1926 .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W), 1927 .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)), 1928 }); 1929 } 1930 } 1931 1932 /** add a NOP to the shader if 1933 * a) the shader is empty 1934 * or 1935 * b) there is a label at the end of the shader 1936 */ 1937 static void 1938 etna_compile_add_nop_if_needed(struct etna_compile *c) 1939 { 1940 bool label_at_last_inst = false; 1941 1942 for (int idx = 0; idx < c->labels_count; ++idx) { 1943 if (c->labels[idx].inst_idx == c->inst_ptr) 1944 label_at_last_inst = true; 1945 1946 } 1947 1948 if (c->inst_ptr == 0 || label_at_last_inst) 1949 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP}); 1950 } 1951 1952 static void 1953 assign_uniforms(struct etna_compile_file *file, unsigned base) 1954 { 1955 for (int idx = 0; idx < file->reg_size; ++idx) { 1956 file->reg[idx].native.valid = 1; 1957 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0; 1958 file->reg[idx].native.id = base + idx; 1959 } 1960 } 1961 1962 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x). 
1963 * CONST must be consecutive as const buffers are supposed to be consecutive, 1964 * and before IMM, as this is 1965 * more convenient because is possible for the compilation process itself to 1966 * generate extra 1967 * immediates for constants such as pi, one, zero. 1968 */ 1969 static void 1970 assign_constants_and_immediates(struct etna_compile *c) 1971 { 1972 assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0); 1973 /* immediates start after the constants */ 1974 c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4; 1975 assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4); 1976 DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base, 1977 c->imm_size); 1978 } 1979 1980 /* Assign declared samplers to native texture units */ 1981 static void 1982 assign_texture_units(struct etna_compile *c) 1983 { 1984 uint tex_base = 0; 1985 1986 if (c->info.processor == PIPE_SHADER_VERTEX) 1987 tex_base = c->specs->vertex_sampler_offset; 1988 1989 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) { 1990 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1; 1991 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup 1992 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx; 1993 } 1994 } 1995 1996 /* Additional pass to fill in branch targets. This pass should be last 1997 * as no instruction reordering or removing/addition can be done anymore 1998 * once the branch targets are computed. 
1999 */ 2000 static void 2001 etna_compile_fill_in_labels(struct etna_compile *c) 2002 { 2003 for (int idx = 0; idx < c->inst_ptr; ++idx) { 2004 if (c->lbl_usage[idx] != -1) 2005 etna_assemble_set_imm(&c->code[idx * 4], 2006 c->labels[c->lbl_usage[idx]].inst_idx); 2007 } 2008 } 2009 2010 /* compare two etna_native_reg structures, return true if equal */ 2011 static bool 2012 cmp_etna_native_reg(const struct etna_native_reg to, 2013 const struct etna_native_reg from) 2014 { 2015 return to.valid == from.valid && to.is_tex == from.is_tex && 2016 to.rgroup == from.rgroup && to.id == from.id; 2017 } 2018 2019 /* go through all declarations and swap native registers *to* and *from* */ 2020 static void 2021 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to, 2022 const struct etna_native_reg from) 2023 { 2024 if (cmp_etna_native_reg(from, to)) 2025 return; /* Nothing to do */ 2026 2027 for (int idx = 0; idx < c->total_decls; ++idx) { 2028 if (cmp_etna_native_reg(c->decl[idx].native, from)) { 2029 c->decl[idx].native = to; 2030 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) { 2031 c->decl[idx].native = from; 2032 } 2033 } 2034 } 2035 2036 /* For PS we need to permute so that inputs are always in temporary 0..N-1. 2037 * Semantic POS is always t0. If that semantic is not used, avoid t0. 
 */
static void
permute_ps_inputs(struct etna_compile *c)
{
   /* Special inputs:
    * gl_FragCoord  VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
    * gl_PointCoord VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
    */
   uint native_idx = 1; /* t0 is reserved for the POSITION semantic */

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      uint input_id;
      assert(reg->has_semantic);

      if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
         continue;

      input_id = native_idx++;
      /* move this input into t<input_id>, swapping with whatever declaration
       * currently holds that temporary */
      swap_native_registers(c, etna_native_temp(input_id),
                            c->file[TGSI_FILE_INPUT].reg[idx].native);
   }

   c->num_varyings = native_idx - 1;

   /* make sure the temporaries claimed for inputs are not handed out again */
   if (native_idx > c->next_free_native)
      c->next_free_native = native_idx;
}

/* fill in ps inputs into shader object */
static void
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];

      /* id 0 is skipped: permute_ps_inputs reserved t0 for the POSITION
       * semantic, which is not a regular varying */
      if (reg->native.id > 0) {
         assert(sf->num_reg < ETNA_NUM_INPUTS);
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         /* convert usage mask to number of components (*=wildcard)
          * .r    (0..1)  -> 1 component
          * .*g   (2..3)  -> 2 components
          * .**b  (4..7)  -> 3 components
          * .***a (8..15) -> 4 components
          */
         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
         sf->num_reg++;
      }
   }

   assert(sf->num_reg == c->num_varyings);
   sobj->input_count_unk8 = 31; /* XXX what is this */
}

/* fill in output mapping for ps into shader object */
static void
fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   sobj->outfile.num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
         sobj->ps_color_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
         /* =always native reg 0, only z component should be assigned */
         sobj->ps_depth_out_reg = reg->native.id;
         break;
      default:
         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
      }
   }
}

/* fill in inputs for vs into shader object */
static void
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      /* inputs without a native register assignment are unused */
      if (!reg->native.valid)
         continue;

      /* XXX exclude inputs with special semantics such as gl_frontFacing */
      sf->reg[sf->num_reg].reg = reg->native.id;
      sf->reg[sf->num_reg].semantic = reg->semantic;
      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
      sf->num_reg++;
   }

   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
}

/* build two-level output index [Semantic][Index] for fast linking */
static void
build_output_index(struct etna_shader_variant *sobj)
{
   int total = 0;
   int offset = 0;

   /* total number of outputs over all semantics, for one flat allocation */
   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
      total += sobj->output_count_per_semantic[name];

   /* NOTE(review): CALLOC result is not checked for NULL before the stores
    * below -- confirm whether allocation failure is handled elsewhere. */
   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));

   /* carve the flat list into one sub-array per semantic name */
   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
      sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
      offset += sobj->output_count_per_semantic[name];
   }

   /* point each [Name][Index] slot at the matching outfile register */
   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
                               [sobj->outfile.reg[idx].semantic.Index] =
         &sobj->outfile.reg[idx];
   }
}

/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         /* position and point size are routed via dedicated registers, not
          * the generic varying list */
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS in the unified shader architecture.
    * More precisely, it is determined from the number of VS outputs, as well
    * as chip-specific vertex output buffer size, vertex cache size, and the
    * number of shader cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known
    * for sure at link time because some outputs may be unused and thus
    * unmapped. Then again, in the general use case with GLSL the vertex and
    * fragment shaders are linked already before submitting to Gallium, thus
    * all outputs are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}

/* Verify the compiled shader fits within the hardware limits of the target
 * chip; returns false when a hard limit is exceeded. */
static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   /* instruction count is only a hard limit without an instruction cache */
   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   /* NOTE(review): unlike every other check in this function, exceeding
    * num_constants only logs and does not return false -- confirm whether
    * this is intentional (warning only) or a missing failure return. */
   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}

/* Copy the uniform layout (constant count, immediate values and their
 * contents descriptors) from the compile context into the shader variant. */
static void
copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
{
   uint32_t count = c->imm_size;
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->const_count = c->imm_base;
   uinfo->imm_count = count;
   uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
   uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));

   etna_set_shader_uniforms_dirty_flags(sobj);
}

/* Main entry point: compile the variant's TGSI tokens into Vivante ISA and
 * fill in the shader variant structure. Returns false on failure. */
bool
etna_compile_shader(struct etna_shader_variant *v)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

   /* lower away opcodes the hardware lacks native support for */
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   /* -1 marks "no label used at this instruction" for fill_in_labels */
   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    * this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /* potential optimization case:
    *   if single MOV TEMP[y], IN[x] before which temp y is not used, and
    *   after which IN[x] is not read, temp[y] can be used as input register
    *   as-is
    */
   /* sort temporaries by first use
    * sort inputs by last usage
    * iterate over inputs, temporaries
    *   if last usage of input <= first usage of temp:
    *     assign input to temp
    *     advance input, temporary pointer
    *   else
    *     advance temporary pointer
    *
    * potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input; however, as the temp is not used
    * before this, how would this make sense? uninitialized temporaries have
    * an undefined value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /* potential optimization case:
    *   if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    *   writing all components that are used in the shader) after which temp
    *   y is no longer used, temp[y] can be used as output register as-is
    *
    * potential problem: instruction with multiple outputs of which one is
    * the temp and the other is the output; however, as the temp is not used
    * after this, how would this make sense? could just discard the output
    * value
    */
   /* sort temporaries by last use
    * sort outputs by first usage
    * iterate over outputs, temporaries
    *   if first usage of output >= last usage of temp:
    *     assign output to temp
    *     advance output, temporary pointer
    *   else
    *     advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);

   /* list declarations (again, after permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->processor = c->info.processor;
   v->code_size = c->inst_ptr * 4;
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}

extern const char *tgsi_swizzle_names[];

/* Print a human-readable dump of a compiled shader variant to stdout:
 * disassembly, immediates, input/output mappings and special registers. */
void
etna_dump_shader(const struct etna_shader_variant *shader)
{
   if (shader->processor == PIPE_SHADER_VERTEX)
      printf("VERT\n");
   else
      printf("FRAG\n");

   etna_disasm(shader->code, shader->code_size, PRINT_RAW);

   printf("num loops: %i\n", shader->num_loops);
   printf("num temps: %i\n", shader->num_temps);
   printf("num const: %i\n", shader->uniforms.const_count);
   printf("immediates:\n");
   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
      printf(" [%i].%s = %f (0x%08x)\n",
             (idx + shader->uniforms.const_count) / 4,
             tgsi_swizzle_names[idx % 4],
             *((float *)&shader->uniforms.imm_data[idx]),
             shader->uniforms.imm_data[idx]);
   }
   printf("inputs:\n");
   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
             tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
             shader->infile.reg[idx].semantic.Index,
             shader->infile.reg[idx].num_components);
   }
   printf("outputs:\n");
   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
             tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
             shader->outfile.reg[idx].semantic.Index,
             shader->outfile.reg[idx].num_components);
   }
   printf("special:\n");
   if (shader->processor == PIPE_SHADER_VERTEX) {
      printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
      printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
      printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
   } else {
      printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
      printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
   }
   printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
}

/* Free a compiled shader variant and all compiler-owned allocations. */
void
etna_destroy_shader(struct etna_shader_variant *shader)
{
   assert(shader);

   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader->output_per_semantic_list);
   FREE(shader);
}

/* Look up the VS output matching a FS input by semantic name/index using the
 * two-level index built by build_output_index; NULL when not present. */
static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
      return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];

   return NULL;
}

/* Link a VS and FS variant: route each FS input to the matching VS output.
 * Returns true on a link error (missing VS output), false on success. */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
2558 */ 2559 assert(fs->infile.num_reg < ETNA_NUM_INPUTS); 2560 info->pcoord_varying_comp_ofs = -1; 2561 2562 for (int idx = 0; idx < fs->infile.num_reg; ++idx) { 2563 const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; 2564 const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); 2565 struct etna_varying *varying; 2566 bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR; 2567 2568 assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); 2569 2570 if (fsio->reg > info->num_varyings) 2571 info->num_varyings = fsio->reg; 2572 2573 varying = &info->varyings[fsio->reg - 1]; 2574 varying->num_components = fsio->num_components; 2575 2576 if (!interpolate_always) /* colors affected by flat shading */ 2577 varying->pa_attributes = 0x200; 2578 else /* texture coord or other bypasses flat shading */ 2579 varying->pa_attributes = 0x2f1; 2580 2581 varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED; 2582 varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED; 2583 varying->use[2] = VARYING_COMPONENT_USE_USED; 2584 varying->use[3] = VARYING_COMPONENT_USE_USED; 2585 2586 2587 /* point coord is an input to the PS without matching VS output, 2588 * so it gets a varying slot without being assigned a VS register. 2589 */ 2590 if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) { 2591 info->pcoord_varying_comp_ofs = comp_ofs; 2592 } else { 2593 if (vsio == NULL) { /* not found -- link error */ 2594 BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index); 2595 return true; 2596 } 2597 2598 varying->reg = vsio->reg; 2599 } 2600 2601 comp_ofs += varying->num_components; 2602 } 2603 2604 assert(info->num_varyings == fs->infile.num_reg); 2605 2606 return false; 2607 } 2608