1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ 2 3 /* 4 * Copyright (C) 2012 Rob Clark <robclark (at) freedesktop.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Rob Clark <robclark (at) freedesktop.org> 27 */ 28 29 #include "pipe/p_state.h" 30 #include "util/u_string.h" 31 #include "util/u_memory.h" 32 #include "util/u_inlines.h" 33 #include "tgsi/tgsi_parse.h" 34 #include "tgsi/tgsi_ureg.h" 35 #include "tgsi/tgsi_info.h" 36 #include "tgsi/tgsi_strings.h" 37 #include "tgsi/tgsi_dump.h" 38 39 #include "fd2_compiler.h" 40 #include "fd2_program.h" 41 #include "fd2_util.h" 42 43 #include "instr-a2xx.h" 44 #include "ir-a2xx.h" 45 46 struct fd2_compile_context { 47 struct fd_program_stateobj *prog; 48 struct fd2_shader_stateobj *so; 49 50 struct tgsi_parse_context parser; 51 unsigned type; 52 53 /* predicate stack: */ 54 int pred_depth; 55 enum ir2_pred pred_stack[8]; 56 57 /* Internal-Temporary and Predicate register assignment: 58 * 59 * Some TGSI instructions which translate into multiple actual 60 * instructions need one or more temporary registers, which are not 61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). 62 * And some instructions (texture fetch) cannot write directly to 63 * output registers. We could be more clever and re-use dst or a 64 * src register in some cases. But for now don't try to be clever. 65 * Eventually we should implement an optimization pass that re- 66 * juggles the register usage and gets rid of unneeded temporaries. 67 * 68 * The predicate register must be valid across multiple TGSI 69 * instructions, but internal temporary's do not. For this reason, 70 * once the predicate register is requested, until it is no longer 71 * needed, it gets the first register slot after after the TGSI 72 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the 73 * internal temporaries get the register slots above this. 74 */ 75 76 int pred_reg; 77 int num_internal_temps; 78 79 uint8_t num_regs[TGSI_FILE_COUNT]; 80 81 /* maps input register idx to prog->export_linkage idx: */ 82 uint8_t input_export_idx[64]; 83 84 /* maps output register idx to prog->export_linkage idx: */ 85 uint8_t output_export_idx[64]; 86 87 /* idx/slot for last compiler generated immediate */ 88 unsigned immediate_idx; 89 90 // TODO we can skip emit exports in the VS that the FS doesn't need.. 91 // and get rid perhaps of num_param.. 92 unsigned num_position, num_param; 93 unsigned position, psize; 94 95 uint64_t need_sync; 96 97 /* current exec CF instruction */ 98 struct ir2_cf *cf; 99 }; 100 101 static int 102 semantic_idx(struct tgsi_declaration_semantic *semantic) 103 { 104 int idx = semantic->Name; 105 if (idx == TGSI_SEMANTIC_GENERIC) 106 idx = TGSI_SEMANTIC_COUNT + semantic->Index; 107 return idx; 108 } 109 110 /* assign/get the input/export register # for given semantic idx as 111 * returned by semantic_idx(): 112 */ 113 static int 114 export_linkage(struct fd2_compile_context *ctx, int idx) 115 { 116 struct fd_program_stateobj *prog = ctx->prog; 117 118 /* if first time we've seen this export, assign the next available slot: */ 119 if (prog->export_linkage[idx] == 0xff) 120 prog->export_linkage[idx] = prog->num_exports++; 121 122 return prog->export_linkage[idx]; 123 } 124 125 static unsigned 126 compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, 127 struct fd2_shader_stateobj *so) 128 { 129 unsigned ret; 130 131 ctx->prog = prog; 132 ctx->so = so; 133 ctx->cf = NULL; 134 ctx->pred_depth = 0; 135 136 ret = tgsi_parse_init(&ctx->parser, so->tokens); 137 if (ret != TGSI_PARSE_OK) 138 return ret; 139 140 ctx->type = ctx->parser.FullHeader.Processor.Processor; 141 ctx->position = ~0; 142 ctx->psize = ~0; 143 ctx->num_position = 0; 144 ctx->num_param = 0; 145 ctx->need_sync = 0; 146 ctx->immediate_idx = 0; 147 ctx->pred_reg = -1; 148 ctx->num_internal_temps = 0; 149 150 memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); 151 memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); 152 memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); 153 154 /* do first pass to extract declarations: */ 155 while (!tgsi_parse_end_of_tokens(&ctx->parser)) { 156 tgsi_parse_token(&ctx->parser); 157 158 switch (ctx->parser.FullToken.Token.Type) { 159 case TGSI_TOKEN_TYPE_DECLARATION: { 160 struct tgsi_full_declaration *decl = 161 &ctx->parser.FullToken.FullDeclaration; 162 if (decl->Declaration.File == TGSI_FILE_OUTPUT) { 163 unsigned name = decl->Semantic.Name; 164 165 assert(decl->Declaration.Semantic); // TODO is this ever not true? 166 167 ctx->output_export_idx[decl->Range.First] = 168 semantic_idx(&decl->Semantic); 169 170 if (ctx->type == PIPE_SHADER_VERTEX) { 171 switch (name) { 172 case TGSI_SEMANTIC_POSITION: 173 ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; 174 ctx->num_position++; 175 break; 176 case TGSI_SEMANTIC_PSIZE: 177 ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; 178 ctx->num_position++; 179 break; 180 case TGSI_SEMANTIC_COLOR: 181 case TGSI_SEMANTIC_GENERIC: 182 ctx->num_param++; 183 break; 184 default: 185 DBG("unknown VS semantic name: %s", 186 tgsi_semantic_names[name]); 187 assert(0); 188 } 189 } else { 190 switch (name) { 191 case TGSI_SEMANTIC_COLOR: 192 case TGSI_SEMANTIC_GENERIC: 193 ctx->num_param++; 194 break; 195 default: 196 DBG("unknown PS semantic name: %s", 197 tgsi_semantic_names[name]); 198 assert(0); 199 } 200 } 201 } else if (decl->Declaration.File == TGSI_FILE_INPUT) { 202 ctx->input_export_idx[decl->Range.First] = 203 semantic_idx(&decl->Semantic); 204 } 205 ctx->num_regs[decl->Declaration.File] = 206 MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1); 207 break; 208 } 209 case TGSI_TOKEN_TYPE_IMMEDIATE: { 210 struct tgsi_full_immediate *imm = 211 &ctx->parser.FullToken.FullImmediate; 212 unsigned n = ctx->so->num_immediates++; 213 memcpy(ctx->so->immediates[n].val, imm->u, 16); 214 break; 215 } 216 default: 217 break; 218 } 219 } 220 221 /* TGSI generated immediates are always entire vec4's, ones we 222 * generate internally are not: 223 */ 224 ctx->immediate_idx = ctx->so->num_immediates * 4; 225 226 ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; 227 228 tgsi_parse_free(&ctx->parser); 229 230 return tgsi_parse_init(&ctx->parser, so->tokens); 231 } 232 233 static void 234 compile_free(struct fd2_compile_context *ctx) 235 { 236 tgsi_parse_free(&ctx->parser); 237 } 238 239 static struct ir2_cf * 240 next_exec_cf(struct fd2_compile_context *ctx) 241 { 242 struct ir2_cf *cf = ctx->cf; 243 if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) 244 ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC); 245 return cf; 246 } 247 248 static void 249 compile_vtx_fetch(struct fd2_compile_context *ctx) 250 { 251 struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; 252 int i; 253 for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { 254 struct ir2_instruction *instr = ir2_instr_create( 255 next_exec_cf(ctx), IR2_FETCH); 256 instr->fetch.opc = VTX_FETCH; 257 258 ctx->need_sync |= 1 << (i+1); 259 260 ir2_reg_create(instr, i+1, "xyzw", 0); 261 ir2_reg_create(instr, 0, "x", 0); 262 263 if (i == 0) 264 instr->sync = true; 265 266 vfetch_instrs[i] = instr; 267 } 268 ctx->so->num_vfetch_instrs = i; 269 ctx->cf = NULL; 270 } 271 272 /* 273 * For vertex shaders (VS): 274 * --- ------ ------------- 275 * 276 * Inputs: R1-R(num_input) 277 * Constants: C0-C(num_const-1) 278 * Immediates: C(num_const)-C(num_const+num_imm-1) 279 * Outputs: export0-export(n) and export62, export63 280 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) 281 * Temps: R(num_input+1)-R(num_input+num_temps) 282 * 283 * R0 could be clobbered after the vertex fetch instructions.. so we 284 * could use it for one of the temporaries. 285 * 286 * TODO: maybe the vertex fetch part could fetch first input into R0 as 287 * the last vtx fetch instruction, which would let us use the same 288 * register layout in either case.. although this is not what the blob 289 * compiler does. 290 * 291 * 292 * For frag shaders (PS): 293 * --- ---- ------------- 294 * 295 * Inputs: R0-R(num_input-1) 296 * Constants: same as VS 297 * Immediates: same as VS 298 * Outputs: export0-export(num_outputs) 299 * Temps: R(num_input)-R(num_input+num_temps-1) 300 * 301 * In either case, immediates are are postpended to the constants 302 * (uniforms). 303 * 304 */ 305 306 static unsigned 307 get_temp_gpr(struct fd2_compile_context *ctx, int idx) 308 { 309 unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; 310 if (ctx->type == PIPE_SHADER_VERTEX) 311 num++; 312 return num; 313 } 314 315 static struct ir2_register * 316 add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, 317 const struct tgsi_dst_register *dst) 318 { 319 unsigned flags = 0, num = 0; 320 char swiz[5]; 321 322 switch (dst->File) { 323 case TGSI_FILE_OUTPUT: 324 flags |= IR2_REG_EXPORT; 325 if (ctx->type == PIPE_SHADER_VERTEX) { 326 if (dst->Index == ctx->position) { 327 num = 62; 328 } else if (dst->Index == ctx->psize) { 329 num = 63; 330 } else { 331 num = export_linkage(ctx, 332 ctx->output_export_idx[dst->Index]); 333 } 334 } else { 335 num = dst->Index; 336 } 337 break; 338 case TGSI_FILE_TEMPORARY: 339 num = get_temp_gpr(ctx, dst->Index); 340 break; 341 default: 342 DBG("unsupported dst register file: %s", 343 tgsi_file_name(dst->File)); 344 assert(0); 345 break; 346 } 347 348 swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; 349 swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; 350 swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; 351 swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_'; 352 swiz[4] = '\0'; 353 354 return ir2_reg_create(alu, num, swiz, flags); 355 } 356 357 static struct ir2_register * 358 add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, 359 const struct tgsi_src_register *src) 360 { 361 static const char swiz_vals[] = { 362 'x', 'y', 'z', 'w', 363 }; 364 char swiz[5]; 365 unsigned flags = 0, num = 0; 366 367 switch (src->File) { 368 case TGSI_FILE_CONSTANT: 369 num = src->Index; 370 flags |= IR2_REG_CONST; 371 break; 372 case TGSI_FILE_INPUT: 373 if (ctx->type == PIPE_SHADER_VERTEX) { 374 num = src->Index + 1; 375 } else { 376 num = export_linkage(ctx, 377 ctx->input_export_idx[src->Index]); 378 } 379 break; 380 case TGSI_FILE_TEMPORARY: 381 num = get_temp_gpr(ctx, src->Index); 382 break; 383 case TGSI_FILE_IMMEDIATE: 384 num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; 385 flags |= IR2_REG_CONST; 386 break; 387 default: 388 DBG("unsupported src register file: %s", 389 tgsi_file_name(src->File)); 390 assert(0); 391 break; 392 } 393 394 if (src->Absolute) 395 flags |= IR2_REG_ABS; 396 if (src->Negate) 397 flags |= IR2_REG_NEGATE; 398 399 swiz[0] = swiz_vals[src->SwizzleX]; 400 swiz[1] = swiz_vals[src->SwizzleY]; 401 swiz[2] = swiz_vals[src->SwizzleZ]; 402 swiz[3] = swiz_vals[src->SwizzleW]; 403 swiz[4] = '\0'; 404 405 if ((ctx->need_sync & ((uint64_t)1 << num)) && 406 !(flags & IR2_REG_CONST)) { 407 alu->sync = true; 408 ctx->need_sync &= ~((uint64_t)1 << num); 409 } 410 411 return ir2_reg_create(alu, num, swiz, flags); 412 } 413 414 static void 415 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 416 { 417 if (inst->Instruction.Saturate) { 418 alu->alu.vector_clamp = true; 419 } 420 } 421 422 static void 423 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 424 { 425 if (inst->Instruction.Saturate) { 426 alu->alu.scalar_clamp = true; 427 } 428 } 429 430 static void 431 add_regs_vector_1(struct fd2_compile_context *ctx, 432 struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 433 { 434 assert(inst->Instruction.NumSrcRegs == 1); 435 assert(inst->Instruction.NumDstRegs == 1); 436 437 add_dst_reg(ctx, alu, &inst->Dst[0].Register); 438 add_src_reg(ctx, alu, &inst->Src[0].Register); 439 add_src_reg(ctx, alu, &inst->Src[0].Register); 440 add_vector_clamp(inst, alu); 441 } 442 443 static void 444 add_regs_vector_2(struct fd2_compile_context *ctx, 445 struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 446 { 447 assert(inst->Instruction.NumSrcRegs == 2); 448 assert(inst->Instruction.NumDstRegs == 1); 449 450 add_dst_reg(ctx, alu, &inst->Dst[0].Register); 451 add_src_reg(ctx, alu, &inst->Src[0].Register); 452 add_src_reg(ctx, alu, &inst->Src[1].Register); 453 add_vector_clamp(inst, alu); 454 } 455 456 static void 457 add_regs_vector_3(struct fd2_compile_context *ctx, 458 struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 459 { 460 assert(inst->Instruction.NumSrcRegs == 3); 461 assert(inst->Instruction.NumDstRegs == 1); 462 463 add_dst_reg(ctx, alu, &inst->Dst[0].Register); 464 /* maybe should re-arrange the syntax some day, but 465 * in assembler/disassembler and what ir.c expects 466 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 467 */ 468 add_src_reg(ctx, alu, &inst->Src[2].Register); 469 add_src_reg(ctx, alu, &inst->Src[0].Register); 470 add_src_reg(ctx, alu, &inst->Src[1].Register); 471 add_vector_clamp(inst, alu); 472 } 473 474 static void 475 add_regs_dummy_vector(struct ir2_instruction *alu) 476 { 477 /* create dummy, non-written vector dst/src regs 478 * for unused vector instr slot: 479 */ 480 ir2_reg_create(alu, 0, "____", 0); /* vector dst */ 481 ir2_reg_create(alu, 0, NULL, 0); /* vector src1 */ 482 ir2_reg_create(alu, 0, NULL, 0); /* vector src2 */ 483 } 484 485 static void 486 add_regs_scalar_1(struct fd2_compile_context *ctx, 487 struct tgsi_full_instruction *inst, struct ir2_instruction *alu) 488 { 489 assert(inst->Instruction.NumSrcRegs == 1); 490 assert(inst->Instruction.NumDstRegs == 1); 491 492 add_regs_dummy_vector(alu); 493 494 add_dst_reg(ctx, alu, &inst->Dst[0].Register); 495 add_src_reg(ctx, alu, &inst->Src[0].Register); 496 add_scalar_clamp(inst, alu); 497 } 498 499 /* 500 * Helpers for TGSI instructions that don't map to a single shader instr: 501 */ 502 503 static void 504 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) 505 { 506 src->File = dst->File; 507 src->Indirect = dst->Indirect; 508 src->Dimension = dst->Dimension; 509 src->Index = dst->Index; 510 src->Absolute = 0; 511 src->Negate = 0; 512 src->SwizzleX = TGSI_SWIZZLE_X; 513 src->SwizzleY = TGSI_SWIZZLE_Y; 514 src->SwizzleZ = TGSI_SWIZZLE_Z; 515 src->SwizzleW = TGSI_SWIZZLE_W; 516 } 517 518 /* Get internal-temp src/dst to use for a sequence of instructions 519 * generated by a single TGSI op. 520 */ 521 static void 522 get_internal_temp(struct fd2_compile_context *ctx, 523 struct tgsi_dst_register *tmp_dst, 524 struct tgsi_src_register *tmp_src) 525 { 526 int n; 527 528 tmp_dst->File = TGSI_FILE_TEMPORARY; 529 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; 530 tmp_dst->Indirect = 0; 531 tmp_dst->Dimension = 0; 532 533 /* assign next temporary: */ 534 n = ctx->num_internal_temps++; 535 if (ctx->pred_reg != -1) 536 n++; 537 538 tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; 539 540 src_from_dst(tmp_src, tmp_dst); 541 } 542 543 static void 544 get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, 545 struct tgsi_src_register *src) 546 { 547 assert(ctx->pred_reg != -1); 548 549 dst->File = TGSI_FILE_TEMPORARY; 550 dst->WriteMask = TGSI_WRITEMASK_W; 551 dst->Indirect = 0; 552 dst->Dimension = 0; 553 dst->Index = get_temp_gpr(ctx, ctx->pred_reg); 554 555 if (src) { 556 src_from_dst(src, dst); 557 src->SwizzleX = TGSI_SWIZZLE_W; 558 src->SwizzleY = TGSI_SWIZZLE_W; 559 src->SwizzleZ = TGSI_SWIZZLE_W; 560 src->SwizzleW = TGSI_SWIZZLE_W; 561 } 562 } 563 564 static void 565 push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) 566 { 567 struct ir2_instruction *alu; 568 struct tgsi_dst_register pred_dst; 569 570 /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by 571 * themselves: 572 */ 573 ctx->cf = NULL; 574 575 if (ctx->pred_depth == 0) { 576 /* assign predicate register: */ 577 ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; 578 579 get_predicate(ctx, &pred_dst, NULL); 580 581 alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); 582 add_regs_dummy_vector(alu); 583 add_dst_reg(ctx, alu, &pred_dst); 584 add_src_reg(ctx, alu, src); 585 } else { 586 struct tgsi_src_register pred_src; 587 588 get_predicate(ctx, &pred_dst, &pred_src); 589 590 alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 591 add_dst_reg(ctx, alu, &pred_dst); 592 add_src_reg(ctx, alu, &pred_src); 593 add_src_reg(ctx, alu, src); 594 595 // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make 596 // sure src reg is valid if it was calculated with a predicate 597 // condition.. 598 alu->pred = IR2_PRED_NONE; 599 } 600 601 /* save previous pred state to restore in pop_predicate(): */ 602 ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; 603 604 ctx->cf = NULL; 605 } 606 607 static void 608 pop_predicate(struct fd2_compile_context *ctx) 609 { 610 /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by 611 * themselves: 612 */ 613 ctx->cf = NULL; 614 615 /* restore previous predicate state: */ 616 ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; 617 618 if (ctx->pred_depth != 0) { 619 struct ir2_instruction *alu; 620 struct tgsi_dst_register pred_dst; 621 struct tgsi_src_register pred_src; 622 623 get_predicate(ctx, &pred_dst, &pred_src); 624 625 alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); 626 add_regs_dummy_vector(alu); 627 add_dst_reg(ctx, alu, &pred_dst); 628 add_src_reg(ctx, alu, &pred_src); 629 alu->pred = IR2_PRED_NONE; 630 } else { 631 /* predicate register no longer needed: */ 632 ctx->pred_reg = -1; 633 } 634 635 ctx->cf = NULL; 636 } 637 638 static void 639 get_immediate(struct fd2_compile_context *ctx, 640 struct tgsi_src_register *reg, uint32_t val) 641 { 642 unsigned neg, swiz, idx, i; 643 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ 644 static const unsigned swiz2tgsi[] = { 645 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 646 }; 647 648 for (i = 0; i < ctx->immediate_idx; i++) { 649 swiz = i % 4; 650 idx = i / 4; 651 652 if (ctx->so->immediates[idx].val[swiz] == val) { 653 neg = 0; 654 break; 655 } 656 657 if (ctx->so->immediates[idx].val[swiz] == -val) { 658 neg = 1; 659 break; 660 } 661 } 662 663 if (i == ctx->immediate_idx) { 664 /* need to generate a new immediate: */ 665 swiz = i % 4; 666 idx = i / 4; 667 neg = 0; 668 ctx->so->immediates[idx].val[swiz] = val; 669 ctx->so->num_immediates = idx + 1; 670 ctx->immediate_idx++; 671 } 672 673 reg->File = TGSI_FILE_IMMEDIATE; 674 reg->Indirect = 0; 675 reg->Dimension = 0; 676 reg->Index = idx; 677 reg->Absolute = 0; 678 reg->Negate = neg; 679 reg->SwizzleX = swiz2tgsi[swiz]; 680 reg->SwizzleY = swiz2tgsi[swiz]; 681 reg->SwizzleZ = swiz2tgsi[swiz]; 682 reg->SwizzleW = swiz2tgsi[swiz]; 683 } 684 685 /* POW(a,b) = EXP2(b * LOG2(a)) */ 686 static void 687 translate_pow(struct fd2_compile_context *ctx, 688 struct tgsi_full_instruction *inst) 689 { 690 struct tgsi_dst_register tmp_dst; 691 struct tgsi_src_register tmp_src; 692 struct ir2_instruction *alu; 693 694 get_internal_temp(ctx, &tmp_dst, &tmp_src); 695 696 alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); 697 add_regs_dummy_vector(alu); 698 add_dst_reg(ctx, alu, &tmp_dst); 699 add_src_reg(ctx, alu, &inst->Src[0].Register); 700 701 alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 702 add_dst_reg(ctx, alu, &tmp_dst); 703 add_src_reg(ctx, alu, &tmp_src); 704 add_src_reg(ctx, alu, &inst->Src[1].Register); 705 706 /* NOTE: some of the instructions, like EXP_IEEE, seem hard- 707 * coded to take their input from the w component. 708 */ 709 switch(inst->Dst[0].Register.WriteMask) { 710 case TGSI_WRITEMASK_X: 711 tmp_src.SwizzleW = TGSI_SWIZZLE_X; 712 break; 713 case TGSI_WRITEMASK_Y: 714 tmp_src.SwizzleW = TGSI_SWIZZLE_Y; 715 break; 716 case TGSI_WRITEMASK_Z: 717 tmp_src.SwizzleW = TGSI_SWIZZLE_Z; 718 break; 719 case TGSI_WRITEMASK_W: 720 tmp_src.SwizzleW = TGSI_SWIZZLE_W; 721 break; 722 default: 723 DBG("invalid writemask!"); 724 assert(0); 725 break; 726 } 727 728 alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); 729 add_regs_dummy_vector(alu); 730 add_dst_reg(ctx, alu, &inst->Dst[0].Register); 731 add_src_reg(ctx, alu, &tmp_src); 732 add_scalar_clamp(inst, alu); 733 } 734 735 static void 736 translate_tex(struct fd2_compile_context *ctx, 737 struct tgsi_full_instruction *inst, unsigned opc) 738 { 739 struct ir2_instruction *instr; 740 struct ir2_register *reg; 741 struct tgsi_dst_register tmp_dst; 742 struct tgsi_src_register tmp_src; 743 const struct tgsi_src_register *coord; 744 bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || 745 inst->Instruction.Saturate; 746 int idx; 747 748 if (using_temp || (opc == TGSI_OPCODE_TXP)) 749 get_internal_temp(ctx, &tmp_dst, &tmp_src); 750 751 if (opc == TGSI_OPCODE_TXP) { 752 static const char *swiz[] = { 753 [TGSI_SWIZZLE_X] = "xxxx", 754 [TGSI_SWIZZLE_Y] = "yyyy", 755 [TGSI_SWIZZLE_Z] = "zzzz", 756 [TGSI_SWIZZLE_W] = "wwww", 757 }; 758 759 /* TXP - Projective Texture Lookup: 760 * 761 * coord.x = src0.x / src.w 762 * coord.y = src0.y / src.w 763 * coord.z = src0.z / src.w 764 * coord.w = src0.w 765 * bias = 0.0 766 * 767 * dst = texture_sample(unit, coord, bias) 768 */ 769 instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); 770 771 /* MAXv: */ 772 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; 773 add_src_reg(ctx, instr, &inst->Src[0].Register); 774 add_src_reg(ctx, instr, &inst->Src[0].Register); 775 776 /* RECIP_IEEE: */ 777 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; 778 add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = 779 swiz[inst->Src[0].Register.SwizzleW]; 780 781 instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 782 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; 783 add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; 784 add_src_reg(ctx, instr, &inst->Src[0].Register); 785 786 coord = &tmp_src; 787 } else { 788 coord = &inst->Src[0].Register; 789 } 790 791 instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH); 792 instr->fetch.opc = TEX_FETCH; 793 instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); 794 assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? 795 796 /* save off the tex fetch to be patched later with correct const_idx: */ 797 idx = ctx->so->num_tfetch_instrs++; 798 ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; 799 ctx->so->tfetch_instrs[idx].instr = instr; 800 801 add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); 802 reg = add_src_reg(ctx, instr, coord); 803 804 /* blob compiler always sets 3rd component to same as 1st for 2d: */ 805 if (inst->Texture.Texture == TGSI_TEXTURE_2D) 806 reg->swizzle[2] = reg->swizzle[0]; 807 808 /* dst register needs to be marked for sync: */ 809 ctx->need_sync |= 1 << instr->regs[0]->num; 810 811 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */ 812 instr->sync = true; 813 814 if (using_temp) { 815 /* texture fetch can't write directly to export, so if tgsi 816 * is telling us the dst register is in output file, we load 817 * the texture to a temp and the use ALU instruction to move 818 * to output 819 */ 820 instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); 821 822 add_dst_reg(ctx, instr, &inst->Dst[0].Register); 823 add_src_reg(ctx, instr, &tmp_src); 824 add_src_reg(ctx, instr, &tmp_src); 825 add_vector_clamp(inst, instr); 826 } 827 } 828 829 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ 830 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ 831 static void 832 translate_sge_slt(struct fd2_compile_context *ctx, 833 struct tgsi_full_instruction *inst, unsigned opc) 834 { 835 struct ir2_instruction *instr; 836 struct tgsi_dst_register tmp_dst; 837 struct tgsi_src_register tmp_src; 838 struct tgsi_src_register tmp_const; 839 float c0, c1; 840 841 switch (opc) { 842 default: 843 assert(0); 844 case TGSI_OPCODE_SGE: 845 c0 = 1.0; 846 c1 = 0.0; 847 break; 848 case TGSI_OPCODE_SLT: 849 c0 = 0.0; 850 c1 = 1.0; 851 break; 852 } 853 854 get_internal_temp(ctx, &tmp_dst, &tmp_src); 855 856 instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); 857 add_dst_reg(ctx, instr, &tmp_dst); 858 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; 859 add_src_reg(ctx, instr, &inst->Src[1].Register); 860 861 instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0); 862 add_dst_reg(ctx, instr, &inst->Dst[0].Register); 863 /* maybe should re-arrange the syntax some day, but 864 * in assembler/disassembler and what ir.c expects 865 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 866 */ 867 get_immediate(ctx, &tmp_const, fui(c0)); 868 add_src_reg(ctx, instr, &tmp_const); 869 add_src_reg(ctx, instr, &tmp_src); 870 get_immediate(ctx, &tmp_const, fui(c1)); 871 add_src_reg(ctx, instr, &tmp_const); 872 } 873 874 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ 875 static void 876 translate_lrp(struct fd2_compile_context *ctx, 877 struct tgsi_full_instruction *inst, 878 unsigned opc) 879 { 880 struct ir2_instruction *instr; 881 struct tgsi_dst_register tmp_dst1, tmp_dst2; 882 struct tgsi_src_register tmp_src1, tmp_src2; 883 struct tgsi_src_register tmp_const; 884 885 get_internal_temp(ctx, &tmp_dst1, &tmp_src1); 886 get_internal_temp(ctx, &tmp_dst2, &tmp_src2); 887 888 get_immediate(ctx, &tmp_const, fui(1.0)); 889 890 /* tmp1 = (a * b) */ 891 instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 892 add_dst_reg(ctx, instr, &tmp_dst1); 893 add_src_reg(ctx, instr, &inst->Src[0].Register); 894 add_src_reg(ctx, instr, &inst->Src[1].Register); 895 896 /* tmp2 = (1 - a) */ 897 instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); 898 add_dst_reg(ctx, instr, &tmp_dst2); 899 add_src_reg(ctx, instr, &tmp_const); 900 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; 901 902 /* tmp2 = tmp2 * c */ 903 instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); 904 add_dst_reg(ctx, instr, &tmp_dst2); 905 add_src_reg(ctx, instr, &tmp_src2); 906 add_src_reg(ctx, instr, &inst->Src[2].Register); 907 908 /* dst = tmp1 + tmp2 */ 909 instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); 910 add_dst_reg(ctx, instr, &inst->Dst[0].Register); 911 add_src_reg(ctx, instr, &tmp_src1); 912 add_src_reg(ctx, instr, &tmp_src2); 913 } 914 915 static void 916 translate_trig(struct fd2_compile_context *ctx, 917 struct tgsi_full_instruction *inst, 918 unsigned opc) 919 { 920 struct ir2_instruction *instr; 921 struct tgsi_dst_register tmp_dst; 922 struct tgsi_src_register tmp_src; 923 struct tgsi_src_register tmp_const; 924 instr_scalar_opc_t op; 925 926 switch (opc) { 927 default: 928 assert(0); 929 case TGSI_OPCODE_SIN: 930 op = SIN; 931 break; 932 case TGSI_OPCODE_COS: 933 op = COS; 934 break; 935 } 936 937 get_internal_temp(ctx, &tmp_dst, &tmp_src); 938 939 tmp_dst.WriteMask = TGSI_WRITEMASK_X; 940 tmp_src.SwizzleX = tmp_src.SwizzleY = 941 tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; 942 943 /* maybe should re-arrange the syntax some day, but 944 * in assembler/disassembler and what ir.c expects 945 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 946 */ 947 instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); 948 add_dst_reg(ctx, instr, &tmp_dst); 949 get_immediate(ctx, &tmp_const, fui(0.5)); 950 add_src_reg(ctx, instr, &tmp_const); 951 add_src_reg(ctx, instr, &inst->Src[0].Register); 952 get_immediate(ctx, &tmp_const, fui(0.159155)); 953 add_src_reg(ctx, instr, &tmp_const); 954 955 instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); 956 add_dst_reg(ctx, instr, &tmp_dst); 957 add_src_reg(ctx, instr, &tmp_src); 958 add_src_reg(ctx, instr, &tmp_src); 959 960 instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); 961 add_dst_reg(ctx, instr, &tmp_dst); 962 get_immediate(ctx, &tmp_const, fui(-3.141593)); 963 add_src_reg(ctx, instr, &tmp_const); 964 add_src_reg(ctx, instr, &tmp_src); 965 get_immediate(ctx, &tmp_const, fui(6.283185)); 966 add_src_reg(ctx, instr, &tmp_const); 967 968 instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op); 969 add_regs_dummy_vector(instr); 970 add_dst_reg(ctx, instr, &inst->Dst[0].Register); 971 add_src_reg(ctx, instr, &tmp_src); 972 } 973 974 /* 975 * Main part of compiler/translator: 976 */ 977 978 static void 979 translate_instruction(struct fd2_compile_context *ctx, 980 struct tgsi_full_instruction *inst) 981 { 982 unsigned opc = inst->Instruction.Opcode; 983 struct ir2_instruction *instr; 984 static struct ir2_cf *cf; 985 986 if (opc == TGSI_OPCODE_END) 987 return; 988 989 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { 990 unsigned num = inst->Dst[0].Register.Index; 991 /* seems like we need to ensure that position vs param/pixel 992 * exports don't end up in the same EXEC clause.. easy way 993 * to do this is force a new EXEC clause on first appearance 994 * of an position or param/pixel export. 995 */ 996 if ((num == ctx->position) || (num == ctx->psize)) { 997 if (ctx->num_position > 0) { 998 ctx->cf = NULL; 999 ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION, 1000 ctx->num_position - 1); 1001 ctx->num_position = 0; 1002 } 1003 } else { 1004 if (ctx->num_param > 0) { 1005 ctx->cf = NULL; 1006 ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, 1007 ctx->num_param - 1); 1008 ctx->num_param = 0; 1009 } 1010 } 1011 } 1012 1013 cf = next_exec_cf(ctx); 1014 1015 /* TODO turn this into a table: */ 1016 switch (opc) { 1017 case TGSI_OPCODE_MOV: 1018 instr = ir2_instr_create_alu(cf, MAXv, ~0); 1019 add_regs_vector_1(ctx, inst, instr); 1020 break; 1021 case TGSI_OPCODE_RCP: 1022 instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE); 1023 add_regs_scalar_1(ctx, inst, instr); 1024 break; 1025 case TGSI_OPCODE_RSQ: 1026 instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE); 1027 add_regs_scalar_1(ctx, inst, instr); 1028 break; 1029 case TGSI_OPCODE_SQRT: 1030 instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE); 1031 add_regs_scalar_1(ctx, inst, instr); 1032 break; 1033 case TGSI_OPCODE_MUL: 1034 instr = ir2_instr_create_alu(cf, MULv, ~0); 1035 add_regs_vector_2(ctx, inst, instr); 1036 break; 1037 case TGSI_OPCODE_ADD: 1038 instr = ir2_instr_create_alu(cf, ADDv, ~0); 1039 add_regs_vector_2(ctx, inst, instr); 1040 break; 1041 case TGSI_OPCODE_DP3: 1042 instr = ir2_instr_create_alu(cf, DOT3v, ~0); 1043 add_regs_vector_2(ctx, inst, instr); 1044 break; 1045 case TGSI_OPCODE_DP4: 1046 instr = ir2_instr_create_alu(cf, DOT4v, ~0); 1047 add_regs_vector_2(ctx, inst, instr); 1048 break; 1049 case TGSI_OPCODE_MIN: 1050 instr = ir2_instr_create_alu(cf, MINv, ~0); 1051 add_regs_vector_2(ctx, inst, instr); 1052 break; 1053 case TGSI_OPCODE_MAX: 1054 instr = ir2_instr_create_alu(cf, MAXv, ~0); 1055 add_regs_vector_2(ctx, inst, instr); 1056 break; 1057 case TGSI_OPCODE_SLT: 1058 case TGSI_OPCODE_SGE: 1059 translate_sge_slt(ctx, inst, opc); 1060 break; 1061 case TGSI_OPCODE_MAD: 1062 instr = ir2_instr_create_alu(cf, MULADDv, ~0); 1063 add_regs_vector_3(ctx, inst, instr); 1064 break; 1065 case TGSI_OPCODE_LRP: 1066 translate_lrp(ctx, inst, opc); 1067 break; 1068 case TGSI_OPCODE_FRC: 1069 instr = ir2_instr_create_alu(cf, FRACv, ~0); 1070 add_regs_vector_1(ctx, inst, instr); 1071 break; 1072 case TGSI_OPCODE_FLR: 1073 instr = ir2_instr_create_alu(cf, FLOORv, ~0); 1074 add_regs_vector_1(ctx, inst, instr); 1075 break; 1076 case TGSI_OPCODE_EX2: 1077 instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE); 1078 add_regs_scalar_1(ctx, inst, instr); 1079 break; 1080 case TGSI_OPCODE_POW: 1081 translate_pow(ctx, inst); 1082 break; 1083 case TGSI_OPCODE_COS: 1084 case TGSI_OPCODE_SIN: 1085 translate_trig(ctx, inst, opc); 1086 break; 1087 case TGSI_OPCODE_TEX: 1088 case TGSI_OPCODE_TXP: 1089 translate_tex(ctx, inst, opc); 1090 break; 1091 case TGSI_OPCODE_CMP: 1092 instr = ir2_instr_create_alu(cf, CNDGTEv, ~0); 1093 add_regs_vector_3(ctx, inst, instr); 1094 // TODO this should be src0 if regs where in sane order.. 1095 instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */ 1096 break; 1097 case TGSI_OPCODE_IF: 1098 push_predicate(ctx, &inst->Src[0].Register); 1099 ctx->so->ir->pred = IR2_PRED_EQ; 1100 break; 1101 case TGSI_OPCODE_ELSE: 1102 ctx->so->ir->pred = IR2_PRED_NE; 1103 /* not sure if this is required in all cases, but blob compiler 1104 * won't combine EQ and NE in same CF: 1105 */ 1106 ctx->cf = NULL; 1107 break; 1108 case TGSI_OPCODE_ENDIF: 1109 pop_predicate(ctx); 1110 break; 1111 case TGSI_OPCODE_F2I: 1112 instr = ir2_instr_create_alu(cf, TRUNCv, ~0); 1113 add_regs_vector_1(ctx, inst, instr); 1114 break; 1115 default: 1116 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); 1117 tgsi_dump(ctx->so->tokens, 0); 1118 assert(0); 1119 break; 1120 } 1121 1122 /* internal temporaries are only valid for the duration of a single 1123 * TGSI instruction: 1124 */ 1125 ctx->num_internal_temps = 0; 1126 } 1127 1128 static void 1129 compile_instructions(struct fd2_compile_context *ctx) 1130 { 1131 while (!tgsi_parse_end_of_tokens(&ctx->parser)) { 1132 tgsi_parse_token(&ctx->parser); 1133 1134 switch (ctx->parser.FullToken.Token.Type) { 1135 case TGSI_TOKEN_TYPE_INSTRUCTION: 1136 translate_instruction(ctx, 1137 &ctx->parser.FullToken.FullInstruction); 1138 break; 1139 default: 1140 break; 1141 } 1142 } 1143 1144 ctx->cf->cf_type = EXEC_END; 1145 } 1146 1147 int 1148 fd2_compile_shader(struct fd_program_stateobj *prog, 1149 struct fd2_shader_stateobj *so) 1150 { 1151 struct fd2_compile_context ctx; 1152 1153 ir2_shader_destroy(so->ir); 1154 so->ir = ir2_shader_create(); 1155 so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; 1156 1157 if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) 1158 return -1; 1159 1160 if (ctx.type == PIPE_SHADER_VERTEX) { 1161 compile_vtx_fetch(&ctx); 1162 } else if (ctx.type == PIPE_SHADER_FRAGMENT) { 1163 prog->num_exports = 0; 1164 memset(prog->export_linkage, 0xff, 1165 sizeof(prog->export_linkage)); 1166 } 1167 1168 compile_instructions(&ctx); 1169 1170 compile_free(&ctx); 1171 1172 return 0; 1173 } 1174 1175