1 /************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * TGSI to PowerPC code generation. 30 */ 31 32 #include "pipe/p_config.h" 33 34 #if defined(PIPE_ARCH_PPC) 35 36 #include "util/u_debug.h" 37 #include "pipe/p_shader_tokens.h" 38 #include "util/u_math.h" 39 #include "util/u_memory.h" 40 #include "util/u_sse.h" 41 #include "tgsi/tgsi_info.h" 42 #include "tgsi/tgsi_parse.h" 43 #include "tgsi/tgsi_util.h" 44 #include "tgsi_dump.h" 45 #include "tgsi_exec.h" 46 #include "tgsi_ppc.h" 47 #include "rtasm/rtasm_ppc.h" 48 49 50 /** 51 * Since it's pretty much impossible to form PPC vector immediates, load 52 * them from memory here: 53 */ 54 PIPE_ALIGN_VAR(16) const float 55 ppc_builtin_constants[] = { 56 1.0f, -128.0f, 128.0, 0.0 57 }; 58 59 /** 60 * How many TGSI temps should be implemented with real PPC vector registers 61 * rather than memory. 62 */ 63 #define MAX_PPC_TEMPS 3 64 65 66 /** 67 * Context/state used during code gen. 68 */ 69 struct gen_context 70 { 71 struct ppc_function *f; 72 int inputs_reg; /**< GP register pointing to input params */ 73 int outputs_reg; /**< GP register pointing to output params */ 74 int temps_reg; /**< GP register pointing to temporary "registers" */ 75 int immed_reg; /**< GP register pointing to immediates buffer */ 76 int const_reg; /**< GP register pointing to constants buffer */ 77 int builtins_reg; /**< GP register pointint to built-in constants */ 78 79 int offset_reg; /**< used to reduce redundant li instructions */ 80 int offset_value; 81 82 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 83 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 84 85 /** 86 * Map TGSI temps to PPC vector temps. 87 * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. 88 * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. 89 */ 90 int temps_map[MAX_PPC_TEMPS][4]; 91 92 /** 93 * Cache of src registers. 94 * This is used to avoid redundant load instructions. 95 */ 96 struct { 97 struct tgsi_full_src_register src; 98 uint chan; 99 uint vec; 100 } regs[12]; /* 3 src regs, 4 channels */ 101 uint num_regs; 102 }; 103 104 105 /** 106 * Initialize code generation context. 107 */ 108 static void 109 init_gen_context(struct gen_context *gen, struct ppc_function *func) 110 { 111 uint i; 112 113 memset(gen, 0, sizeof(*gen)); 114 gen->f = func; 115 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 116 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 117 gen->temps_reg = ppc_reserve_register(func, 5); /* ... */ 118 gen->immed_reg = ppc_reserve_register(func, 6); 119 gen->const_reg = ppc_reserve_register(func, 7); 120 gen->builtins_reg = ppc_reserve_register(func, 8); 121 gen->one_vec = -1; 122 gen->bit31_vec = -1; 123 gen->offset_reg = -1; 124 gen->offset_value = -9999999; 125 for (i = 0; i < MAX_PPC_TEMPS; i++) { 126 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); 127 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); 128 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); 129 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); 130 } 131 } 132 133 134 /** 135 * Is the given TGSI register stored as a real PPC vector register? 136 */ 137 static boolean 138 is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) 139 { 140 return (reg->Register.File == TGSI_FILE_TEMPORARY && 141 reg->Register.Index < MAX_PPC_TEMPS); 142 } 143 144 145 /** 146 * Is the given TGSI register stored as a real PPC vector register? 147 */ 148 static boolean 149 is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) 150 { 151 return (reg->Register.File == TGSI_FILE_TEMPORARY && 152 reg->Register.Index < MAX_PPC_TEMPS); 153 } 154 155 156 157 /** 158 * All PPC vector load/store instructions form an effective address 159 * by adding the contents of two registers. For example: 160 * lvx v2,r8,r9 # v2 = memory[r8 + r9] 161 * stvx v2,r8,r9 # memory[r8 + r9] = v2; 162 * So our lvx/stvx instructions are typically preceded by an 'li' instruction 163 * to load r9 (above) with an immediate (an offset). 164 * This code emits that 'li' instruction, but only if the offset value is 165 * different than the previous 'li'. 166 * This optimization seems to save about 10% in the instruction count. 167 * Note that we need to unconditionally emit an 'li' inside basic blocks 168 * (such as inside loops). 169 */ 170 static int 171 emit_li_offset(struct gen_context *gen, int offset) 172 { 173 if (gen->offset_reg <= 0) { 174 /* allocate a GP register for storing load/store offset */ 175 gen->offset_reg = ppc_allocate_register(gen->f); 176 } 177 178 /* emit new 'li' if offset is changing */ 179 if (gen->offset_value < 0 || gen->offset_value != offset) { 180 gen->offset_value = offset; 181 ppc_li(gen->f, gen->offset_reg, offset); 182 } 183 184 return gen->offset_reg; 185 } 186 187 188 /** 189 * Forces subsequent emit_li_offset() calls to emit an 'li'. 190 * To be called at the top of basic blocks. 191 */ 192 static void 193 reset_li_offset(struct gen_context *gen) 194 { 195 gen->offset_value = -9999999; 196 } 197 198 199 200 /** 201 * Load the given vector register with {value, value, value, value}. 202 * The value must be in the ppu_builtin_constants[] array. 203 * We wouldn't need this if there was a simple way to load PPC vector 204 * registers with immediate values! 205 */ 206 static void 207 load_constant_vec(struct gen_context *gen, int dst_vec, float value) 208 { 209 uint pos; 210 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 211 if (ppc_builtin_constants[pos] == value) { 212 int offset = pos * 4; 213 int offset_reg = emit_li_offset(gen, offset); 214 215 /* Load 4-byte word into vector register. 216 * The vector slot depends on the effective address we load from. 217 * We know that our builtins start at a 16-byte boundary so we 218 * know that 'swizzle' tells us which vector slot will have the 219 * loaded word. The other vector slots will be undefined. 220 */ 221 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 222 /* splat word[pos % 4] across the vector reg */ 223 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 224 return; 225 } 226 } 227 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 228 } 229 230 231 /** 232 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 233 */ 234 static int 235 gen_one_vec(struct gen_context *gen) 236 { 237 if (gen->one_vec < 0) { 238 gen->one_vec = ppc_allocate_vec_register(gen->f); 239 load_constant_vec(gen, gen->one_vec, 1.0f); 240 } 241 return gen->one_vec; 242 } 243 244 /** 245 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 246 */ 247 static int 248 gen_get_bit31_vec(struct gen_context *gen) 249 { 250 if (gen->bit31_vec < 0) { 251 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 252 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 253 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 254 } 255 return gen->bit31_vec; 256 } 257 258 259 /** 260 * Register fetch. Return PPC vector register with result. 261 */ 262 static int 263 emit_fetch(struct gen_context *gen, 264 const struct tgsi_full_src_register *reg, 265 const unsigned chan_index) 266 { 267 uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); 268 int dst_vec = -1; 269 270 switch (swizzle) { 271 case TGSI_SWIZZLE_X: 272 case TGSI_SWIZZLE_Y: 273 case TGSI_SWIZZLE_Z: 274 case TGSI_SWIZZLE_W: 275 switch (reg->Register.File) { 276 case TGSI_FILE_INPUT: 277 { 278 int offset = (reg->Register.Index * 4 + swizzle) * 16; 279 int offset_reg = emit_li_offset(gen, offset); 280 dst_vec = ppc_allocate_vec_register(gen->f); 281 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 282 } 283 break; 284 case TGSI_FILE_SYSTEM_VALUE: 285 assert(!"unhandled system value in tgsi_ppc.c"); 286 break; 287 case TGSI_FILE_TEMPORARY: 288 if (is_ppc_vec_temporary(reg)) { 289 /* use PPC vec register */ 290 dst_vec = gen->temps_map[reg->Register.Index][swizzle]; 291 } 292 else { 293 /* use memory-based temp register "file" */ 294 int offset = (reg->Register.Index * 4 + swizzle) * 16; 295 int offset_reg = emit_li_offset(gen, offset); 296 dst_vec = ppc_allocate_vec_register(gen->f); 297 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 298 } 299 break; 300 case TGSI_FILE_IMMEDIATE: 301 { 302 int offset = (reg->Register.Index * 4 + swizzle) * 4; 303 int offset_reg = emit_li_offset(gen, offset); 304 dst_vec = ppc_allocate_vec_register(gen->f); 305 /* Load 4-byte word into vector register. 306 * The vector slot depends on the effective address we load from. 307 * We know that our immediates start at a 16-byte boundary so we 308 * know that 'swizzle' tells us which vector slot will have the 309 * loaded word. The other vector slots will be undefined. 310 */ 311 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); 312 /* splat word[swizzle] across the vector reg */ 313 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 314 } 315 break; 316 case TGSI_FILE_CONSTANT: 317 { 318 int offset = (reg->Register.Index * 4 + swizzle) * 4; 319 int offset_reg = emit_li_offset(gen, offset); 320 dst_vec = ppc_allocate_vec_register(gen->f); 321 /* Load 4-byte word into vector register. 322 * The vector slot depends on the effective address we load from. 323 * We know that our constants start at a 16-byte boundary so we 324 * know that 'swizzle' tells us which vector slot will have the 325 * loaded word. The other vector slots will be undefined. 326 */ 327 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 328 /* splat word[swizzle] across the vector reg */ 329 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 330 } 331 break; 332 default: 333 assert( 0 ); 334 } 335 break; 336 default: 337 assert( 0 ); 338 } 339 340 assert(dst_vec >= 0); 341 342 { 343 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 344 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 345 int bit31_vec = gen_get_bit31_vec(gen); 346 int dst_vec2; 347 348 if (is_ppc_vec_temporary(reg)) { 349 /* need to use a new temp */ 350 dst_vec2 = ppc_allocate_vec_register(gen->f); 351 } 352 else { 353 dst_vec2 = dst_vec; 354 } 355 356 switch (sign_op) { 357 case TGSI_UTIL_SIGN_CLEAR: 358 /* vec = vec & ~bit31 */ 359 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); 360 break; 361 case TGSI_UTIL_SIGN_SET: 362 /* vec = vec | bit31 */ 363 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); 364 break; 365 case TGSI_UTIL_SIGN_TOGGLE: 366 /* vec = vec ^ bit31 */ 367 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); 368 break; 369 default: 370 assert(0); 371 } 372 return dst_vec2; 373 } 374 } 375 376 return dst_vec; 377 } 378 379 380 381 /** 382 * Test if two TGSI src registers refer to the same memory location. 383 * We use this to avoid redundant register loads. 384 */ 385 static boolean 386 equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, 387 const struct tgsi_full_src_register *b, uint chan_b) 388 { 389 int swz_a, swz_b; 390 int sign_a, sign_b; 391 if (a->Register.File != b->Register.File) 392 return FALSE; 393 if (a->Register.Index != b->Register.Index) 394 return FALSE; 395 swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); 396 swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); 397 if (swz_a != swz_b) 398 return FALSE; 399 sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); 400 sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); 401 if (sign_a != sign_b) 402 return FALSE; 403 return TRUE; 404 } 405 406 407 /** 408 * Given a TGSI src register and channel index, return the PPC vector 409 * register containing the value. We use a cache to prevent re-loading 410 * the same register multiple times. 411 * \return index of PPC vector register with the desired src operand 412 */ 413 static int 414 get_src_vec(struct gen_context *gen, 415 struct tgsi_full_instruction *inst, int src_reg, uint chan) 416 { 417 const const struct tgsi_full_src_register *src = 418 &inst->Src[src_reg]; 419 int vec; 420 uint i; 421 422 /* check the cache */ 423 for (i = 0; i < gen->num_regs; i++) { 424 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { 425 /* cache hit */ 426 assert(gen->regs[i].vec >= 0); 427 return gen->regs[i].vec; 428 } 429 } 430 431 /* cache miss: allocate new vec reg and emit fetch/load code */ 432 vec = emit_fetch(gen, src, chan); 433 gen->regs[gen->num_regs].src = *src; 434 gen->regs[gen->num_regs].chan = chan; 435 gen->regs[gen->num_regs].vec = vec; 436 gen->num_regs++; 437 438 assert(gen->num_regs <= Elements(gen->regs)); 439 440 assert(vec >= 0); 441 442 return vec; 443 } 444 445 446 /** 447 * Clear the src operand cache. To be called at the end of each emit function. 448 */ 449 static void 450 release_src_vecs(struct gen_context *gen) 451 { 452 uint i; 453 for (i = 0; i < gen->num_regs; i++) { 454 const const struct tgsi_full_src_register src = gen->regs[i].src; 455 if (!is_ppc_vec_temporary(&src)) { 456 ppc_release_vec_register(gen->f, gen->regs[i].vec); 457 } 458 } 459 gen->num_regs = 0; 460 } 461 462 463 464 static int 465 get_dst_vec(struct gen_context *gen, 466 const struct tgsi_full_instruction *inst, 467 unsigned chan_index) 468 { 469 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 470 471 if (is_ppc_vec_temporary_dst(reg)) { 472 int vec = gen->temps_map[reg->Register.Index][chan_index]; 473 return vec; 474 } 475 else { 476 return ppc_allocate_vec_register(gen->f); 477 } 478 } 479 480 481 /** 482 * Register store. Store 'src_vec' at location indicated by 'reg'. 483 * \param free_vec Should the src_vec be released when done? 484 */ 485 static void 486 emit_store(struct gen_context *gen, 487 int src_vec, 488 const struct tgsi_full_instruction *inst, 489 unsigned chan_index, 490 boolean free_vec) 491 { 492 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 493 494 switch (reg->Register.File) { 495 case TGSI_FILE_OUTPUT: 496 { 497 int offset = (reg->Register.Index * 4 + chan_index) * 16; 498 int offset_reg = emit_li_offset(gen, offset); 499 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 500 } 501 break; 502 case TGSI_FILE_TEMPORARY: 503 if (is_ppc_vec_temporary_dst(reg)) { 504 if (!free_vec) { 505 int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; 506 if (dst_vec != src_vec) 507 ppc_vmove(gen->f, dst_vec, src_vec); 508 } 509 free_vec = FALSE; 510 } 511 else { 512 int offset = (reg->Register.Index * 4 + chan_index) * 16; 513 int offset_reg = emit_li_offset(gen, offset); 514 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 515 } 516 break; 517 #if 0 518 case TGSI_FILE_ADDRESS: 519 emit_addrs( 520 func, 521 xmm, 522 reg->Register.Index, 523 chan_index ); 524 break; 525 #endif 526 default: 527 assert( 0 ); 528 } 529 530 #if 0 531 switch( inst->Instruction.Saturate ) { 532 case TGSI_SAT_NONE: 533 break; 534 535 case TGSI_SAT_ZERO_ONE: 536 /* assert( 0 ); */ 537 break; 538 539 case TGSI_SAT_MINUS_PLUS_ONE: 540 assert( 0 ); 541 break; 542 } 543 #endif 544 545 if (free_vec) 546 ppc_release_vec_register(gen->f, src_vec); 547 } 548 549 550 static void 551 emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 552 { 553 int v0, v1; 554 uint chan_index; 555 556 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 557 v1 = ppc_allocate_vec_register(gen->f); 558 559 switch (inst->Instruction.Opcode) { 560 case TGSI_OPCODE_RSQ: 561 /* v1 = 1.0 / sqrt(v0) */ 562 ppc_vrsqrtefp(gen->f, v1, v0); 563 break; 564 case TGSI_OPCODE_RCP: 565 /* v1 = 1.0 / v0 */ 566 ppc_vrefp(gen->f, v1, v0); 567 break; 568 default: 569 assert(0); 570 } 571 572 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 573 emit_store(gen, v1, inst, chan_index, FALSE); 574 } 575 576 release_src_vecs(gen); 577 ppc_release_vec_register(gen->f, v1); 578 } 579 580 581 static void 582 emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 583 { 584 uint chan_index; 585 586 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { 587 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ 588 int v1 = get_dst_vec(gen, inst, chan_index); 589 switch (inst->Instruction.Opcode) { 590 case TGSI_OPCODE_ABS: 591 /* turn off the most significant bit of each vector float word */ 592 { 593 int bit31_vec = gen_get_bit31_vec(gen); 594 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ 595 } 596 break; 597 case TGSI_OPCODE_FLR: 598 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 599 break; 600 case TGSI_OPCODE_FRC: 601 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ 602 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ 603 break; 604 case TGSI_OPCODE_EX2: 605 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ 606 break; 607 case TGSI_OPCODE_LG2: 608 /* XXX this may be broken! */ 609 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ 610 break; 611 case TGSI_OPCODE_MOV: 612 if (v0 != v1) 613 ppc_vmove(gen->f, v1, v0); 614 break; 615 default: 616 assert(0); 617 } 618 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ 619 } 620 621 release_src_vecs(gen); 622 } 623 624 625 static void 626 emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 627 { 628 int zero_vec = -1; 629 uint chan; 630 631 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { 632 zero_vec = ppc_allocate_vec_register(gen->f); 633 ppc_vzero(gen->f, zero_vec); 634 } 635 636 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 637 /* fetch src operands */ 638 int v0 = get_src_vec(gen, inst, 0, chan); 639 int v1 = get_src_vec(gen, inst, 1, chan); 640 int v2 = get_dst_vec(gen, inst, chan); 641 642 /* emit binop */ 643 switch (inst->Instruction.Opcode) { 644 case TGSI_OPCODE_ADD: 645 ppc_vaddfp(gen->f, v2, v0, v1); 646 break; 647 case TGSI_OPCODE_SUB: 648 ppc_vsubfp(gen->f, v2, v0, v1); 649 break; 650 case TGSI_OPCODE_MUL: 651 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); 652 break; 653 case TGSI_OPCODE_MIN: 654 ppc_vminfp(gen->f, v2, v0, v1); 655 break; 656 case TGSI_OPCODE_MAX: 657 ppc_vmaxfp(gen->f, v2, v0, v1); 658 break; 659 default: 660 assert(0); 661 } 662 663 /* store v2 */ 664 emit_store(gen, v2, inst, chan, TRUE); 665 } 666 667 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) 668 ppc_release_vec_register(gen->f, zero_vec); 669 670 release_src_vecs(gen); 671 } 672 673 674 static void 675 emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 676 { 677 uint chan; 678 679 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 680 /* fetch src operands */ 681 int v0 = get_src_vec(gen, inst, 0, chan); 682 int v1 = get_src_vec(gen, inst, 1, chan); 683 int v2 = get_src_vec(gen, inst, 2, chan); 684 int v3 = get_dst_vec(gen, inst, chan); 685 686 /* emit ALU */ 687 switch (inst->Instruction.Opcode) { 688 case TGSI_OPCODE_MAD: 689 ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ 690 break; 691 case TGSI_OPCODE_LRP: 692 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 693 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 694 break; 695 default: 696 assert(0); 697 } 698 699 /* store v3 */ 700 emit_store(gen, v3, inst, chan, TRUE); 701 } 702 703 release_src_vecs(gen); 704 } 705 706 707 /** 708 * Vector comparisons, resulting in 1.0 or 0.0 values. 709 */ 710 static void 711 emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 712 { 713 uint chan; 714 int one_vec = gen_one_vec(gen); 715 716 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 717 /* fetch src operands */ 718 int v0 = get_src_vec(gen, inst, 0, chan); 719 int v1 = get_src_vec(gen, inst, 1, chan); 720 int v2 = get_dst_vec(gen, inst, chan); 721 boolean complement = FALSE; 722 723 switch (inst->Instruction.Opcode) { 724 case TGSI_OPCODE_SNE: 725 complement = TRUE; 726 /* fall-through */ 727 case TGSI_OPCODE_SEQ: 728 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 729 break; 730 731 case TGSI_OPCODE_SGE: 732 complement = TRUE; 733 /* fall-through */ 734 case TGSI_OPCODE_SLT: 735 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 736 break; 737 738 case TGSI_OPCODE_SLE: 739 complement = TRUE; 740 /* fall-through */ 741 case TGSI_OPCODE_SGT: 742 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ 743 break; 744 default: 745 assert(0); 746 } 747 748 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 749 750 if (complement) 751 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 752 else 753 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 754 755 /* store v2 */ 756 emit_store(gen, v2, inst, chan, TRUE); 757 } 758 759 release_src_vecs(gen); 760 } 761 762 763 static void 764 emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 765 { 766 int v0, v1, v2; 767 uint chan_index; 768 769 v2 = ppc_allocate_vec_register(gen->f); 770 771 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ 772 773 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* v0 = src0.XXXX */ 774 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_X); /* v1 = src1.XXXX */ 775 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 776 777 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* v0 = src0.YYYY */ 778 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Y); /* v1 = src1.YYYY */ 779 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 780 781 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Z); /* v0 = src0.ZZZZ */ 782 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Z); /* v1 = src1.ZZZZ */ 783 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 784 785 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 786 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* v0 = src0.WWWW */ 787 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */ 788 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 789 } 790 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 791 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */ 792 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 793 } 794 795 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { 796 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ 797 } 798 799 release_src_vecs(gen); 800 801 ppc_release_vec_register(gen->f, v2); 802 } 803 804 805 /** Approximation for vr = pow(va, vb) */ 806 static void 807 ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) 808 { 809 /* pow(a,b) ~= exp2(log2(a) * b) */ 810 int t_vec = ppc_allocate_vec_register(f); 811 int zero_vec = ppc_allocate_vec_register(f); 812 813 ppc_vzero(f, zero_vec); 814 815 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 816 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ 817 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 818 819 ppc_release_vec_register(f, t_vec); 820 ppc_release_vec_register(f, zero_vec); 821 } 822 823 824 static void 825 emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 826 { 827 int one_vec = gen_one_vec(gen); 828 829 /* Compute X */ 830 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { 831 emit_store(gen, one_vec, inst, TGSI_CHAN_X, FALSE); 832 } 833 834 /* Compute Y, Z */ 835 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) || 836 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 837 int x_vec; 838 int zero_vec = ppc_allocate_vec_register(gen->f); 839 840 x_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* x_vec = src[0].x */ 841 842 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 843 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 844 845 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 846 emit_store(gen, x_vec, inst, TGSI_CHAN_Y, FALSE); 847 } 848 849 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 850 int y_vec, w_vec; 851 int z_vec = ppc_allocate_vec_register(gen->f); 852 int pow_vec = ppc_allocate_vec_register(gen->f); 853 int pos_vec = ppc_allocate_vec_register(gen->f); 854 int p128_vec = ppc_allocate_vec_register(gen->f); 855 int n128_vec = ppc_allocate_vec_register(gen->f); 856 857 y_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* y_vec = src[0].y */ 858 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 859 860 w_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* w_vec = src[0].w */ 861 862 /* clamp W to [-128, 128] */ 863 load_constant_vec(gen, p128_vec, 128.0f); 864 load_constant_vec(gen, n128_vec, -128.0f); 865 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ 866 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ 867 868 /* if temp.x > 0 869 * z = pow(tmp.y, tmp.w) 870 * else 871 * z = 0.0 872 */ 873 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 874 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 875 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 876 877 emit_store(gen, z_vec, inst, TGSI_CHAN_Z, FALSE); 878 879 ppc_release_vec_register(gen->f, z_vec); 880 ppc_release_vec_register(gen->f, pow_vec); 881 ppc_release_vec_register(gen->f, pos_vec); 882 ppc_release_vec_register(gen->f, p128_vec); 883 ppc_release_vec_register(gen->f, n128_vec); 884 } 885 886 ppc_release_vec_register(gen->f, zero_vec); 887 } 888 889 /* Compute W */ 890 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) { 891 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 892 } 893 894 release_src_vecs(gen); 895 } 896 897 898 static void 899 emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) 900 { 901 const int one_vec = gen_one_vec(gen); 902 int src_vec; 903 904 /* get src arg */ 905 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 906 907 /* Compute X = 2^floor(src) */ 908 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { 909 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_X); 910 int tmp_vec = ppc_allocate_vec_register(gen->f); 911 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 912 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ 913 emit_store(gen, dst_vec, inst, TGSI_CHAN_X, TRUE); 914 ppc_release_vec_register(gen->f, tmp_vec); 915 } 916 917 /* Compute Y = src - floor(src) */ 918 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 919 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Y); 920 int tmp_vec = ppc_allocate_vec_register(gen->f); 921 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 922 ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ 923 emit_store(gen, dst_vec, inst, TGSI_CHAN_Y, TRUE); 924 ppc_release_vec_register(gen->f, tmp_vec); 925 } 926 927 /* Compute Z = RoughApprox2ToX(src) */ 928 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 929 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z); 930 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ 931 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE); 932 } 933 934 /* Compute W = 1.0 */ 935 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) { 936 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 937 } 938 939 release_src_vecs(gen); 940 } 941 942 943 static void 944 emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) 945 { 946 const int bit31_vec = gen_get_bit31_vec(gen); 947 const int one_vec = gen_one_vec(gen); 948 int src_vec, abs_vec; 949 950 /* get src arg */ 951 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 952 953 /* compute abs(src) */ 954 abs_vec = ppc_allocate_vec_register(gen->f); 955 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ 956 957 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && 958 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 959 960 /* compute tmp = floor(log2(abs)) */ 961 int tmp_vec = ppc_allocate_vec_register(gen->f); 962 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ 963 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ 964 965 /* Compute X = tmp */ 966 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { 967 emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE); 968 } 969 970 /* Compute Y = abs / 2^tmp */ 971 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 972 const int zero_vec = ppc_allocate_vec_register(gen->f); 973 ppc_vzero(gen->f, zero_vec); 974 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ 975 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ 976 /* tmp = abs * tmp + zero */ 977 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); 978 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Y, FALSE); 979 ppc_release_vec_register(gen->f, zero_vec); 980 } 981 982 ppc_release_vec_register(gen->f, tmp_vec); 983 } 984 985 /* Compute Z = RoughApproxLog2(abs) */ 986 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 987 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z); 988 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ 989 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE); 990 } 991 992 /* Compute W = 1.0 */ 993 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) { 994 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 995 } 996 997 ppc_release_vec_register(gen->f, abs_vec); 998 release_src_vecs(gen); 999 } 1000 1001 1002 static void 1003 emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) 1004 { 1005 int s0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 1006 int s1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X); 1007 int pow_vec = ppc_allocate_vec_register(gen->f); 1008 int chan; 1009 1010 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); 1011 1012 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 1013 emit_store(gen, pow_vec, inst, chan, FALSE); 1014 } 1015 1016 ppc_release_vec_register(gen->f, pow_vec); 1017 1018 release_src_vecs(gen); 1019 } 1020 1021 1022 static void 1023 emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) 1024 { 1025 int x0_vec, y0_vec, z0_vec; 1026 int x1_vec, y1_vec, z1_vec; 1027 int zero_vec, tmp_vec; 1028 int tmp2_vec; 1029 1030 zero_vec = ppc_allocate_vec_register(gen->f); 1031 ppc_vzero(gen->f, zero_vec); 1032 1033 tmp_vec = ppc_allocate_vec_register(gen->f); 1034 tmp2_vec = ppc_allocate_vec_register(gen->f); 1035 1036 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) || 1037 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 1038 x0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 1039 x1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X); 1040 } 1041 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) || 1042 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 1043 y0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); 1044 y1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Y); 1045 } 1046 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) || 1047 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 1048 z0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Z); 1049 z1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Z); 1050 } 1051 1052 TGSI_IF_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) { 1053 /* tmp = y0 * z1 */ 1054 ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); 1055 /* tmp = tmp - z0 * y1*/ 1056 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); 1057 emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE); 1058 } 1059 TGSI_IF_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) { 1060 /* tmp = z0 * x1 */ 1061 ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); 1062 /* tmp = tmp - x0 * z1 */ 1063 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); 1064 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Y, FALSE); 1065 } 1066 TGSI_IF_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) { 1067 /* tmp = x0 * y1 */ 1068 ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); 1069 /* tmp = tmp - y0 * x1 */ 1070 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); 1071 emit_store(gen, tmp_vec, inst, TGSI_CHAN_Z, FALSE); 1072 } 1073 /* W is undefined */ 1074 1075 ppc_release_vec_register(gen->f, tmp_vec); 1076 ppc_release_vec_register(gen->f, zero_vec); 1077 release_src_vecs(gen); 1078 } 1079 1080 static int 1081 emit_instruction(struct gen_context *gen, 1082 struct tgsi_full_instruction *inst) 1083 { 1084 1085 /* we don't handle saturation/clamping yet */ 1086 if (inst->Instruction.Saturate != TGSI_SAT_NONE) 1087 return 0; 1088 1089 /* need to use extra temps to fix SOA dependencies : */ 1090 if (tgsi_check_soa_dependencies(inst)) 1091 return FALSE; 1092 1093 switch (inst->Instruction.Opcode) { 1094 case TGSI_OPCODE_MOV: 1095 case TGSI_OPCODE_ABS: 1096 case TGSI_OPCODE_FLR: 1097 case TGSI_OPCODE_FRC: 1098 case TGSI_OPCODE_EX2: 1099 case TGSI_OPCODE_LG2: 1100 emit_unaryop(gen, inst); 1101 break; 1102 case TGSI_OPCODE_RSQ: 1103 case TGSI_OPCODE_RCP: 1104 emit_scalar_unaryop(gen, inst); 1105 break; 1106 case TGSI_OPCODE_ADD: 1107 case TGSI_OPCODE_SUB: 1108 case TGSI_OPCODE_MUL: 1109 case TGSI_OPCODE_MIN: 1110 case TGSI_OPCODE_MAX: 1111 emit_binop(gen, inst); 1112 break; 1113 case TGSI_OPCODE_SEQ: 1114 case TGSI_OPCODE_SNE: 1115 case TGSI_OPCODE_SLT: 1116 case TGSI_OPCODE_SGT: 1117 case TGSI_OPCODE_SLE: 1118 case TGSI_OPCODE_SGE: 1119 emit_inequality(gen, inst); 1120 break; 1121 case TGSI_OPCODE_MAD: 1122 case TGSI_OPCODE_LRP: 1123 emit_triop(gen, inst); 1124 break; 1125 case TGSI_OPCODE_DP3: 1126 case TGSI_OPCODE_DP4: 1127 case TGSI_OPCODE_DPH: 1128 emit_dotprod(gen, inst); 1129 break; 1130 case TGSI_OPCODE_LIT: 1131 emit_lit(gen, inst); 1132 break; 1133 case TGSI_OPCODE_LOG: 1134 emit_log(gen, inst); 1135 break; 1136 case TGSI_OPCODE_EXP: 1137 emit_exp(gen, inst); 1138 break; 1139 case TGSI_OPCODE_POW: 1140 emit_pow(gen, inst); 1141 break; 1142 case TGSI_OPCODE_XPD: 1143 emit_xpd(gen, inst); 1144 break; 1145 case TGSI_OPCODE_END: 1146 /* normal end */ 1147 return 1; 1148 default: 1149 return 0; 1150 } 1151 return 1; 1152 } 1153 1154 1155 static void 1156 emit_declaration( 1157 struct ppc_function *func, 1158 struct tgsi_full_declaration *decl ) 1159 { 1160 if( decl->Declaration.File == TGSI_FILE_INPUT || 1161 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { 1162 #if 0 1163 unsigned first, last, mask; 1164 unsigned i, j; 1165 1166 first = decl->Range.First; 1167 last = decl->Range.Last; 1168 mask = decl->Declaration.UsageMask; 1169 1170 for( i = first; i <= last; i++ ) { 1171 for( j = 0; j < NUM_CHANNELS; j++ ) { 1172 if( mask & (1 << j) ) { 1173 switch( decl->Interp.Interpolate ) { 1174 case TGSI_INTERPOLATE_CONSTANT: 1175 emit_coef_a0( func, 0, i, j ); 1176 emit_inputs( func, 0, i, j ); 1177 break; 1178 1179 case TGSI_INTERPOLATE_LINEAR: 1180 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1181 emit_coef_dadx( func, 1, i, j ); 1182 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1183 emit_coef_dady( func, 3, i, j ); 1184 emit_mul( func, 0, 1 ); /* x * dadx */ 1185 emit_coef_a0( func, 4, i, j ); 1186 emit_mul( func, 2, 3 ); /* y * dady */ 1187 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 1188 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1189 emit_inputs( func, 0, i, j ); 1190 break; 1191 1192 case TGSI_INTERPOLATE_PERSPECTIVE: 1193 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1194 emit_coef_dadx( func, 1, i, j ); 1195 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1196 emit_coef_dady( func, 3, i, j ); 1197 emit_mul( func, 0, 1 ); /* x * dadx */ 1198 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 1199 emit_coef_a0( func, 5, i, j ); 1200 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 1201 emit_mul( func, 2, 3 ); /* y * dady */ 1202 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 1203 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1204 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 1205 emit_inputs( func, 0, i, j ); 1206 break; 1207 1208 default: 1209 assert( 0 ); 1210 break; 1211 } 1212 } 1213 } 1214 } 1215 #endif 1216 } 1217 } 1218 1219 1220 1221 static void 1222 emit_prologue(struct ppc_function *func) 1223 { 1224 /* XXX set up stack frame */ 1225 } 1226 1227 1228 static void 1229 emit_epilogue(struct ppc_function *func) 1230 { 1231 ppc_comment(func, -4, "Epilogue:"); 1232 ppc_return(func); 1233 /* XXX restore prev stack frame */ 1234 #if 0 1235 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); 1236 #endif 1237 } 1238 1239 1240 1241 /** 1242 * Translate a TGSI vertex/fragment shader to PPC code. 1243 * 1244 * \param tokens the TGSI input shader 1245 * \param func the output PPC code/function 1246 * \param immediates buffer to place immediates, later passed to PPC func 1247 * \return TRUE for success, FALSE if translation failed 1248 */ 1249 boolean 1250 tgsi_emit_ppc(const struct tgsi_token *tokens, 1251 struct ppc_function *func, 1252 float (*immediates)[4], 1253 boolean do_swizzles ) 1254 { 1255 static int use_ppc_asm = -1; 1256 struct tgsi_parse_context parse; 1257 /*boolean instruction_phase = FALSE;*/ 1258 unsigned ok = 1; 1259 uint num_immediates = 0; 1260 struct gen_context gen; 1261 uint ic = 0; 1262 1263 if (use_ppc_asm < 0) { 1264 /* If GALLIUM_NOPPC is set, don't use PPC codegen */ 1265 use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); 1266 } 1267 if (!use_ppc_asm) 1268 return FALSE; 1269 1270 if (0) { 1271 debug_printf("\n********* TGSI->PPC ********\n"); 1272 tgsi_dump(tokens, 0); 1273 } 1274 1275 util_init_math(); 1276 1277 init_gen_context(&gen, func); 1278 1279 emit_prologue(func); 1280 1281 tgsi_parse_init( &parse, tokens ); 1282 1283 while (!tgsi_parse_end_of_tokens(&parse) && ok) { 1284 tgsi_parse_token(&parse); 1285 1286 switch (parse.FullToken.Token.Type) { 1287 case TGSI_TOKEN_TYPE_DECLARATION: 1288 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { 1289 emit_declaration(func, &parse.FullToken.FullDeclaration ); 1290 } 1291 break; 1292 1293 case TGSI_TOKEN_TYPE_INSTRUCTION: 1294 if (func->print) { 1295 _debug_printf("# "); 1296 ic++; 1297 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); 1298 } 1299 1300 ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); 1301 1302 if (!ok) { 1303 uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 1304 debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n", 1305 opcode, 1306 tgsi_get_opcode_name(opcode), 1307 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 1308 "vertex shader" : "fragment shader"); 1309 } 1310 break; 1311 1312 case TGSI_TOKEN_TYPE_IMMEDIATE: 1313 /* splat each immediate component into a float[4] vector for SoA */ 1314 { 1315 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1316 uint i; 1317 assert(size <= 4); 1318 assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); 1319 for (i = 0; i < size; i++) { 1320 immediates[num_immediates][i] = 1321 parse.FullToken.FullImmediate.u[i].Float; 1322 } 1323 num_immediates++; 1324 } 1325 break; 1326 1327 case TGSI_TOKEN_TYPE_PROPERTY: 1328 break; 1329 1330 default: 1331 ok = 0; 1332 assert( 0 ); 1333 } 1334 } 1335 1336 emit_epilogue(func); 1337 1338 tgsi_parse_free( &parse ); 1339 1340 if (ppc_num_instructions(func) == 0) { 1341 /* ran out of memory for instructions */ 1342 ok = FALSE; 1343 } 1344 1345 if (!ok) 1346 debug_printf("TGSI->PPC translation failed\n"); 1347 1348 return ok; 1349 } 1350 1351 #else 1352 1353 void ppc_dummy_func(void); 1354 1355 void ppc_dummy_func(void) 1356 { 1357 } 1358 1359 #endif /* PIPE_ARCH_PPC */ 1360