1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Shader.hpp" 16 17 #include "VertexShader.hpp" 18 #include "PixelShader.hpp" 19 #include "Math.hpp" 20 #include "Debug.hpp" 21 22 #include <set> 23 #include <fstream> 24 #include <sstream> 25 #include <stdarg.h> 26 27 namespace sw 28 { 29 volatile int Shader::serialCounter = 1; 30 31 Shader::Opcode Shader::OPCODE_DP(int i) 32 { 33 switch(i) 34 { 35 default: ASSERT(false); 36 case 1: return OPCODE_DP1; 37 case 2: return OPCODE_DP2; 38 case 3: return OPCODE_DP3; 39 case 4: return OPCODE_DP4; 40 } 41 } 42 43 Shader::Opcode Shader::OPCODE_LEN(int i) 44 { 45 switch(i) 46 { 47 default: ASSERT(false); 48 case 1: return OPCODE_ABS; 49 case 2: return OPCODE_LEN2; 50 case 3: return OPCODE_LEN3; 51 case 4: return OPCODE_LEN4; 52 } 53 } 54 55 Shader::Opcode Shader::OPCODE_DIST(int i) 56 { 57 switch(i) 58 { 59 default: ASSERT(false); 60 case 1: return OPCODE_DIST1; 61 case 2: return OPCODE_DIST2; 62 case 3: return OPCODE_DIST3; 63 case 4: return OPCODE_DIST4; 64 } 65 } 66 67 Shader::Opcode Shader::OPCODE_NRM(int i) 68 { 69 switch(i) 70 { 71 default: ASSERT(false); 72 case 1: return OPCODE_SGN; 73 case 2: return OPCODE_NRM2; 74 case 3: return OPCODE_NRM3; 75 case 4: return OPCODE_NRM4; 76 } 77 } 78 79 Shader::Opcode Shader::OPCODE_FORWARD(int i) 80 { 81 switch(i) 82 { 83 default: ASSERT(false); 84 case 1: return OPCODE_FORWARD1; 85 case 2: return OPCODE_FORWARD2; 86 case 3: return OPCODE_FORWARD3; 87 case 4: return OPCODE_FORWARD4; 88 } 89 } 90 91 Shader::Opcode Shader::OPCODE_REFLECT(int i) 92 { 93 switch(i) 94 { 95 default: ASSERT(false); 96 case 1: return OPCODE_REFLECT1; 97 case 2: return OPCODE_REFLECT2; 98 case 3: return OPCODE_REFLECT3; 99 case 4: return OPCODE_REFLECT4; 100 } 101 } 102 103 Shader::Opcode Shader::OPCODE_REFRACT(int i) 104 { 105 switch(i) 106 { 107 default: ASSERT(false); 108 case 1: return OPCODE_REFRACT1; 109 case 2: return OPCODE_REFRACT2; 110 case 3: return OPCODE_REFRACT3; 111 case 4: return OPCODE_REFRACT4; 112 } 113 } 114 115 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) 116 { 117 control = CONTROL_RESERVED0; 118 119 predicate = false; 120 predicateNot = false; 121 predicateSwizzle = 0xE4; 122 123 coissue = false; 124 samplerType = SAMPLER_UNKNOWN; 125 usage = USAGE_POSITION; 126 usageIndex = 0; 127 } 128 129 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) 130 { 131 parseOperationToken(*token++, majorVersion); 132 133 samplerType = SAMPLER_UNKNOWN; 134 usage = USAGE_POSITION; 135 usageIndex = 0; 136 137 if(opcode == OPCODE_IF || 138 opcode == OPCODE_IFC || 139 opcode == OPCODE_LOOP || 140 opcode == OPCODE_REP || 141 opcode == OPCODE_BREAKC || 142 opcode == OPCODE_BREAKP) // No destination operand 143 { 144 if(size > 0) parseSourceToken(0, token++, majorVersion); 145 if(size > 1) parseSourceToken(1, token++, majorVersion); 146 if(size > 2) parseSourceToken(2, token++, majorVersion); 147 if(size > 3) ASSERT(false); 148 } 149 else if(opcode == OPCODE_DCL) 150 { 151 parseDeclarationToken(*token++); 152 parseDestinationToken(token++, majorVersion); 153 } 154 else 155 { 156 if(size > 0) 157 { 158 parseDestinationToken(token, majorVersion); 159 160 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) 161 { 162 token++; 163 size--; 164 } 165 166 token++; 167 size--; 168 } 169 170 if(predicate) 171 { 172 ASSERT(size != 0); 173 174 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; 175 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); 176 177 token++; 178 size--; 179 } 180 181 for(int i = 0; size > 0; i++) 182 { 183 parseSourceToken(i, token, majorVersion); 184 185 token++; 186 size--; 187 188 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) 189 { 190 token++; 191 size--; 192 } 193 } 194 } 195 } 196 197 Shader::Instruction::~Instruction() 198 { 199 } 200 201 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const 202 { 203 std::string instructionString; 204 205 if(opcode != OPCODE_DCL) 206 { 207 instructionString += coissue ? "+ " : ""; 208 209 if(predicate) 210 { 211 instructionString += predicateNot ? "(!p0" : "(p0"; 212 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); 213 instructionString += ") "; 214 } 215 216 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); 217 218 if(dst.type != PARAMETER_VOID) 219 { 220 instructionString += " " + dst.string(shaderType, version) + 221 dst.relativeString() + 222 dst.maskString(); 223 } 224 225 for(int i = 0; i < 4; i++) 226 { 227 if(src[i].type != PARAMETER_VOID) 228 { 229 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " "; 230 instructionString += src[i].preModifierString() + 231 src[i].string(shaderType, version) + 232 src[i].relativeString() + 233 src[i].postModifierString() + 234 src[i].swizzleString(); 235 } 236 } 237 } 238 else // DCL 239 { 240 instructionString += "dcl"; 241 242 if(dst.type == PARAMETER_SAMPLER) 243 { 244 switch(samplerType) 245 { 246 case SAMPLER_UNKNOWN: instructionString += " "; break; 247 case SAMPLER_1D: instructionString += "_1d "; break; 248 case SAMPLER_2D: instructionString += "_2d "; break; 249 case SAMPLER_CUBE: instructionString += "_cube "; break; 250 case SAMPLER_VOLUME: instructionString += "_volume "; break; 251 default: 252 ASSERT(false); 253 } 254 255 instructionString += dst.string(shaderType, version); 256 } 257 else if(dst.type == PARAMETER_INPUT || 258 dst.type == PARAMETER_OUTPUT || 259 dst.type == PARAMETER_TEXTURE) 260 { 261 if(version >= 0x0300) 262 { 263 switch(usage) 264 { 265 case USAGE_POSITION: instructionString += "_position"; break; 266 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break; 267 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break; 268 case USAGE_NORMAL: instructionString += "_normal"; break; 269 case USAGE_PSIZE: instructionString += "_psize"; break; 270 case USAGE_TEXCOORD: instructionString += "_texcoord"; break; 271 case USAGE_TANGENT: instructionString += "_tangent"; break; 272 case USAGE_BINORMAL: instructionString += "_binormal"; break; 273 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; 274 case USAGE_POSITIONT: instructionString += "_positiont"; break; 275 case USAGE_COLOR: instructionString += "_color"; break; 276 case USAGE_FOG: instructionString += "_fog"; break; 277 case USAGE_DEPTH: instructionString += "_depth"; break; 278 case USAGE_SAMPLE: instructionString += "_sample"; break; 279 default: 280 ASSERT(false); 281 } 282 283 if(usageIndex > 0) 284 { 285 std::ostringstream buffer; 286 287 buffer << (int)usageIndex; 288 289 instructionString += buffer.str(); 290 } 291 } 292 else ASSERT(dst.type != PARAMETER_OUTPUT); 293 294 instructionString += " "; 295 296 instructionString += dst.string(shaderType, version); 297 instructionString += dst.maskString(); 298 } 299 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace 300 { 301 instructionString += " "; 302 303 instructionString += dst.string(shaderType, version); 304 } 305 else ASSERT(false); 306 } 307 308 return instructionString; 309 } 310 311 std::string Shader::DestinationParameter::modifierString() const 312 { 313 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 314 { 315 return ""; 316 } 317 318 std::string modifierString; 319 320 if(integer) 321 { 322 modifierString += "_int"; 323 } 324 325 if(saturate) 326 { 327 modifierString += "_sat"; 328 } 329 330 if(partialPrecision) 331 { 332 modifierString += "_pp"; 333 } 334 335 if(centroid) 336 { 337 modifierString += "_centroid"; 338 } 339 340 return modifierString; 341 } 342 343 std::string Shader::DestinationParameter::shiftString() const 344 { 345 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 346 { 347 return ""; 348 } 349 350 switch(shift) 351 { 352 case 0: return ""; 353 case 1: return "_x2"; 354 case 2: return "_x4"; 355 case 3: return "_x8"; 356 case -1: return "_d2"; 357 case -2: return "_d4"; 358 case -3: return "_d8"; 359 default: 360 return ""; 361 // ASSERT(false); // FIXME 362 } 363 } 364 365 std::string Shader::DestinationParameter::maskString() const 366 { 367 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 368 { 369 return ""; 370 } 371 372 switch(mask) 373 { 374 case 0x0: return ""; 375 case 0x1: return ".x"; 376 case 0x2: return ".y"; 377 case 0x3: return ".xy"; 378 case 0x4: return ".z"; 379 case 0x5: return ".xz"; 380 case 0x6: return ".yz"; 381 case 0x7: return ".xyz"; 382 case 0x8: return ".w"; 383 case 0x9: return ".xw"; 384 case 0xA: return ".yw"; 385 case 0xB: return ".xyw"; 386 case 0xC: return ".zw"; 387 case 0xD: return ".xzw"; 388 case 0xE: return ".yzw"; 389 case 0xF: return ""; 390 default: 391 ASSERT(false); 392 } 393 394 return ""; 395 } 396 397 std::string Shader::SourceParameter::preModifierString() const 398 { 399 if(type == PARAMETER_VOID) 400 { 401 return ""; 402 } 403 404 switch(modifier) 405 { 406 case MODIFIER_NONE: return ""; 407 case MODIFIER_NEGATE: return "-"; 408 case MODIFIER_BIAS: return ""; 409 case MODIFIER_BIAS_NEGATE: return "-"; 410 case MODIFIER_SIGN: return ""; 411 case MODIFIER_SIGN_NEGATE: return "-"; 412 case MODIFIER_COMPLEMENT: return "1-"; 413 case MODIFIER_X2: return ""; 414 case MODIFIER_X2_NEGATE: return "-"; 415 case MODIFIER_DZ: return ""; 416 case MODIFIER_DW: return ""; 417 case MODIFIER_ABS: return ""; 418 case MODIFIER_ABS_NEGATE: return "-"; 419 case MODIFIER_NOT: return "!"; 420 default: 421 ASSERT(false); 422 } 423 424 return ""; 425 } 426 427 std::string Shader::Parameter::relativeString() const 428 { 429 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) 430 { 431 if(rel.type == PARAMETER_VOID) 432 { 433 return ""; 434 } 435 else if(rel.type == PARAMETER_ADDR) 436 { 437 switch(rel.swizzle & 0x03) 438 { 439 case 0: return "[a0.x]"; 440 case 1: return "[a0.y]"; 441 case 2: return "[a0.z]"; 442 case 3: return "[a0.w]"; 443 } 444 } 445 else if(rel.type == PARAMETER_TEMP) 446 { 447 std::ostringstream buffer; 448 buffer << rel.index; 449 450 switch(rel.swizzle & 0x03) 451 { 452 case 0: return "[r" + buffer.str() + ".x]"; 453 case 1: return "[r" + buffer.str() + ".y]"; 454 case 2: return "[r" + buffer.str() + ".z]"; 455 case 3: return "[r" + buffer.str() + ".w]"; 456 } 457 } 458 else if(rel.type == PARAMETER_LOOP) 459 { 460 return "[aL]"; 461 } 462 else if(rel.type == PARAMETER_CONST) 463 { 464 std::ostringstream buffer; 465 buffer << rel.index; 466 467 switch(rel.swizzle & 0x03) 468 { 469 case 0: return "[c" + buffer.str() + ".x]"; 470 case 1: return "[c" + buffer.str() + ".y]"; 471 case 2: return "[c" + buffer.str() + ".z]"; 472 case 3: return "[c" + buffer.str() + ".w]"; 473 } 474 } 475 else ASSERT(false); 476 } 477 478 return ""; 479 } 480 481 std::string Shader::SourceParameter::postModifierString() const 482 { 483 if(type == PARAMETER_VOID) 484 { 485 return ""; 486 } 487 488 switch(modifier) 489 { 490 case MODIFIER_NONE: return ""; 491 case MODIFIER_NEGATE: return ""; 492 case MODIFIER_BIAS: return "_bias"; 493 case MODIFIER_BIAS_NEGATE: return "_bias"; 494 case MODIFIER_SIGN: return "_bx2"; 495 case MODIFIER_SIGN_NEGATE: return "_bx2"; 496 case MODIFIER_COMPLEMENT: return ""; 497 case MODIFIER_X2: return "_x2"; 498 case MODIFIER_X2_NEGATE: return "_x2"; 499 case MODIFIER_DZ: return "_dz"; 500 case MODIFIER_DW: return "_dw"; 501 case MODIFIER_ABS: return "_abs"; 502 case MODIFIER_ABS_NEGATE: return "_abs"; 503 case MODIFIER_NOT: return ""; 504 default: 505 ASSERT(false); 506 } 507 508 return ""; 509 } 510 511 std::string Shader::SourceParameter::swizzleString() const 512 { 513 return Instruction::swizzleString(type, swizzle); 514 } 515 516 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion) 517 { 518 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token 519 { 520 opcode = (Opcode)token; 521 522 control = CONTROL_RESERVED0; 523 predicate = false; 524 coissue = false; 525 } 526 else 527 { 528 opcode = (Opcode)(token & 0x0000FFFF); 529 control = (Control)((token & 0x00FF0000) >> 16); 530 531 int size = (token & 0x0F000000) >> 24; 532 533 predicate = (token & 0x10000000) != 0x00000000; 534 coissue = (token & 0x40000000) != 0x00000000; 535 536 if(majorVersion < 2) 537 { 538 if(size != 0) 539 { 540 ASSERT(false); // Reserved 541 } 542 } 543 544 if(majorVersion < 2) 545 { 546 if(predicate) 547 { 548 ASSERT(false); 549 } 550 } 551 552 if((token & 0x20000000) != 0x00000000) 553 { 554 ASSERT(false); // Reserved 555 } 556 557 if(majorVersion >= 2) 558 { 559 if(coissue) 560 { 561 ASSERT(false); // Reserved 562 } 563 } 564 565 if((token & 0x80000000) != 0x00000000) 566 { 567 ASSERT(false); 568 } 569 } 570 } 571 572 void Shader::Instruction::parseDeclarationToken(unsigned long token) 573 { 574 samplerType = (SamplerType)((token & 0x78000000) >> 27); 575 usage = (Usage)(token & 0x0000001F); 576 usageIndex = (unsigned char)((token & 0x000F0000) >> 16); 577 } 578 579 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) 580 { 581 dst.index = (unsigned short)(token[0] & 0x000007FF); 582 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 583 584 // TODO: Check type and index range 585 586 bool relative = (token[0] & 0x00002000) != 0x00000000; 587 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 588 dst.rel.swizzle = 0x00; 589 dst.rel.scale = 1; 590 591 if(relative && majorVersion >= 3) 592 { 593 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 594 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 595 } 596 else if(relative) ASSERT(false); // Reserved 597 598 if((token[0] & 0x0000C000) != 0x00000000) 599 { 600 ASSERT(false); // Reserved 601 } 602 603 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); 604 dst.saturate = (token[0] & 0x00100000) != 0; 605 dst.partialPrecision = (token[0] & 0x00200000) != 0; 606 dst.centroid = (token[0] & 0x00400000) != 0; 607 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; 608 609 if(majorVersion >= 2) 610 { 611 if(dst.shift) 612 { 613 ASSERT(false); // Reserved 614 } 615 } 616 617 if((token[0] & 0x80000000) != 0x80000000) 618 { 619 ASSERT(false); 620 } 621 } 622 623 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) 624 { 625 // Defaults 626 src[i].index = 0; 627 src[i].type = PARAMETER_VOID; 628 src[i].modifier = MODIFIER_NONE; 629 src[i].swizzle = 0xE4; 630 src[i].rel.type = PARAMETER_VOID; 631 src[i].rel.swizzle = 0x00; 632 src[i].rel.scale = 1; 633 634 switch(opcode) 635 { 636 case OPCODE_DEF: 637 src[0].type = PARAMETER_FLOAT4LITERAL; 638 src[0].value[i] = *(float*)token; 639 break; 640 case OPCODE_DEFB: 641 src[0].type = PARAMETER_BOOL1LITERAL; 642 src[0].boolean[0] = *(int*)token; 643 break; 644 case OPCODE_DEFI: 645 src[0].type = PARAMETER_INT4LITERAL; 646 src[0].integer[i] = *(int*)token; 647 break; 648 default: 649 src[i].index = (unsigned short)(token[0] & 0x000007FF); 650 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 651 652 // FIXME: Check type and index range 653 654 bool relative = (token[0] & 0x00002000) != 0x00000000; 655 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 656 657 if((token[0] & 0x0000C000) != 0x00000000) 658 { 659 if(opcode != OPCODE_DEF && 660 opcode != OPCODE_DEFI && 661 opcode != OPCODE_DEFB) 662 { 663 ASSERT(false); 664 } 665 } 666 667 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); 668 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); 669 670 if((token[0] & 0x80000000) != 0x80000000) 671 { 672 if(opcode != OPCODE_DEF && 673 opcode != OPCODE_DEFI && 674 opcode != OPCODE_DEFB) 675 { 676 ASSERT(false); 677 } 678 } 679 680 if(relative && majorVersion >= 2) 681 { 682 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 683 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 684 } 685 } 686 } 687 688 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) 689 { 690 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) 691 { 692 return ""; 693 } 694 695 int x = (swizzle & 0x03) >> 0; 696 int y = (swizzle & 0x0C) >> 2; 697 int z = (swizzle & 0x30) >> 4; 698 int w = (swizzle & 0xC0) >> 6; 699 700 std::string swizzleString = "."; 701 702 switch(x) 703 { 704 case 0: swizzleString += "x"; break; 705 case 1: swizzleString += "y"; break; 706 case 2: swizzleString += "z"; break; 707 case 3: swizzleString += "w"; break; 708 } 709 710 if(!(x == y && y == z && z == w)) 711 { 712 switch(y) 713 { 714 case 0: swizzleString += "x"; break; 715 case 1: swizzleString += "y"; break; 716 case 2: swizzleString += "z"; break; 717 case 3: swizzleString += "w"; break; 718 } 719 720 if(!(y == z && z == w)) 721 { 722 switch(z) 723 { 724 case 0: swizzleString += "x"; break; 725 case 1: swizzleString += "y"; break; 726 case 2: swizzleString += "z"; break; 727 case 3: swizzleString += "w"; break; 728 } 729 730 if(!(z == w)) 731 { 732 switch(w) 733 { 734 case 0: swizzleString += "x"; break; 735 case 1: swizzleString += "y"; break; 736 case 2: swizzleString += "z"; break; 737 case 3: swizzleString += "w"; break; 738 } 739 } 740 } 741 } 742 743 return swizzleString; 744 } 745 746 std::string Shader::Instruction::operationString(unsigned short version) const 747 { 748 switch(opcode) 749 { 750 case OPCODE_NULL: return "null"; 751 case OPCODE_NOP: return "nop"; 752 case OPCODE_MOV: return "mov"; 753 case OPCODE_ADD: return "add"; 754 case OPCODE_IADD: return "iadd"; 755 case OPCODE_SUB: return "sub"; 756 case OPCODE_ISUB: return "isub"; 757 case OPCODE_MAD: return "mad"; 758 case OPCODE_IMAD: return "imad"; 759 case OPCODE_MUL: return "mul"; 760 case OPCODE_IMUL: return "imul"; 761 case OPCODE_RCPX: return "rcpx"; 762 case OPCODE_DIV: return "div"; 763 case OPCODE_IDIV: return "idiv"; 764 case OPCODE_UDIV: return "udiv"; 765 case OPCODE_MOD: return "mod"; 766 case OPCODE_IMOD: return "imod"; 767 case OPCODE_UMOD: return "umod"; 768 case OPCODE_SHL: return "shl"; 769 case OPCODE_ISHR: return "ishr"; 770 case OPCODE_USHR: return "ushr"; 771 case OPCODE_RSQX: return "rsqx"; 772 case OPCODE_SQRT: return "sqrt"; 773 case OPCODE_RSQ: return "rsq"; 774 case OPCODE_LEN2: return "len2"; 775 case OPCODE_LEN3: return "len3"; 776 case OPCODE_LEN4: return "len4"; 777 case OPCODE_DIST1: return "dist1"; 778 case OPCODE_DIST2: return "dist2"; 779 case OPCODE_DIST3: return "dist3"; 780 case OPCODE_DIST4: return "dist4"; 781 case OPCODE_DP3: return "dp3"; 782 case OPCODE_DP4: return "dp4"; 783 case OPCODE_DET2: return "det2"; 784 case OPCODE_DET3: return "det3"; 785 case OPCODE_DET4: return "det4"; 786 case OPCODE_MIN: return "min"; 787 case OPCODE_IMIN: return "imin"; 788 case OPCODE_UMIN: return "umin"; 789 case OPCODE_MAX: return "max"; 790 case OPCODE_IMAX: return "imax"; 791 case OPCODE_UMAX: return "umax"; 792 case OPCODE_SLT: return "slt"; 793 case OPCODE_SGE: return "sge"; 794 case OPCODE_EXP2X: return "exp2x"; 795 case OPCODE_LOG2X: return "log2x"; 796 case OPCODE_LIT: return "lit"; 797 case OPCODE_ATT: return "att"; 798 case OPCODE_LRP: return "lrp"; 799 case OPCODE_STEP: return "step"; 800 case OPCODE_SMOOTH: return "smooth"; 801 case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; 802 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt"; 803 case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; 804 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat"; 805 case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; 806 case OPCODE_PACKUNORM2x16: return "packUnorm2x16"; 807 case OPCODE_PACKHALF2x16: return "packHalf2x16"; 808 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16"; 809 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16"; 810 case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; 811 case OPCODE_FRC: return "frc"; 812 case OPCODE_M4X4: return "m4x4"; 813 case OPCODE_M4X3: return "m4x3"; 814 case OPCODE_M3X4: return "m3x4"; 815 case OPCODE_M3X3: return "m3x3"; 816 case OPCODE_M3X2: return "m3x2"; 817 case OPCODE_CALL: return "call"; 818 case OPCODE_CALLNZ: return "callnz"; 819 case OPCODE_LOOP: return "loop"; 820 case OPCODE_RET: return "ret"; 821 case OPCODE_ENDLOOP: return "endloop"; 822 case OPCODE_LABEL: return "label"; 823 case OPCODE_DCL: return "dcl"; 824 case OPCODE_POWX: return "powx"; 825 case OPCODE_CRS: return "crs"; 826 case OPCODE_SGN: return "sgn"; 827 case OPCODE_ISGN: return "isgn"; 828 case OPCODE_ABS: return "abs"; 829 case OPCODE_IABS: return "iabs"; 830 case OPCODE_NRM2: return "nrm2"; 831 case OPCODE_NRM3: return "nrm3"; 832 case OPCODE_NRM4: return "nrm4"; 833 case OPCODE_SINCOS: return "sincos"; 834 case OPCODE_REP: return "rep"; 835 case OPCODE_ENDREP: return "endrep"; 836 case OPCODE_IF: return "if"; 837 case OPCODE_IFC: return "ifc"; 838 case OPCODE_ELSE: return "else"; 839 case OPCODE_ENDIF: return "endif"; 840 case OPCODE_BREAK: return "break"; 841 case OPCODE_BREAKC: return "breakc"; 842 case OPCODE_MOVA: return "mova"; 843 case OPCODE_DEFB: return "defb"; 844 case OPCODE_DEFI: return "defi"; 845 case OPCODE_TEXCOORD: return "texcoord"; 846 case OPCODE_TEXKILL: return "texkill"; 847 case OPCODE_DISCARD: return "discard"; 848 case OPCODE_TEX: 849 if(version < 0x0104) return "tex"; 850 else return "texld"; 851 case OPCODE_TEXBEM: return "texbem"; 852 case OPCODE_TEXBEML: return "texbeml"; 853 case OPCODE_TEXREG2AR: return "texreg2ar"; 854 case OPCODE_TEXREG2GB: return "texreg2gb"; 855 case OPCODE_TEXM3X2PAD: return "texm3x2pad"; 856 case OPCODE_TEXM3X2TEX: return "texm3x2tex"; 857 case OPCODE_TEXM3X3PAD: return "texm3x3pad"; 858 case OPCODE_TEXM3X3TEX: return "texm3x3tex"; 859 case OPCODE_RESERVED0: return "reserved0"; 860 case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; 861 case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; 862 case OPCODE_EXPP: return "expp"; 863 case OPCODE_LOGP: return "logp"; 864 case OPCODE_CND: return "cnd"; 865 case OPCODE_DEF: return "def"; 866 case OPCODE_TEXREG2RGB: return "texreg2rgb"; 867 case OPCODE_TEXDP3TEX: return "texdp3tex"; 868 case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; 869 case OPCODE_TEXDP3: return "texdp3"; 870 case OPCODE_TEXM3X3: return "texm3x3"; 871 case OPCODE_TEXDEPTH: return "texdepth"; 872 case OPCODE_CMP0: return "cmp0"; 873 case OPCODE_ICMP: return "icmp"; 874 case OPCODE_UCMP: return "ucmp"; 875 case OPCODE_SELECT: return "select"; 876 case OPCODE_EXTRACT: return "extract"; 877 case OPCODE_INSERT: return "insert"; 878 case OPCODE_BEM: return "bem"; 879 case OPCODE_DP2ADD: return "dp2add"; 880 case OPCODE_DFDX: return "dFdx"; 881 case OPCODE_DFDY: return "dFdy"; 882 case OPCODE_FWIDTH: return "fwidth"; 883 case OPCODE_TEXLDD: return "texldd"; 884 case OPCODE_CMP: return "cmp"; 885 case OPCODE_TEXLDL: return "texldl"; 886 case OPCODE_TEXOFFSET: return "texoffset"; 887 case OPCODE_TEXLDLOFFSET: return "texldloffset"; 888 case OPCODE_TEXELFETCH: return "texelfetch"; 889 case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset"; 890 case OPCODE_TEXGRAD: return "texgrad"; 891 case OPCODE_TEXGRADOFFSET: return "texgradoffset"; 892 case OPCODE_BREAKP: return "breakp"; 893 case OPCODE_TEXSIZE: return "texsize"; 894 case OPCODE_PHASE: return "phase"; 895 case OPCODE_COMMENT: return "comment"; 896 case OPCODE_END: return "end"; 897 case OPCODE_PS_1_0: return "ps_1_0"; 898 case OPCODE_PS_1_1: return "ps_1_1"; 899 case OPCODE_PS_1_2: return "ps_1_2"; 900 case OPCODE_PS_1_3: return "ps_1_3"; 901 case OPCODE_PS_1_4: return "ps_1_4"; 902 case OPCODE_PS_2_0: return "ps_2_0"; 903 case OPCODE_PS_2_x: return "ps_2_x"; 904 case OPCODE_PS_3_0: return "ps_3_0"; 905 case OPCODE_VS_1_0: return "vs_1_0"; 906 case OPCODE_VS_1_1: return "vs_1_1"; 907 case OPCODE_VS_2_0: return "vs_2_0"; 908 case OPCODE_VS_2_x: return "vs_2_x"; 909 case OPCODE_VS_2_sw: return "vs_2_sw"; 910 case OPCODE_VS_3_0: return "vs_3_0"; 911 case OPCODE_VS_3_sw: return "vs_3_sw"; 912 case OPCODE_WHILE: return "while"; 913 case OPCODE_ENDWHILE: return "endwhile"; 914 case OPCODE_COS: return "cos"; 915 case OPCODE_SIN: return "sin"; 916 case OPCODE_TAN: return "tan"; 917 case OPCODE_ACOS: return "acos"; 918 case OPCODE_ASIN: return "asin"; 919 case OPCODE_ATAN: return "atan"; 920 case OPCODE_ATAN2: return "atan2"; 921 case OPCODE_COSH: return "cosh"; 922 case OPCODE_SINH: return "sinh"; 923 case OPCODE_TANH: return "tanh"; 924 case OPCODE_ACOSH: return "acosh"; 925 case OPCODE_ASINH: return "asinh"; 926 case OPCODE_ATANH: return "atanh"; 927 case OPCODE_DP1: return "dp1"; 928 case OPCODE_DP2: return "dp2"; 929 case OPCODE_TRUNC: return "trunc"; 930 case OPCODE_FLOOR: return "floor"; 931 case OPCODE_ROUND: return "round"; 932 case OPCODE_ROUNDEVEN: return "roundEven"; 933 case OPCODE_CEIL: return "ceil"; 934 case OPCODE_EXP2: return "exp2"; 935 case OPCODE_LOG2: return "log2"; 936 case OPCODE_EXP: return "exp"; 937 case OPCODE_LOG: return "log"; 938 case OPCODE_POW: return "pow"; 939 case OPCODE_F2B: return "f2b"; 940 case OPCODE_B2F: return "b2f"; 941 case OPCODE_F2I: return "f2i"; 942 case OPCODE_I2F: return "i2f"; 943 case OPCODE_F2U: return "f2u"; 944 case OPCODE_U2F: return "u2f"; 945 case OPCODE_B2I: return "b2i"; 946 case OPCODE_I2B: return "i2b"; 947 case OPCODE_ALL: return "all"; 948 case OPCODE_ANY: return "any"; 949 case OPCODE_NEG: return "neg"; 950 case OPCODE_INEG: return "ineg"; 951 case OPCODE_ISNAN: return "isnan"; 952 case OPCODE_ISINF: return "isinf"; 953 case OPCODE_NOT: return "not"; 954 case OPCODE_OR: return "or"; 955 case OPCODE_XOR: return "xor"; 956 case OPCODE_AND: return "and"; 957 case OPCODE_EQ: return "eq"; 958 case OPCODE_NE: return "neq"; 959 case OPCODE_FORWARD1: return "forward1"; 960 case OPCODE_FORWARD2: return "forward2"; 961 case OPCODE_FORWARD3: return "forward3"; 962 case OPCODE_FORWARD4: return "forward4"; 963 case OPCODE_REFLECT1: return "reflect1"; 964 case OPCODE_REFLECT2: return "reflect2"; 965 case OPCODE_REFLECT3: return "reflect3"; 966 case OPCODE_REFLECT4: return "reflect4"; 967 case OPCODE_REFRACT1: return "refract1"; 968 case OPCODE_REFRACT2: return "refract2"; 969 case OPCODE_REFRACT3: return "refract3"; 970 case OPCODE_REFRACT4: return "refract4"; 971 case OPCODE_LEAVE: return "leave"; 972 case OPCODE_CONTINUE: return "continue"; 973 case OPCODE_TEST: return "test"; 974 case OPCODE_SWITCH: return "switch"; 975 case OPCODE_ENDSWITCH: return "endswitch"; 976 default: 977 ASSERT(false); 978 } 979 980 return "<unknown>"; 981 } 982 983 std::string Shader::Instruction::controlString() const 984 { 985 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) 986 { 987 if(project) return "p"; 988 989 if(bias) return "b"; 990 991 // FIXME: LOD 992 } 993 994 switch(control) 995 { 996 case 1: return "_gt"; 997 case 2: return "_eq"; 998 case 3: return "_ge"; 999 case 4: return "_lt"; 1000 case 5: return "_ne"; 1001 case 6: return "_le"; 1002 default: 1003 return ""; 1004 // ASSERT(false); // FIXME 1005 } 1006 } 1007 1008 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const 1009 { 1010 std::ostringstream buffer; 1011 1012 if(type == PARAMETER_FLOAT4LITERAL) 1013 { 1014 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; 1015 1016 return buffer.str(); 1017 } 1018 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) 1019 { 1020 buffer << index; 1021 1022 return typeString(shaderType, version) + buffer.str(); 1023 } 1024 else 1025 { 1026 return typeString(shaderType, version); 1027 } 1028 } 1029 1030 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const 1031 { 1032 switch(type) 1033 { 1034 case PARAMETER_TEMP: return "r"; 1035 case PARAMETER_INPUT: return "v"; 1036 case PARAMETER_CONST: return "c"; 1037 case PARAMETER_TEXTURE: 1038 // case PARAMETER_ADDR: 1039 if(shaderType == SHADER_PIXEL) return "t"; 1040 else return "a0"; 1041 case PARAMETER_RASTOUT: 1042 if(index == 0) return "oPos"; 1043 else if(index == 1) return "oFog"; 1044 else if(index == 2) return "oPts"; 1045 else ASSERT(false); 1046 case PARAMETER_ATTROUT: return "oD"; 1047 case PARAMETER_TEXCRDOUT: 1048 // case PARAMETER_OUTPUT: return ""; 1049 if(version < 0x0300) return "oT"; 1050 else return "o"; 1051 case PARAMETER_CONSTINT: return "i"; 1052 case PARAMETER_COLOROUT: return "oC"; 1053 case PARAMETER_DEPTHOUT: return "oDepth"; 1054 case PARAMETER_SAMPLER: return "s"; 1055 // case PARAMETER_CONST2: return ""; 1056 // case PARAMETER_CONST3: return ""; 1057 // case PARAMETER_CONST4: return ""; 1058 case PARAMETER_CONSTBOOL: return "b"; 1059 case PARAMETER_LOOP: return "aL"; 1060 // case PARAMETER_TEMPFLOAT16: return ""; 1061 case PARAMETER_MISCTYPE: 1062 if(index == 0) return "vPos"; 1063 else if(index == 1) return "vFace"; 1064 else ASSERT(false); 1065 case PARAMETER_LABEL: return "l"; 1066 case PARAMETER_PREDICATE: return "p0"; 1067 case PARAMETER_FLOAT4LITERAL: return ""; 1068 case PARAMETER_BOOL1LITERAL: return ""; 1069 case PARAMETER_INT4LITERAL: return ""; 1070 // case PARAMETER_VOID: return ""; 1071 default: 1072 ASSERT(false); 1073 } 1074 1075 return ""; 1076 } 1077 1078 bool Shader::Instruction::isBranch() const 1079 { 1080 return opcode == OPCODE_IF || opcode == OPCODE_IFC; 1081 } 1082 1083 bool Shader::Instruction::isCall() const 1084 { 1085 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; 1086 } 1087 1088 bool Shader::Instruction::isBreak() const 1089 { 1090 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; 1091 } 1092 1093 bool Shader::Instruction::isLoopOrSwitch() const 1094 { 1095 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE || opcode == OPCODE_SWITCH; 1096 } 1097 1098 bool Shader::Instruction::isEndLoopOrSwitch() const 1099 { 1100 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE || opcode == OPCODE_ENDSWITCH;; 1101 } 1102 1103 bool Shader::Instruction::isPredicated() const 1104 { 1105 return predicate || 1106 analysisBranch || 1107 analysisBreak || 1108 analysisContinue || 1109 analysisLeave; 1110 } 1111 1112 Shader::Shader() : serialID(serialCounter++) 1113 { 1114 usedSamplers = 0; 1115 } 1116 1117 Shader::~Shader() 1118 { 1119 for(unsigned int i = 0; i < instruction.size(); i++) 1120 { 1121 delete instruction[i]; 1122 instruction[i] = 0; 1123 } 1124 } 1125 1126 void Shader::parse(const unsigned long *token) 1127 { 1128 minorVersion = (unsigned char)(token[0] & 0x000000FF); 1129 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); 1130 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); 1131 1132 int length = 0; 1133 1134 if(shaderType == SHADER_VERTEX) 1135 { 1136 length = VertexShader::validate(token); 1137 } 1138 else if(shaderType == SHADER_PIXEL) 1139 { 1140 length = PixelShader::validate(token); 1141 } 1142 else ASSERT(false); 1143 1144 ASSERT(length != 0); 1145 instruction.resize(length); 1146 1147 for(int i = 0; i < length; i++) 1148 { 1149 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token 1150 { 1151 int length = (*token & 0x7FFF0000) >> 16; 1152 1153 token += length + 1; 1154 } 1155 1156 int tokenCount = size(*token); 1157 1158 instruction[i] = new Instruction(token, tokenCount, majorVersion); 1159 1160 token += 1 + tokenCount; 1161 } 1162 } 1163 1164 int Shader::size(unsigned long opcode) const 1165 { 1166 return size(opcode, version); 1167 } 1168 1169 int Shader::size(unsigned long opcode, unsigned short version) 1170 { 1171 if(version > 0x0300) 1172 { 1173 ASSERT(false); 1174 } 1175 1176 static const char size[] = 1177 { 1178 0, // NOP = 0 1179 2, // MOV 1180 3, // ADD 1181 3, // SUB 1182 4, // MAD 1183 3, // MUL 1184 2, // RCP 1185 2, // RSQ 1186 3, // DP3 1187 3, // DP4 1188 3, // MIN 1189 3, // MAX 1190 3, // SLT 1191 3, // SGE 1192 2, // EXP 1193 2, // LOG 1194 2, // LIT 1195 3, // DST 1196 4, // LRP 1197 2, // FRC 1198 3, // M4x4 1199 3, // M4x3 1200 3, // M3x4 1201 3, // M3x3 1202 3, // M3x2 1203 1, // CALL 1204 2, // CALLNZ 1205 2, // LOOP 1206 0, // RET 1207 0, // ENDLOOP 1208 1, // LABEL 1209 2, // DCL 1210 3, // POW 1211 3, // CRS 1212 4, // SGN 1213 2, // ABS 1214 2, // NRM 1215 4, // SINCOS 1216 1, // REP 1217 0, // ENDREP 1218 1, // IF 1219 2, // IFC 1220 0, // ELSE 1221 0, // ENDIF 1222 0, // BREAK 1223 2, // BREAKC 1224 2, // MOVA 1225 2, // DEFB 1226 5, // DEFI 1227 -1, // 49 1228 -1, // 50 1229 -1, // 51 1230 -1, // 52 1231 -1, // 53 1232 -1, // 54 1233 -1, // 55 1234 -1, // 56 1235 -1, // 57 1236 -1, // 58 1237 -1, // 59 1238 -1, // 60 1239 -1, // 61 1240 -1, // 62 1241 -1, // 63 1242 1, // TEXCOORD = 64 1243 1, // TEXKILL 1244 1, // TEX 1245 2, // TEXBEM 1246 2, // TEXBEML 1247 2, // TEXREG2AR 1248 2, // TEXREG2GB 1249 2, // TEXM3x2PAD 1250 2, // TEXM3x2TEX 1251 2, // TEXM3x3PAD 1252 2, // TEXM3x3TEX 1253 -1, // RESERVED0 1254 3, // TEXM3x3SPEC 1255 2, // TEXM3x3VSPEC 1256 2, // EXPP 1257 2, // LOGP 1258 4, // CND 1259 5, // DEF 1260 2, // TEXREG2RGB 1261 2, // TEXDP3TEX 1262 2, // TEXM3x2DEPTH 1263 2, // TEXDP3 1264 2, // TEXM3x3 1265 1, // TEXDEPTH 1266 4, // CMP 1267 3, // BEM 1268 4, // DP2ADD 1269 2, // DSX 1270 2, // DSY 1271 5, // TEXLDD 1272 3, // SETP 1273 3, // TEXLDL 1274 2, // BREAKP 1275 -1, // 97 1276 -1, // 98 1277 -1, // 99 1278 -1, // 100 1279 -1, // 101 1280 -1, // 102 1281 -1, // 103 1282 -1, // 104 1283 -1, // 105 1284 -1, // 106 1285 -1, // 107 1286 -1, // 108 1287 -1, // 109 1288 -1, // 110 1289 -1, // 111 1290 -1, // 112 1291 }; 1292 1293 int length = 0; 1294 1295 if((opcode & 0x0000FFFF) == OPCODE_COMMENT) 1296 { 1297 return (opcode & 0x7FFF0000) >> 16; 1298 } 1299 1300 if(opcode != OPCODE_PS_1_0 && 1301 opcode != OPCODE_PS_1_1 && 1302 opcode != OPCODE_PS_1_2 && 1303 opcode != OPCODE_PS_1_3 && 1304 opcode != OPCODE_PS_1_4 && 1305 opcode != OPCODE_PS_2_0 && 1306 opcode != OPCODE_PS_2_x && 1307 opcode != OPCODE_PS_3_0 && 1308 opcode != OPCODE_VS_1_0 && 1309 opcode != OPCODE_VS_1_1 && 1310 opcode != OPCODE_VS_2_0 && 1311 opcode != OPCODE_VS_2_x && 1312 opcode != OPCODE_VS_2_sw && 1313 opcode != OPCODE_VS_3_0 && 1314 opcode != OPCODE_VS_3_sw && 1315 opcode != OPCODE_PHASE && 1316 opcode != OPCODE_END) 1317 { 1318 if(version >= 0x0200) 1319 { 1320 length = (opcode & 0x0F000000) >> 24; 1321 } 1322 else 1323 { 1324 length = size[opcode & 0x0000FFFF]; 1325 } 1326 } 1327 1328 if(length < 0) 1329 { 1330 ASSERT(false); 1331 } 1332 1333 if(version == 0x0104) 1334 { 1335 switch(opcode & 0x0000FFFF) 1336 { 1337 case OPCODE_TEX: 1338 length += 1; 1339 break; 1340 case OPCODE_TEXCOORD: 1341 length += 1; 1342 break; 1343 default: 1344 break; 1345 } 1346 } 1347 1348 return length; 1349 } 1350 1351 bool Shader::maskContainsComponent(int mask, int component) 1352 { 1353 return (mask & (1 << component)) != 0; 1354 } 1355 1356 bool Shader::swizzleContainsComponent(int swizzle, int component) 1357 { 1358 if((swizzle & 0x03) >> 0 == component) return true; 1359 if((swizzle & 0x0C) >> 2 == component) return true; 1360 if((swizzle & 0x30) >> 4 == component) return true; 1361 if((swizzle & 0xC0) >> 6 == component) return true; 1362 1363 return false; 1364 } 1365 1366 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask) 1367 { 1368 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true; 1369 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true; 1370 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true; 1371 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true; 1372 1373 return false; 1374 } 1375 1376 bool Shader::containsDynamicBranching() const 1377 { 1378 return dynamicBranching; 1379 } 1380 1381 bool Shader::containsBreakInstruction() const 1382 { 1383 return containsBreak; 1384 } 1385 1386 bool Shader::containsContinueInstruction() const 1387 { 1388 return containsContinue; 1389 } 1390 1391 bool Shader::containsLeaveInstruction() const 1392 { 1393 return containsLeave; 1394 } 1395 1396 bool Shader::containsDefineInstruction() const 1397 { 1398 return containsDefine; 1399 } 1400 1401 bool Shader::usesSampler(int index) const 1402 { 1403 return (usedSamplers & (1 << index)) != 0; 1404 } 1405 1406 int Shader::getSerialID() const 1407 { 1408 return serialID; 1409 } 1410 1411 size_t Shader::getLength() const 1412 { 1413 return instruction.size(); 1414 } 1415 1416 Shader::ShaderType Shader::getShaderType() const 1417 { 1418 return shaderType; 1419 } 1420 1421 unsigned short Shader::getVersion() const 1422 { 1423 return version; 1424 } 1425 1426 void Shader::print(const char *fileName, ...) const 1427 { 1428 char fullName[1024 + 1]; 1429 1430 va_list vararg; 1431 va_start(vararg, fileName); 1432 vsnprintf(fullName, 1024, fileName, vararg); 1433 va_end(vararg); 1434 1435 std::ofstream file(fullName, std::ofstream::out); 1436 1437 for(unsigned int i = 0; i < instruction.size(); i++) 1438 { 1439 file << instruction[i]->string(shaderType, version) << std::endl; 1440 } 1441 } 1442 1443 void Shader::printInstruction(int index, const char *fileName) const 1444 { 1445 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app); 1446 1447 file << instruction[index]->string(shaderType, version) << std::endl; 1448 } 1449 1450 void Shader::append(Instruction *instruction) 1451 { 1452 this->instruction.push_back(instruction); 1453 } 1454 1455 void Shader::declareSampler(int i) 1456 { 1457 usedSamplers |= 1 << i; 1458 } 1459 1460 const Shader::Instruction *Shader::getInstruction(unsigned int i) const 1461 { 1462 ASSERT(i < instruction.size()); 1463 1464 return instruction[i]; 1465 } 1466 1467 void Shader::optimize() 1468 { 1469 optimizeLeave(); 1470 optimizeCall(); 1471 removeNull(); 1472 } 1473 1474 void Shader::optimizeLeave() 1475 { 1476 // A return (leave) right before the end of a function or the shader can be removed 1477 for(unsigned int i = 0; i < instruction.size(); i++) 1478 { 1479 if(instruction[i]->opcode == OPCODE_LEAVE) 1480 { 1481 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET) 1482 { 1483 instruction[i]->opcode = OPCODE_NULL; 1484 } 1485 } 1486 } 1487 } 1488 1489 void Shader::optimizeCall() 1490 { 1491 // Eliminate uncalled functions 1492 std::set<int> calledFunctions; 1493 bool rescan = true; 1494 1495 while(rescan) 1496 { 1497 calledFunctions.clear(); 1498 rescan = false; 1499 1500 for(unsigned int i = 0; i < instruction.size(); i++) 1501 { 1502 if(instruction[i]->isCall()) 1503 { 1504 calledFunctions.insert(instruction[i]->dst.label); 1505 } 1506 } 1507 1508 if(!calledFunctions.empty()) 1509 { 1510 for(unsigned int i = 0; i < instruction.size(); i++) 1511 { 1512 if(instruction[i]->opcode == OPCODE_LABEL) 1513 { 1514 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end()) 1515 { 1516 for( ; i < instruction.size(); i++) 1517 { 1518 Opcode oldOpcode = instruction[i]->opcode; 1519 instruction[i]->opcode = OPCODE_NULL; 1520 1521 if(oldOpcode == OPCODE_RET) 1522 { 1523 rescan = true; 1524 break; 1525 } 1526 } 1527 } 1528 } 1529 } 1530 } 1531 } 1532 1533 // Optimize the entry call 1534 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET) 1535 { 1536 if(calledFunctions.size() == 1) 1537 { 1538 instruction[0]->opcode = OPCODE_NULL; 1539 instruction[1]->opcode = OPCODE_NULL; 1540 1541 for(size_t i = 2; i < instruction.size(); i++) 1542 { 1543 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET) 1544 { 1545 instruction[i]->opcode = OPCODE_NULL; 1546 } 1547 } 1548 } 1549 } 1550 } 1551 1552 void Shader::removeNull() 1553 { 1554 size_t size = 0; 1555 for(size_t i = 0; i < instruction.size(); i++) 1556 { 1557 if(instruction[i]->opcode != OPCODE_NULL) 1558 { 1559 instruction[size] = instruction[i]; 1560 size++; 1561 } 1562 else 1563 { 1564 delete instruction[i]; 1565 } 1566 } 1567 1568 instruction.resize(size); 1569 } 1570 1571 void Shader::analyzeDirtyConstants() 1572 { 1573 dirtyConstantsF = 0; 1574 dirtyConstantsI = 0; 1575 dirtyConstantsB = 0; 1576 1577 for(unsigned int i = 0; i < instruction.size(); i++) 1578 { 1579 switch(instruction[i]->opcode) 1580 { 1581 case OPCODE_DEF: 1582 if(instruction[i]->dst.index + 1 > dirtyConstantsF) 1583 { 1584 dirtyConstantsF = instruction[i]->dst.index + 1; 1585 } 1586 break; 1587 case OPCODE_DEFI: 1588 if(instruction[i]->dst.index + 1 > dirtyConstantsI) 1589 { 1590 dirtyConstantsI = instruction[i]->dst.index + 1; 1591 } 1592 break; 1593 case OPCODE_DEFB: 1594 if(instruction[i]->dst.index + 1 > dirtyConstantsB) 1595 { 1596 dirtyConstantsB = instruction[i]->dst.index + 1; 1597 } 1598 break; 1599 default: 1600 break; 1601 } 1602 } 1603 } 1604 1605 void Shader::analyzeDynamicBranching() 1606 { 1607 dynamicBranching = false; 1608 containsLeave = false; 1609 containsBreak = false; 1610 containsContinue = false; 1611 containsDefine = false; 1612 1613 // Determine global presence of branching instructions 1614 for(unsigned int i = 0; i < instruction.size(); i++) 1615 { 1616 switch(instruction[i]->opcode) 1617 { 1618 case OPCODE_CALLNZ: 1619 case OPCODE_IF: 1620 case OPCODE_IFC: 1621 case OPCODE_BREAK: 1622 case OPCODE_BREAKC: 1623 case OPCODE_CMP: 1624 case OPCODE_BREAKP: 1625 case OPCODE_LEAVE: 1626 case OPCODE_CONTINUE: 1627 if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL) 1628 { 1629 dynamicBranching = true; 1630 } 1631 1632 if(instruction[i]->opcode == OPCODE_LEAVE) 1633 { 1634 containsLeave = true; 1635 } 1636 1637 if(instruction[i]->isBreak()) 1638 { 1639 containsBreak = true; 1640 } 1641 1642 if(instruction[i]->opcode == OPCODE_CONTINUE) 1643 { 1644 containsContinue = true; 1645 } 1646 case OPCODE_DEF: 1647 case OPCODE_DEFB: 1648 case OPCODE_DEFI: 1649 containsDefine = true; 1650 default: 1651 break; 1652 } 1653 } 1654 1655 // Conservatively determine which instructions are affected by dynamic branching 1656 int branchDepth = 0; 1657 int breakDepth = 0; 1658 int continueDepth = 0; 1659 bool leaveReturn = false; 1660 1661 for(unsigned int i = 0; i < instruction.size(); i++) 1662 { 1663 // If statements 1664 if(instruction[i]->isBranch()) 1665 { 1666 branchDepth++; 1667 } 1668 else if(instruction[i]->opcode == OPCODE_ENDIF) 1669 { 1670 branchDepth--; 1671 } 1672 1673 if(branchDepth > 0) 1674 { 1675 instruction[i]->analysisBranch = true; 1676 1677 if(instruction[i]->isCall()) 1678 { 1679 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1680 } 1681 } 1682 1683 // Break statemement 1684 if(instruction[i]->isBreak()) 1685 { 1686 breakDepth++; 1687 } 1688 1689 if(breakDepth > 0) 1690 { 1691 if(instruction[i]->isLoopOrSwitch()) // Nested loop or switch, don't make the end of it disable the break execution mask 1692 { 1693 breakDepth++; 1694 } 1695 else if(instruction[i]->isEndLoopOrSwitch()) 1696 { 1697 breakDepth--; 1698 } 1699 1700 instruction[i]->analysisBreak = true; 1701 1702 if(instruction[i]->isCall()) 1703 { 1704 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1705 } 1706 } 1707 1708 // Continue statement 1709 if(instruction[i]->opcode == OPCODE_CONTINUE) 1710 { 1711 continueDepth++; 1712 } 1713 1714 if(continueDepth > 0) 1715 { 1716 if(instruction[i]->isLoopOrSwitch()) // Nested loop or switch, don't make the end of it disable the break execution mask 1717 { 1718 continueDepth++; 1719 } 1720 else if(instruction[i]->isEndLoopOrSwitch()) 1721 { 1722 continueDepth--; 1723 } 1724 1725 instruction[i]->analysisContinue = true; 1726 1727 if(instruction[i]->isCall()) 1728 { 1729 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); 1730 } 1731 } 1732 1733 // Return (leave) statement 1734 if(instruction[i]->opcode == OPCODE_LEAVE) 1735 { 1736 leaveReturn = true; 1737 } 1738 else if(instruction[i]->opcode == OPCODE_RET) // End of the function 1739 { 1740 leaveReturn = false; 1741 } 1742 1743 if(leaveReturn) 1744 { 1745 instruction[i]->analysisLeave = true; 1746 1747 if(instruction[i]->isCall()) 1748 { 1749 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); 1750 } 1751 } 1752 } 1753 } 1754 1755 void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag) 1756 { 1757 bool marker = false; 1758 for(unsigned int i = 0; i < instruction.size(); i++) 1759 { 1760 if(!marker) 1761 { 1762 if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel) 1763 { 1764 marker = true; 1765 } 1766 } 1767 else 1768 { 1769 if(instruction[i]->opcode == OPCODE_RET) 1770 { 1771 break; 1772 } 1773 else if(instruction[i]->isCall()) 1774 { 1775 markFunctionAnalysis(instruction[i]->dst.label, flag); 1776 } 1777 1778 instruction[i]->analysis |= flag; 1779 } 1780 } 1781 } 1782 1783 void Shader::analyzeSamplers() 1784 { 1785 for(unsigned int i = 0; i < instruction.size(); i++) 1786 { 1787 switch(instruction[i]->opcode) 1788 { 1789 case OPCODE_TEX: 1790 case OPCODE_TEXBEM: 1791 case OPCODE_TEXBEML: 1792 case OPCODE_TEXREG2AR: 1793 case OPCODE_TEXREG2GB: 1794 case OPCODE_TEXM3X2TEX: 1795 case OPCODE_TEXM3X3TEX: 1796 case OPCODE_TEXM3X3SPEC: 1797 case OPCODE_TEXM3X3VSPEC: 1798 case OPCODE_TEXREG2RGB: 1799 case OPCODE_TEXDP3TEX: 1800 case OPCODE_TEXM3X2DEPTH: 1801 case OPCODE_TEXLDD: 1802 case OPCODE_TEXLDL: 1803 case OPCODE_TEXOFFSET: 1804 case OPCODE_TEXLDLOFFSET: 1805 case OPCODE_TEXELFETCH: 1806 case OPCODE_TEXELFETCHOFFSET: 1807 case OPCODE_TEXGRAD: 1808 case OPCODE_TEXGRADOFFSET: 1809 { 1810 Parameter &dst = instruction[i]->dst; 1811 Parameter &src1 = instruction[i]->src[1]; 1812 1813 if(majorVersion >= 2) 1814 { 1815 usedSamplers |= 1 << src1.index; 1816 } 1817 else 1818 { 1819 usedSamplers |= 1 << dst.index; 1820 } 1821 } 1822 break; 1823 default: 1824 break; 1825 } 1826 } 1827 } 1828 1829 // Assigns a unique index to each call instruction, on a per label basis. 1830 // This is used to know what basic block to return to. 1831 void Shader::analyzeCallSites() 1832 { 1833 int callSiteIndex[2048] = {0}; 1834 1835 for(unsigned int i = 0; i < instruction.size(); i++) 1836 { 1837 if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ) 1838 { 1839 int label = instruction[i]->dst.label; 1840 1841 instruction[i]->dst.callSite = callSiteIndex[label]++; 1842 } 1843 } 1844 } 1845 1846 void Shader::analyzeDynamicIndexing() 1847 { 1848 dynamicallyIndexedTemporaries = false; 1849 dynamicallyIndexedInput = false; 1850 dynamicallyIndexedOutput = false; 1851 1852 for(unsigned int i = 0; i < instruction.size(); i++) 1853 { 1854 if(instruction[i]->dst.rel.type == PARAMETER_ADDR || 1855 instruction[i]->dst.rel.type == PARAMETER_LOOP || 1856 instruction[i]->dst.rel.type == PARAMETER_TEMP || 1857 instruction[i]->dst.rel.type == PARAMETER_CONST) 1858 { 1859 switch(instruction[i]->dst.type) 1860 { 1861 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1862 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1863 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1864 default: break; 1865 } 1866 } 1867 1868 for(int j = 0; j < 3; j++) 1869 { 1870 if(instruction[i]->src[j].rel.type == PARAMETER_ADDR || 1871 instruction[i]->src[j].rel.type == PARAMETER_LOOP || 1872 instruction[i]->src[j].rel.type == PARAMETER_TEMP || 1873 instruction[i]->src[j].rel.type == PARAMETER_CONST) 1874 { 1875 switch(instruction[i]->src[j].type) 1876 { 1877 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1878 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1879 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1880 default: break; 1881 } 1882 } 1883 } 1884 } 1885 } 1886 } 1887