Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Shader.hpp"
     16 
     17 #include "VertexShader.hpp"
     18 #include "PixelShader.hpp"
     19 #include "Math.hpp"
     20 #include "Debug.hpp"
     21 
     22 #include <set>
     23 #include <fstream>
     24 #include <sstream>
     25 #include <stdarg.h>
     26 
     27 namespace sw
     28 {
	// Definition of the static Shader::serialCounter member; its initial value is 1.
	// NOTE(review): presumably the source of per-shader serial numbers - confirm against Shader.hpp.
	volatile int Shader::serialCounter = 1;
     30 
     31 	Shader::Opcode Shader::OPCODE_DP(int i)
     32 	{
     33 		switch(i)
     34 		{
     35 		default: ASSERT(false);
     36 		case 1: return OPCODE_DP1;
     37 		case 2: return OPCODE_DP2;
     38 		case 3: return OPCODE_DP3;
     39 		case 4: return OPCODE_DP4;
     40 		}
     41 	}
     42 
     43 	Shader::Opcode Shader::OPCODE_LEN(int i)
     44 	{
     45 		switch(i)
     46 		{
     47 		default: ASSERT(false);
     48 		case 1: return OPCODE_ABS;
     49 		case 2: return OPCODE_LEN2;
     50 		case 3: return OPCODE_LEN3;
     51 		case 4: return OPCODE_LEN4;
     52 		}
     53 	}
     54 
     55 	Shader::Opcode Shader::OPCODE_DIST(int i)
     56 	{
     57 		switch(i)
     58 		{
     59 		default: ASSERT(false);
     60 		case 1: return OPCODE_DIST1;
     61 		case 2: return OPCODE_DIST2;
     62 		case 3: return OPCODE_DIST3;
     63 		case 4: return OPCODE_DIST4;
     64 		}
     65 	}
     66 
     67 	Shader::Opcode Shader::OPCODE_NRM(int i)
     68 	{
     69 		switch(i)
     70 		{
     71 		default: ASSERT(false);
     72 		case 1: return OPCODE_SGN;
     73 		case 2: return OPCODE_NRM2;
     74 		case 3: return OPCODE_NRM3;
     75 		case 4: return OPCODE_NRM4;
     76 		}
     77 	}
     78 
     79 	Shader::Opcode Shader::OPCODE_FORWARD(int i)
     80 	{
     81 		switch(i)
     82 		{
     83 		default: ASSERT(false);
     84 		case 1: return OPCODE_FORWARD1;
     85 		case 2: return OPCODE_FORWARD2;
     86 		case 3: return OPCODE_FORWARD3;
     87 		case 4: return OPCODE_FORWARD4;
     88 		}
     89 	}
     90 
     91 	Shader::Opcode Shader::OPCODE_REFLECT(int i)
     92 	{
     93 		switch(i)
     94 		{
     95 		default: ASSERT(false);
     96 		case 1: return OPCODE_REFLECT1;
     97 		case 2: return OPCODE_REFLECT2;
     98 		case 3: return OPCODE_REFLECT3;
     99 		case 4: return OPCODE_REFLECT4;
    100 		}
    101 	}
    102 
    103 	Shader::Opcode Shader::OPCODE_REFRACT(int i)
    104 	{
    105 		switch(i)
    106 		{
    107 		default: ASSERT(false);
    108 		case 1: return OPCODE_REFRACT1;
    109 		case 2: return OPCODE_REFRACT2;
    110 		case 3: return OPCODE_REFRACT3;
    111 		case 4: return OPCODE_REFRACT4;
    112 		}
    113 	}
    114 
    115 	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
    116 	{
    117 		control = CONTROL_RESERVED0;
    118 
    119 		predicate = false;
    120 		predicateNot = false;
    121 		predicateSwizzle = 0xE4;
    122 
    123 		coissue = false;
    124 		samplerType = SAMPLER_UNKNOWN;
    125 		usage = USAGE_POSITION;
    126 		usageIndex = 0;
    127 	}
    128 
    129 	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
    130 	{
    131 		parseOperationToken(*token++, majorVersion);
    132 
    133 		samplerType = SAMPLER_UNKNOWN;
    134 		usage = USAGE_POSITION;
    135 		usageIndex = 0;
    136 
    137 		if(opcode == OPCODE_IF ||
    138 		   opcode == OPCODE_IFC ||
    139 		   opcode == OPCODE_LOOP ||
    140 		   opcode == OPCODE_REP ||
    141 		   opcode == OPCODE_BREAKC ||
    142 		   opcode == OPCODE_BREAKP)   // No destination operand
    143 		{
    144 			if(size > 0) parseSourceToken(0, token++, majorVersion);
    145 			if(size > 1) parseSourceToken(1, token++, majorVersion);
    146 			if(size > 2) parseSourceToken(2, token++, majorVersion);
    147 			if(size > 3) ASSERT(false);
    148 		}
    149 		else if(opcode == OPCODE_DCL)
    150 		{
    151 			parseDeclarationToken(*token++);
    152 			parseDestinationToken(token++, majorVersion);
    153 		}
    154 		else
    155 		{
    156 			if(size > 0)
    157 			{
    158 				parseDestinationToken(token, majorVersion);
    159 
    160 				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
    161 				{
    162 					token++;
    163 					size--;
    164 				}
    165 
    166 				token++;
    167 				size--;
    168 			}
    169 
    170 			if(predicate)
    171 			{
    172 				ASSERT(size != 0);
    173 
    174 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
    175 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
    176 
    177 				token++;
    178 				size--;
    179 			}
    180 
    181 			for(int i = 0; size > 0; i++)
    182 			{
    183 				parseSourceToken(i, token, majorVersion);
    184 
    185 				token++;
    186 				size--;
    187 
    188 				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
    189 				{
    190 					token++;
    191 					size--;
    192 				}
    193 			}
    194 		}
    195 	}
    196 
	// Destructor; the body is empty, so no explicit cleanup is performed here.
	Shader::Instruction::~Instruction()
	{
	}
    200 
    201 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
    202 	{
    203 		std::string instructionString;
    204 
    205 		if(opcode != OPCODE_DCL)
    206 		{
    207 			instructionString += coissue ? "+ " : "";
    208 
    209 			if(predicate)
    210 			{
    211 				instructionString += predicateNot ? "(!p0" : "(p0";
    212 				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
    213 				instructionString += ") ";
    214 			}
    215 
    216 			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
    217 
    218 			if(dst.type != PARAMETER_VOID)
    219 			{
    220 				instructionString += " " + dst.string(shaderType, version) +
    221 				                           dst.relativeString() +
    222 				                           dst.maskString();
    223 			}
    224 
    225 			for(int i = 0; i < 4; i++)
    226 			{
    227 				if(src[i].type != PARAMETER_VOID)
    228 				{
    229 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
    230 					instructionString += src[i].preModifierString() +
    231 										 src[i].string(shaderType, version) +
    232 										 src[i].relativeString() +
    233 										 src[i].postModifierString() +
    234 										 src[i].swizzleString();
    235 				}
    236 			}
    237 		}
    238 		else   // DCL
    239 		{
    240 			instructionString += "dcl";
    241 
    242 			if(dst.type == PARAMETER_SAMPLER)
    243 			{
    244 				switch(samplerType)
    245 				{
    246 				case SAMPLER_UNKNOWN: instructionString += " ";        break;
    247 				case SAMPLER_1D:      instructionString += "_1d ";     break;
    248 				case SAMPLER_2D:      instructionString += "_2d ";     break;
    249 				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
    250 				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
    251 				default:
    252 					ASSERT(false);
    253 				}
    254 
    255 				instructionString += dst.string(shaderType, version);
    256 			}
    257 			else if(dst.type == PARAMETER_INPUT ||
    258 				    dst.type == PARAMETER_OUTPUT ||
    259 				    dst.type == PARAMETER_TEXTURE)
    260 			{
    261 				if(version >= 0x0300)
    262 				{
    263 					switch(usage)
    264 					{
    265 					case USAGE_POSITION:     instructionString += "_position";     break;
    266 					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
    267 					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
    268 					case USAGE_NORMAL:       instructionString += "_normal";       break;
    269 					case USAGE_PSIZE:        instructionString += "_psize";        break;
    270 					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
    271 					case USAGE_TANGENT:      instructionString += "_tangent";      break;
    272 					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
    273 					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
    274 					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
    275 					case USAGE_COLOR:        instructionString += "_color";        break;
    276 					case USAGE_FOG:          instructionString += "_fog";          break;
    277 					case USAGE_DEPTH:        instructionString += "_depth";        break;
    278 					case USAGE_SAMPLE:       instructionString += "_sample";       break;
    279 					default:
    280 						ASSERT(false);
    281 					}
    282 
    283 					if(usageIndex > 0)
    284 					{
    285 						std::ostringstream buffer;
    286 
    287 						buffer << (int)usageIndex;
    288 
    289 						instructionString += buffer.str();
    290 					}
    291 				}
    292 				else ASSERT(dst.type != PARAMETER_OUTPUT);
    293 
    294 				instructionString += " ";
    295 
    296 				instructionString += dst.string(shaderType, version);
    297 				instructionString += dst.maskString();
    298 			}
    299 			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
    300 			{
    301 				instructionString += " ";
    302 
    303 				instructionString += dst.string(shaderType, version);
    304 			}
    305 			else ASSERT(false);
    306 		}
    307 
    308 		return instructionString;
    309 	}
    310 
    311 	std::string Shader::DestinationParameter::modifierString() const
    312 	{
    313 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
    314 		{
    315 			return "";
    316 		}
    317 
    318 		std::string modifierString;
    319 
    320 		if(integer)
    321 		{
    322 			modifierString += "_int";
    323 		}
    324 
    325 		if(saturate)
    326 		{
    327 			modifierString += "_sat";
    328 		}
    329 
    330 		if(partialPrecision)
    331 		{
    332 			modifierString += "_pp";
    333 		}
    334 
    335 		if(centroid)
    336 		{
    337 			modifierString += "_centroid";
    338 		}
    339 
    340 		return modifierString;
    341 	}
    342 
    343 	std::string Shader::DestinationParameter::shiftString() const
    344 	{
    345 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
    346 		{
    347 			return "";
    348 		}
    349 
    350 		switch(shift)
    351 		{
    352 		case 0:		return "";
    353 		case 1:		return "_x2";
    354 		case 2:		return "_x4";
    355 		case 3:		return "_x8";
    356 		case -1:	return "_d2";
    357 		case -2:	return "_d4";
    358 		case -3:	return "_d8";
    359 		default:
    360 			return "";
    361 		//	ASSERT(false);   // FIXME
    362 		}
    363 	}
    364 
    365 	std::string Shader::DestinationParameter::maskString() const
    366 	{
    367 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
    368 		{
    369 			return "";
    370 		}
    371 
    372 		switch(mask)
    373 		{
    374 		case 0x0:	return "";
    375 		case 0x1:	return ".x";
    376 		case 0x2:	return ".y";
    377 		case 0x3:	return ".xy";
    378 		case 0x4:	return ".z";
    379 		case 0x5:	return ".xz";
    380 		case 0x6:	return ".yz";
    381 		case 0x7:	return ".xyz";
    382 		case 0x8:	return ".w";
    383 		case 0x9:	return ".xw";
    384 		case 0xA:	return ".yw";
    385 		case 0xB:	return ".xyw";
    386 		case 0xC:	return ".zw";
    387 		case 0xD:	return ".xzw";
    388 		case 0xE:	return ".yzw";
    389 		case 0xF:	return "";
    390 		default:
    391 			ASSERT(false);
    392 		}
    393 
    394 		return "";
    395 	}
    396 
    397 	std::string Shader::SourceParameter::preModifierString() const
    398 	{
    399 		if(type == PARAMETER_VOID)
    400 		{
    401 			return "";
    402 		}
    403 
    404 		switch(modifier)
    405 		{
    406 		case MODIFIER_NONE:			return "";
    407 		case MODIFIER_NEGATE:		return "-";
    408 		case MODIFIER_BIAS:			return "";
    409 		case MODIFIER_BIAS_NEGATE:	return "-";
    410 		case MODIFIER_SIGN:			return "";
    411 		case MODIFIER_SIGN_NEGATE:	return "-";
    412 		case MODIFIER_COMPLEMENT:	return "1-";
    413 		case MODIFIER_X2:			return "";
    414 		case MODIFIER_X2_NEGATE:	return "-";
    415 		case MODIFIER_DZ:			return "";
    416 		case MODIFIER_DW:			return "";
    417 		case MODIFIER_ABS:			return "";
    418 		case MODIFIER_ABS_NEGATE:	return "-";
    419 		case MODIFIER_NOT:			return "!";
    420 		default:
    421 			ASSERT(false);
    422 		}
    423 
    424 		return "";
    425 	}
    426 
    427 	std::string Shader::Parameter::relativeString() const
    428 	{
    429 		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
    430 		{
    431 			if(rel.type == PARAMETER_VOID)
    432 			{
    433 				return "";
    434 			}
    435 			else if(rel.type == PARAMETER_ADDR)
    436 			{
    437 				switch(rel.swizzle & 0x03)
    438 				{
    439 				case 0: return "[a0.x]";
    440 				case 1: return "[a0.y]";
    441 				case 2: return "[a0.z]";
    442 				case 3: return "[a0.w]";
    443 				}
    444 			}
    445 			else if(rel.type == PARAMETER_TEMP)
    446 			{
    447 				std::ostringstream buffer;
    448 				buffer << rel.index;
    449 
    450 				switch(rel.swizzle & 0x03)
    451 				{
    452 				case 0: return "[r" + buffer.str() + ".x]";
    453 				case 1: return "[r" + buffer.str() + ".y]";
    454 				case 2: return "[r" + buffer.str() + ".z]";
    455 				case 3: return "[r" + buffer.str() + ".w]";
    456 				}
    457 			}
    458 			else if(rel.type == PARAMETER_LOOP)
    459 			{
    460 				return "[aL]";
    461 			}
    462 			else if(rel.type == PARAMETER_CONST)
    463 			{
    464 				std::ostringstream buffer;
    465 				buffer << rel.index;
    466 
    467 				switch(rel.swizzle & 0x03)
    468 				{
    469 				case 0: return "[c" + buffer.str() + ".x]";
    470 				case 1: return "[c" + buffer.str() + ".y]";
    471 				case 2: return "[c" + buffer.str() + ".z]";
    472 				case 3: return "[c" + buffer.str() + ".w]";
    473 				}
    474 			}
    475 			else ASSERT(false);
    476 		}
    477 
    478 		return "";
    479 	}
    480 
    481 	std::string Shader::SourceParameter::postModifierString() const
    482 	{
    483 		if(type == PARAMETER_VOID)
    484 		{
    485 			return "";
    486 		}
    487 
    488 		switch(modifier)
    489 		{
    490 		case MODIFIER_NONE:			return "";
    491 		case MODIFIER_NEGATE:		return "";
    492 		case MODIFIER_BIAS:			return "_bias";
    493 		case MODIFIER_BIAS_NEGATE:	return "_bias";
    494 		case MODIFIER_SIGN:			return "_bx2";
    495 		case MODIFIER_SIGN_NEGATE:	return "_bx2";
    496 		case MODIFIER_COMPLEMENT:	return "";
    497 		case MODIFIER_X2:			return "_x2";
    498 		case MODIFIER_X2_NEGATE:	return "_x2";
    499 		case MODIFIER_DZ:			return "_dz";
    500 		case MODIFIER_DW:			return "_dw";
    501 		case MODIFIER_ABS:			return "_abs";
    502 		case MODIFIER_ABS_NEGATE:	return "_abs";
    503 		case MODIFIER_NOT:			return "";
    504 		default:
    505 			ASSERT(false);
    506 		}
    507 
    508 		return "";
    509 	}
    510 
	// Formats this source parameter's swizzle by forwarding its type and swizzle
	// to Instruction::swizzleString().
	std::string Shader::SourceParameter::swizzleString() const
	{
		return Instruction::swizzleString(type, swizzle);
	}
    515 
    516 	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
    517 	{
    518 		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
    519 		{
    520 			opcode = (Opcode)token;
    521 
    522 			control = CONTROL_RESERVED0;
    523 			predicate = false;
    524 			coissue = false;
    525 		}
    526 		else
    527 		{
    528 			opcode = (Opcode)(token & 0x0000FFFF);
    529 			control = (Control)((token & 0x00FF0000) >> 16);
    530 
    531 			int size = (token & 0x0F000000) >> 24;
    532 
    533 			predicate = (token & 0x10000000) != 0x00000000;
    534 			coissue = (token & 0x40000000) != 0x00000000;
    535 
    536 			if(majorVersion < 2)
    537 			{
    538 				if(size != 0)
    539 				{
    540 					ASSERT(false);   // Reserved
    541 				}
    542 			}
    543 
    544 			if(majorVersion < 2)
    545 			{
    546 				if(predicate)
    547 				{
    548 					ASSERT(false);
    549 				}
    550 			}
    551 
    552 			if((token & 0x20000000) != 0x00000000)
    553 			{
    554 				ASSERT(false);   // Reserved
    555 			}
    556 
    557 			if(majorVersion >= 2)
    558 			{
    559 				if(coissue)
    560 				{
    561 					ASSERT(false);   // Reserved
    562 				}
    563 			}
    564 
    565 			if((token & 0x80000000) != 0x00000000)
    566 			{
    567 				ASSERT(false);
    568 			}
    569 		}
    570 	}
    571 
    572 	void Shader::Instruction::parseDeclarationToken(unsigned long token)
    573 	{
    574 		samplerType = (SamplerType)((token & 0x78000000) >> 27);
    575 		usage = (Usage)(token & 0x0000001F);
    576 		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
    577 	}
    578 
    579 	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
    580 	{
    581 		dst.index = (unsigned short)(token[0] & 0x000007FF);
    582 		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
    583 
    584 		// TODO: Check type and index range
    585 
    586 		bool relative = (token[0] & 0x00002000) != 0x00000000;
    587 		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
    588 		dst.rel.swizzle = 0x00;
    589 		dst.rel.scale = 1;
    590 
    591 		if(relative && majorVersion >= 3)
    592 		{
    593 			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
    594 			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
    595 		}
    596 		else if(relative) ASSERT(false);   // Reserved
    597 
    598 		if((token[0] & 0x0000C000) != 0x00000000)
    599 		{
    600 			ASSERT(false);   // Reserved
    601 		}
    602 
    603 		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
    604 		dst.saturate = (token[0] & 0x00100000) != 0;
    605 		dst.partialPrecision = (token[0] & 0x00200000) != 0;
    606 		dst.centroid = (token[0] & 0x00400000) != 0;
    607 		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
    608 
    609 		if(majorVersion >= 2)
    610 		{
    611 			if(dst.shift)
    612 			{
    613 				ASSERT(false);   // Reserved
    614 			}
    615 		}
    616 
    617 		if((token[0] & 0x80000000) != 0x80000000)
    618 		{
    619 			ASSERT(false);
    620 		}
    621 	}
    622 
    623 	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
    624 	{
    625 		// Defaults
    626 		src[i].index = 0;
    627 		src[i].type = PARAMETER_VOID;
    628 		src[i].modifier = MODIFIER_NONE;
    629 		src[i].swizzle = 0xE4;
    630 		src[i].rel.type = PARAMETER_VOID;
    631 		src[i].rel.swizzle = 0x00;
    632 		src[i].rel.scale = 1;
    633 
    634 		switch(opcode)
    635 		{
    636 		case OPCODE_DEF:
    637 			src[0].type = PARAMETER_FLOAT4LITERAL;
    638 			src[0].value[i] = *(float*)token;
    639 			break;
    640 		case OPCODE_DEFB:
    641 			src[0].type = PARAMETER_BOOL1LITERAL;
    642 			src[0].boolean[0] = *(int*)token;
    643 			break;
    644 		case OPCODE_DEFI:
    645 			src[0].type = PARAMETER_INT4LITERAL;
    646 			src[0].integer[i] = *(int*)token;
    647 			break;
    648 		default:
    649 			src[i].index = (unsigned short)(token[0] & 0x000007FF);
    650 			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
    651 
    652 			// FIXME: Check type and index range
    653 
    654 			bool relative = (token[0] & 0x00002000) != 0x00000000;
    655 			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
    656 
    657 			if((token[0] & 0x0000C000) != 0x00000000)
    658 			{
    659 				if(opcode != OPCODE_DEF &&
    660 				   opcode != OPCODE_DEFI &&
    661 				   opcode != OPCODE_DEFB)
    662 				{
    663 					ASSERT(false);
    664 				}
    665 			}
    666 
    667 			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
    668 			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
    669 
    670 			if((token[0] & 0x80000000) != 0x80000000)
    671 			{
    672 				if(opcode != OPCODE_DEF &&
    673 				   opcode != OPCODE_DEFI &&
    674 				   opcode != OPCODE_DEFB)
    675 				{
    676 					ASSERT(false);
    677 				}
    678 			}
    679 
    680 			if(relative && majorVersion >= 2)
    681 			{
    682 				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
    683 				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
    684 			}
    685 		}
    686 	}
    687 
    688 	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
    689 	{
    690 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
    691 		{
    692 			return "";
    693 		}
    694 
    695 		int x = (swizzle & 0x03) >> 0;
    696 		int y = (swizzle & 0x0C) >> 2;
    697 		int z = (swizzle & 0x30) >> 4;
    698 		int w = (swizzle & 0xC0) >> 6;
    699 
    700 		std::string swizzleString = ".";
    701 
    702 		switch(x)
    703 		{
    704 		case 0: swizzleString += "x"; break;
    705 		case 1: swizzleString += "y"; break;
    706 		case 2: swizzleString += "z"; break;
    707 		case 3: swizzleString += "w"; break;
    708 		}
    709 
    710 		if(!(x == y && y == z && z == w))
    711 		{
    712 			switch(y)
    713 			{
    714 			case 0: swizzleString += "x"; break;
    715 			case 1: swizzleString += "y"; break;
    716 			case 2: swizzleString += "z"; break;
    717 			case 3: swizzleString += "w"; break;
    718 			}
    719 
    720 			if(!(y == z && z == w))
    721 			{
    722 				switch(z)
    723 				{
    724 				case 0: swizzleString += "x"; break;
    725 				case 1: swizzleString += "y"; break;
    726 				case 2: swizzleString += "z"; break;
    727 				case 3: swizzleString += "w"; break;
    728 				}
    729 
    730 				if(!(z == w))
    731 				{
    732 					switch(w)
    733 					{
    734 					case 0: swizzleString += "x"; break;
    735 					case 1: swizzleString += "y"; break;
    736 					case 2: swizzleString += "z"; break;
    737 					case 3: swizzleString += "w"; break;
    738 					}
    739 				}
    740 			}
    741 		}
    742 
    743 		return swizzleString;
    744 	}
    745 
    746 	std::string Shader::Instruction::operationString(unsigned short version) const
    747 	{
    748 		switch(opcode)
    749 		{
    750 		case OPCODE_NULL:			return "null";
    751 		case OPCODE_NOP:			return "nop";
    752 		case OPCODE_MOV:			return "mov";
    753 		case OPCODE_ADD:			return "add";
    754 		case OPCODE_IADD:			return "iadd";
    755 		case OPCODE_SUB:			return "sub";
    756 		case OPCODE_ISUB:			return "isub";
    757 		case OPCODE_MAD:			return "mad";
    758 		case OPCODE_IMAD:			return "imad";
    759 		case OPCODE_MUL:			return "mul";
    760 		case OPCODE_IMUL:			return "imul";
    761 		case OPCODE_RCPX:			return "rcpx";
    762 		case OPCODE_DIV:			return "div";
    763 		case OPCODE_IDIV:			return "idiv";
    764 		case OPCODE_UDIV:			return "udiv";
    765 		case OPCODE_MOD:			return "mod";
    766 		case OPCODE_IMOD:			return "imod";
    767 		case OPCODE_UMOD:			return "umod";
    768 		case OPCODE_SHL:			return "shl";
    769 		case OPCODE_ISHR:			return "ishr";
    770 		case OPCODE_USHR:			return "ushr";
    771 		case OPCODE_RSQX:			return "rsqx";
    772 		case OPCODE_SQRT:			return "sqrt";
    773 		case OPCODE_RSQ:			return "rsq";
    774 		case OPCODE_LEN2:			return "len2";
    775 		case OPCODE_LEN3:			return "len3";
    776 		case OPCODE_LEN4:			return "len4";
    777 		case OPCODE_DIST1:			return "dist1";
    778 		case OPCODE_DIST2:			return "dist2";
    779 		case OPCODE_DIST3:			return "dist3";
    780 		case OPCODE_DIST4:			return "dist4";
    781 		case OPCODE_DP3:			return "dp3";
    782 		case OPCODE_DP4:			return "dp4";
    783 		case OPCODE_DET2:			return "det2";
    784 		case OPCODE_DET3:			return "det3";
    785 		case OPCODE_DET4:			return "det4";
    786 		case OPCODE_MIN:			return "min";
    787 		case OPCODE_IMIN:			return "imin";
    788 		case OPCODE_UMIN:			return "umin";
    789 		case OPCODE_MAX:			return "max";
    790 		case OPCODE_IMAX:			return "imax";
    791 		case OPCODE_UMAX:			return "umax";
    792 		case OPCODE_SLT:			return "slt";
    793 		case OPCODE_SGE:			return "sge";
    794 		case OPCODE_EXP2X:			return "exp2x";
    795 		case OPCODE_LOG2X:			return "log2x";
    796 		case OPCODE_LIT:			return "lit";
    797 		case OPCODE_ATT:			return "att";
    798 		case OPCODE_LRP:			return "lrp";
    799 		case OPCODE_STEP:			return "step";
    800 		case OPCODE_SMOOTH:			return "smooth";
    801 		case OPCODE_FLOATBITSTOINT:	 return "floatBitsToInt";
    802 		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
    803 		case OPCODE_INTBITSTOFLOAT:	 return "intBitsToFloat";
    804 		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
    805 		case OPCODE_PACKSNORM2x16:	 return "packSnorm2x16";
    806 		case OPCODE_PACKUNORM2x16:	 return "packUnorm2x16";
    807 		case OPCODE_PACKHALF2x16:	 return "packHalf2x16";
    808 		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
    809 		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
    810 		case OPCODE_UNPACKHALF2x16:	 return "unpackHalf2x16";
    811 		case OPCODE_FRC:			return "frc";
    812 		case OPCODE_M4X4:			return "m4x4";
    813 		case OPCODE_M4X3:			return "m4x3";
    814 		case OPCODE_M3X4:			return "m3x4";
    815 		case OPCODE_M3X3:			return "m3x3";
    816 		case OPCODE_M3X2:			return "m3x2";
    817 		case OPCODE_CALL:			return "call";
    818 		case OPCODE_CALLNZ:			return "callnz";
    819 		case OPCODE_LOOP:			return "loop";
    820 		case OPCODE_RET:			return "ret";
    821 		case OPCODE_ENDLOOP:		return "endloop";
    822 		case OPCODE_LABEL:			return "label";
    823 		case OPCODE_DCL:			return "dcl";
    824 		case OPCODE_POWX:			return "powx";
    825 		case OPCODE_CRS:			return "crs";
    826 		case OPCODE_SGN:			return "sgn";
    827 		case OPCODE_ISGN:			return "isgn";
    828 		case OPCODE_ABS:			return "abs";
    829 		case OPCODE_IABS:			return "iabs";
    830 		case OPCODE_NRM2:			return "nrm2";
    831 		case OPCODE_NRM3:			return "nrm3";
    832 		case OPCODE_NRM4:			return "nrm4";
    833 		case OPCODE_SINCOS:			return "sincos";
    834 		case OPCODE_REP:			return "rep";
    835 		case OPCODE_ENDREP:			return "endrep";
    836 		case OPCODE_IF:				return "if";
    837 		case OPCODE_IFC:			return "ifc";
    838 		case OPCODE_ELSE:			return "else";
    839 		case OPCODE_ENDIF:			return "endif";
    840 		case OPCODE_BREAK:			return "break";
    841 		case OPCODE_BREAKC:			return "breakc";
    842 		case OPCODE_MOVA:			return "mova";
    843 		case OPCODE_DEFB:			return "defb";
    844 		case OPCODE_DEFI:			return "defi";
    845 		case OPCODE_TEXCOORD:		return "texcoord";
    846 		case OPCODE_TEXKILL:		return "texkill";
    847 		case OPCODE_DISCARD:		return "discard";
    848 		case OPCODE_TEX:
    849 			if(version < 0x0104)	return "tex";
    850 			else					return "texld";
    851 		case OPCODE_TEXBEM:			return "texbem";
    852 		case OPCODE_TEXBEML:		return "texbeml";
    853 		case OPCODE_TEXREG2AR:		return "texreg2ar";
    854 		case OPCODE_TEXREG2GB:		return "texreg2gb";
    855 		case OPCODE_TEXM3X2PAD:		return "texm3x2pad";
    856 		case OPCODE_TEXM3X2TEX:		return "texm3x2tex";
    857 		case OPCODE_TEXM3X3PAD:		return "texm3x3pad";
    858 		case OPCODE_TEXM3X3TEX:		return "texm3x3tex";
    859 		case OPCODE_RESERVED0:		return "reserved0";
    860 		case OPCODE_TEXM3X3SPEC:	return "texm3x3spec";
    861 		case OPCODE_TEXM3X3VSPEC:	return "texm3x3vspec";
    862 		case OPCODE_EXPP:			return "expp";
    863 		case OPCODE_LOGP:			return "logp";
    864 		case OPCODE_CND:			return "cnd";
    865 		case OPCODE_DEF:			return "def";
    866 		case OPCODE_TEXREG2RGB:		return "texreg2rgb";
    867 		case OPCODE_TEXDP3TEX:		return "texdp3tex";
    868 		case OPCODE_TEXM3X2DEPTH:	return "texm3x2depth";
    869 		case OPCODE_TEXDP3:			return "texdp3";
    870 		case OPCODE_TEXM3X3:		return "texm3x3";
    871 		case OPCODE_TEXDEPTH:		return "texdepth";
    872 		case OPCODE_CMP0:			return "cmp0";
    873 		case OPCODE_ICMP:			return "icmp";
    874 		case OPCODE_UCMP:			return "ucmp";
    875 		case OPCODE_SELECT:			return "select";
    876 		case OPCODE_EXTRACT:		return "extract";
    877 		case OPCODE_INSERT:			return "insert";
    878 		case OPCODE_BEM:			return "bem";
    879 		case OPCODE_DP2ADD:			return "dp2add";
    880 		case OPCODE_DFDX:			return "dFdx";
    881 		case OPCODE_DFDY:			return "dFdy";
    882 		case OPCODE_FWIDTH:			return "fwidth";
    883 		case OPCODE_TEXLDD:			return "texldd";
    884 		case OPCODE_CMP:			return "cmp";
    885 		case OPCODE_TEXLDL:			return "texldl";
    886 		case OPCODE_TEXOFFSET:		return "texoffset";
    887 		case OPCODE_TEXLDLOFFSET:	return "texldloffset";
    888 		case OPCODE_TEXELFETCH:		return "texelfetch";
    889 		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
    890 		case OPCODE_TEXGRAD:		return "texgrad";
    891 		case OPCODE_TEXGRADOFFSET:	return "texgradoffset";
    892 		case OPCODE_BREAKP:			return "breakp";
    893 		case OPCODE_TEXSIZE:        return "texsize";
    894 		case OPCODE_PHASE:			return "phase";
    895 		case OPCODE_COMMENT:		return "comment";
    896 		case OPCODE_END:			return "end";
    897 		case OPCODE_PS_1_0:			return "ps_1_0";
    898 		case OPCODE_PS_1_1:			return "ps_1_1";
    899 		case OPCODE_PS_1_2:			return "ps_1_2";
    900 		case OPCODE_PS_1_3:			return "ps_1_3";
    901 		case OPCODE_PS_1_4:			return "ps_1_4";
    902 		case OPCODE_PS_2_0:			return "ps_2_0";
    903 		case OPCODE_PS_2_x:			return "ps_2_x";
    904 		case OPCODE_PS_3_0:			return "ps_3_0";
    905 		case OPCODE_VS_1_0:			return "vs_1_0";
    906 		case OPCODE_VS_1_1:			return "vs_1_1";
    907 		case OPCODE_VS_2_0:			return "vs_2_0";
    908 		case OPCODE_VS_2_x:			return "vs_2_x";
    909 		case OPCODE_VS_2_sw:		return "vs_2_sw";
    910 		case OPCODE_VS_3_0:			return "vs_3_0";
    911 		case OPCODE_VS_3_sw:		return "vs_3_sw";
    912 		case OPCODE_WHILE:          return "while";
    913 		case OPCODE_ENDWHILE:       return "endwhile";
    914 		case OPCODE_COS:            return "cos";
    915 		case OPCODE_SIN:            return "sin";
    916 		case OPCODE_TAN:            return "tan";
    917 		case OPCODE_ACOS:           return "acos";
    918 		case OPCODE_ASIN:           return "asin";
    919 		case OPCODE_ATAN:           return "atan";
    920 		case OPCODE_ATAN2:          return "atan2";
    921 		case OPCODE_COSH:           return "cosh";
    922 		case OPCODE_SINH:           return "sinh";
    923 		case OPCODE_TANH:           return "tanh";
    924 		case OPCODE_ACOSH:          return "acosh";
    925 		case OPCODE_ASINH:          return "asinh";
    926 		case OPCODE_ATANH:          return "atanh";
    927 		case OPCODE_DP1:            return "dp1";
    928 		case OPCODE_DP2:            return "dp2";
    929 		case OPCODE_TRUNC:          return "trunc";
    930 		case OPCODE_FLOOR:          return "floor";
    931 		case OPCODE_ROUND:          return "round";
    932 		case OPCODE_ROUNDEVEN:      return "roundEven";
    933 		case OPCODE_CEIL:           return "ceil";
    934 		case OPCODE_EXP2:           return "exp2";
    935 		case OPCODE_LOG2:           return "log2";
    936 		case OPCODE_EXP:            return "exp";
    937 		case OPCODE_LOG:            return "log";
    938 		case OPCODE_POW:            return "pow";
    939 		case OPCODE_F2B:            return "f2b";
    940 		case OPCODE_B2F:            return "b2f";
    941 		case OPCODE_F2I:            return "f2i";
    942 		case OPCODE_I2F:            return "i2f";
    943 		case OPCODE_F2U:            return "f2u";
    944 		case OPCODE_U2F:            return "u2f";
    945 		case OPCODE_B2I:            return "b2i";
    946 		case OPCODE_I2B:            return "i2b";
    947 		case OPCODE_ALL:            return "all";
    948 		case OPCODE_ANY:            return "any";
    949 		case OPCODE_NEG:            return "neg";
    950 		case OPCODE_INEG:           return "ineg";
    951 		case OPCODE_ISNAN:          return "isnan";
    952 		case OPCODE_ISINF:          return "isinf";
    953 		case OPCODE_NOT:            return "not";
    954 		case OPCODE_OR:             return "or";
    955 		case OPCODE_XOR:            return "xor";
    956 		case OPCODE_AND:            return "and";
    957 		case OPCODE_EQ:             return "eq";
    958 		case OPCODE_NE:             return "neq";
    959 		case OPCODE_FORWARD1:       return "forward1";
    960 		case OPCODE_FORWARD2:       return "forward2";
    961 		case OPCODE_FORWARD3:       return "forward3";
    962 		case OPCODE_FORWARD4:       return "forward4";
    963 		case OPCODE_REFLECT1:       return "reflect1";
    964 		case OPCODE_REFLECT2:       return "reflect2";
    965 		case OPCODE_REFLECT3:       return "reflect3";
    966 		case OPCODE_REFLECT4:       return "reflect4";
    967 		case OPCODE_REFRACT1:       return "refract1";
    968 		case OPCODE_REFRACT2:       return "refract2";
    969 		case OPCODE_REFRACT3:       return "refract3";
    970 		case OPCODE_REFRACT4:       return "refract4";
    971 		case OPCODE_LEAVE:          return "leave";
    972 		case OPCODE_CONTINUE:       return "continue";
    973 		case OPCODE_TEST:           return "test";
    974 		case OPCODE_SWITCH:         return "switch";
    975 		case OPCODE_ENDSWITCH:      return "endswitch";
    976 		default:
    977 			ASSERT(false);
    978 		}
    979 
    980 		return "<unknown>";
    981 	}
    982 
    983 	std::string Shader::Instruction::controlString() const
    984 	{
    985 		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
    986 		{
    987 			if(project) return "p";
    988 
    989 			if(bias) return "b";
    990 
    991 			// FIXME: LOD
    992 		}
    993 
    994 		switch(control)
    995 		{
    996 		case 1: return "_gt";
    997 		case 2: return "_eq";
    998 		case 3: return "_ge";
    999 		case 4: return "_lt";
   1000 		case 5: return "_ne";
   1001 		case 6: return "_le";
   1002 		default:
   1003 			return "";
   1004 		//	ASSERT(false);   // FIXME
   1005 		}
   1006 	}
   1007 
   1008 	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
   1009 	{
   1010 		std::ostringstream buffer;
   1011 
   1012 		if(type == PARAMETER_FLOAT4LITERAL)
   1013 		{
   1014 			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
   1015 
   1016 			return buffer.str();
   1017 		}
   1018 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
   1019 		{
   1020 			buffer << index;
   1021 
   1022 			return typeString(shaderType, version) + buffer.str();
   1023 		}
   1024 		else
   1025 		{
   1026 			return typeString(shaderType, version);
   1027 		}
   1028 	}
   1029 
   1030 	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
   1031 	{
   1032 		switch(type)
   1033 		{
   1034 		case PARAMETER_TEMP:			return "r";
   1035 		case PARAMETER_INPUT:			return "v";
   1036 		case PARAMETER_CONST:			return "c";
   1037 		case PARAMETER_TEXTURE:
   1038 	//	case PARAMETER_ADDR:
   1039 			if(shaderType == SHADER_PIXEL)	return "t";
   1040 			else							return "a0";
   1041 		case PARAMETER_RASTOUT:
   1042 			if(index == 0)              return "oPos";
   1043 			else if(index == 1)         return "oFog";
   1044 			else if(index == 2)         return "oPts";
   1045 			else                        ASSERT(false);
   1046 		case PARAMETER_ATTROUT:			return "oD";
   1047 		case PARAMETER_TEXCRDOUT:
   1048 	//	case PARAMETER_OUTPUT:			return "";
   1049 			if(version < 0x0300)		return "oT";
   1050 			else						return "o";
   1051 		case PARAMETER_CONSTINT:		return "i";
   1052 		case PARAMETER_COLOROUT:		return "oC";
   1053 		case PARAMETER_DEPTHOUT:		return "oDepth";
   1054 		case PARAMETER_SAMPLER:			return "s";
   1055 	//	case PARAMETER_CONST2:			return "";
   1056 	//	case PARAMETER_CONST3:			return "";
   1057 	//	case PARAMETER_CONST4:			return "";
   1058 		case PARAMETER_CONSTBOOL:		return "b";
   1059 		case PARAMETER_LOOP:			return "aL";
   1060 	//	case PARAMETER_TEMPFLOAT16:		return "";
   1061 		case PARAMETER_MISCTYPE:
   1062 			if(index == 0)				return "vPos";
   1063 			else if(index == 1)			return "vFace";
   1064 			else						ASSERT(false);
   1065 		case PARAMETER_LABEL:			return "l";
   1066 		case PARAMETER_PREDICATE:		return "p0";
   1067 		case PARAMETER_FLOAT4LITERAL:	return "";
   1068 		case PARAMETER_BOOL1LITERAL:	return "";
   1069 		case PARAMETER_INT4LITERAL:		return "";
   1070 	//	case PARAMETER_VOID:			return "";
   1071 		default:
   1072 			ASSERT(false);
   1073 		}
   1074 
   1075 		return "";
   1076 	}
   1077 
   1078 	bool Shader::Instruction::isBranch() const
   1079 	{
   1080 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
   1081 	}
   1082 
   1083 	bool Shader::Instruction::isCall() const
   1084 	{
   1085 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
   1086 	}
   1087 
   1088 	bool Shader::Instruction::isBreak() const
   1089 	{
   1090 		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
   1091 	}
   1092 
   1093 	bool Shader::Instruction::isLoopOrSwitch() const
   1094 	{
   1095 		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE || opcode == OPCODE_SWITCH;
   1096 	}
   1097 
   1098 	bool Shader::Instruction::isEndLoopOrSwitch() const
   1099 	{
   1100 		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE || opcode == OPCODE_ENDSWITCH;;
   1101 	}
   1102 
   1103 	bool Shader::Instruction::isPredicated() const
   1104 	{
   1105 		return predicate ||
   1106 		       analysisBranch ||
   1107 		       analysisBreak ||
   1108 		       analysisContinue ||
   1109 		       analysisLeave;
   1110 	}
   1111 
   1112 	Shader::Shader() : serialID(serialCounter++)
   1113 	{
   1114 		usedSamplers = 0;
   1115 	}
   1116 
   1117 	Shader::~Shader()
   1118 	{
   1119 		for(unsigned int i = 0; i < instruction.size(); i++)
   1120 		{
   1121 			delete instruction[i];
   1122 			instruction[i] = 0;
   1123 		}
   1124 	}
   1125 
   1126 	void Shader::parse(const unsigned long *token)
   1127 	{
   1128 		minorVersion = (unsigned char)(token[0] & 0x000000FF);
   1129 		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
   1130 		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
   1131 
   1132 		int length = 0;
   1133 
   1134 		if(shaderType == SHADER_VERTEX)
   1135 		{
   1136 			length = VertexShader::validate(token);
   1137 		}
   1138 		else if(shaderType == SHADER_PIXEL)
   1139 		{
   1140 			length = PixelShader::validate(token);
   1141 		}
   1142 		else ASSERT(false);
   1143 
   1144 		ASSERT(length != 0);
   1145 		instruction.resize(length);
   1146 
   1147 		for(int i = 0; i < length; i++)
   1148 		{
   1149 			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
   1150 			{
   1151 				int length = (*token & 0x7FFF0000) >> 16;
   1152 
   1153 				token += length + 1;
   1154 			}
   1155 
   1156 			int tokenCount = size(*token);
   1157 
   1158 			instruction[i] = new Instruction(token, tokenCount, majorVersion);
   1159 
   1160 			token += 1 + tokenCount;
   1161 		}
   1162 	}
   1163 
   1164 	int Shader::size(unsigned long opcode) const
   1165 	{
   1166 		return size(opcode, version);
   1167 	}
   1168 
   1169 	int Shader::size(unsigned long opcode, unsigned short version)
   1170 	{
   1171 		if(version > 0x0300)
   1172 		{
   1173 			ASSERT(false);
   1174 		}
   1175 
   1176 		static const char size[] =
   1177 		{
   1178 			0,   // NOP = 0
   1179 			2,   // MOV
   1180 			3,   // ADD
   1181 			3,   // SUB
   1182 			4,   // MAD
   1183 			3,   // MUL
   1184 			2,   // RCP
   1185 			2,   // RSQ
   1186 			3,   // DP3
   1187 			3,   // DP4
   1188 			3,   // MIN
   1189 			3,   // MAX
   1190 			3,   // SLT
   1191 			3,   // SGE
   1192 			2,   // EXP
   1193 			2,   // LOG
   1194 			2,   // LIT
   1195 			3,   // DST
   1196 			4,   // LRP
   1197 			2,   // FRC
   1198 			3,   // M4x4
   1199 			3,   // M4x3
   1200 			3,   // M3x4
   1201 			3,   // M3x3
   1202 			3,   // M3x2
   1203 			1,   // CALL
   1204 			2,   // CALLNZ
   1205 			2,   // LOOP
   1206 			0,   // RET
   1207 			0,   // ENDLOOP
   1208 			1,   // LABEL
   1209 			2,   // DCL
   1210 			3,   // POW
   1211 			3,   // CRS
   1212 			4,   // SGN
   1213 			2,   // ABS
   1214 			2,   // NRM
   1215 			4,   // SINCOS
   1216 			1,   // REP
   1217 			0,   // ENDREP
   1218 			1,   // IF
   1219 			2,   // IFC
   1220 			0,   // ELSE
   1221 			0,   // ENDIF
   1222 			0,   // BREAK
   1223 			2,   // BREAKC
   1224 			2,   // MOVA
   1225 			2,   // DEFB
   1226 			5,   // DEFI
   1227 			-1,  // 49
   1228 			-1,  // 50
   1229 			-1,  // 51
   1230 			-1,  // 52
   1231 			-1,  // 53
   1232 			-1,  // 54
   1233 			-1,  // 55
   1234 			-1,  // 56
   1235 			-1,  // 57
   1236 			-1,  // 58
   1237 			-1,  // 59
   1238 			-1,  // 60
   1239 			-1,  // 61
   1240 			-1,  // 62
   1241 			-1,  // 63
   1242 			1,   // TEXCOORD = 64
   1243 			1,   // TEXKILL
   1244 			1,   // TEX
   1245 			2,   // TEXBEM
   1246 			2,   // TEXBEML
   1247 			2,   // TEXREG2AR
   1248 			2,   // TEXREG2GB
   1249 			2,   // TEXM3x2PAD
   1250 			2,   // TEXM3x2TEX
   1251 			2,   // TEXM3x3PAD
   1252 			2,   // TEXM3x3TEX
   1253 			-1,  // RESERVED0
   1254 			3,   // TEXM3x3SPEC
   1255 			2,   // TEXM3x3VSPEC
   1256 			2,   // EXPP
   1257 			2,   // LOGP
   1258 			4,   // CND
   1259 			5,   // DEF
   1260 			2,   // TEXREG2RGB
   1261 			2,   // TEXDP3TEX
   1262 			2,   // TEXM3x2DEPTH
   1263 			2,   // TEXDP3
   1264 			2,   // TEXM3x3
   1265 			1,   // TEXDEPTH
   1266 			4,   // CMP
   1267 			3,   // BEM
   1268 			4,   // DP2ADD
   1269 			2,   // DSX
   1270 			2,   // DSY
   1271 			5,   // TEXLDD
   1272 			3,   // SETP
   1273 			3,   // TEXLDL
   1274 			2,   // BREAKP
   1275 			-1,  // 97
   1276 			-1,  // 98
   1277 			-1,  // 99
   1278 			-1,  // 100
   1279 			-1,  // 101
   1280 			-1,  // 102
   1281 			-1,  // 103
   1282 			-1,  // 104
   1283 			-1,  // 105
   1284 			-1,  // 106
   1285 			-1,  // 107
   1286 			-1,  // 108
   1287 			-1,  // 109
   1288 			-1,  // 110
   1289 			-1,  // 111
   1290 			-1,  // 112
   1291 		};
   1292 
   1293 		int length = 0;
   1294 
   1295 		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
   1296 		{
   1297 			return (opcode & 0x7FFF0000) >> 16;
   1298 		}
   1299 
   1300 		if(opcode != OPCODE_PS_1_0 &&
   1301 		   opcode != OPCODE_PS_1_1 &&
   1302 		   opcode != OPCODE_PS_1_2 &&
   1303 		   opcode != OPCODE_PS_1_3 &&
   1304 		   opcode != OPCODE_PS_1_4 &&
   1305 		   opcode != OPCODE_PS_2_0 &&
   1306 		   opcode != OPCODE_PS_2_x &&
   1307 		   opcode != OPCODE_PS_3_0 &&
   1308 		   opcode != OPCODE_VS_1_0 &&
   1309 		   opcode != OPCODE_VS_1_1 &&
   1310 		   opcode != OPCODE_VS_2_0 &&
   1311 		   opcode != OPCODE_VS_2_x &&
   1312 		   opcode != OPCODE_VS_2_sw &&
   1313 		   opcode != OPCODE_VS_3_0 &&
   1314 		   opcode != OPCODE_VS_3_sw &&
   1315 		   opcode != OPCODE_PHASE &&
   1316 		   opcode != OPCODE_END)
   1317 		{
   1318 			if(version >= 0x0200)
   1319 			{
   1320 				length = (opcode & 0x0F000000) >> 24;
   1321 			}
   1322 			else
   1323 			{
   1324 				length = size[opcode & 0x0000FFFF];
   1325 			}
   1326 		}
   1327 
   1328 		if(length < 0)
   1329 		{
   1330 			ASSERT(false);
   1331 		}
   1332 
   1333 		if(version == 0x0104)
   1334 		{
   1335 			switch(opcode & 0x0000FFFF)
   1336 			{
   1337 			case OPCODE_TEX:
   1338 				length += 1;
   1339 				break;
   1340 			case OPCODE_TEXCOORD:
   1341 				length += 1;
   1342 				break;
   1343 			default:
   1344 				break;
   1345 			}
   1346 		}
   1347 
   1348 		return length;
   1349 	}
   1350 
   1351 	bool Shader::maskContainsComponent(int mask, int component)
   1352 	{
   1353 		return (mask & (1 << component)) != 0;
   1354 	}
   1355 
   1356 	bool Shader::swizzleContainsComponent(int swizzle, int component)
   1357 	{
   1358 		if((swizzle & 0x03) >> 0 == component) return true;
   1359 		if((swizzle & 0x0C) >> 2 == component) return true;
   1360 		if((swizzle & 0x30) >> 4 == component) return true;
   1361 		if((swizzle & 0xC0) >> 6 == component) return true;
   1362 
   1363 		return false;
   1364 	}
   1365 
   1366 	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
   1367 	{
   1368 		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
   1369 		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
   1370 		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
   1371 		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
   1372 
   1373 		return false;
   1374 	}
   1375 
   1376 	bool Shader::containsDynamicBranching() const
   1377 	{
   1378 		return dynamicBranching;
   1379 	}
   1380 
   1381 	bool Shader::containsBreakInstruction() const
   1382 	{
   1383 		return containsBreak;
   1384 	}
   1385 
   1386 	bool Shader::containsContinueInstruction() const
   1387 	{
   1388 		return containsContinue;
   1389 	}
   1390 
   1391 	bool Shader::containsLeaveInstruction() const
   1392 	{
   1393 		return containsLeave;
   1394 	}
   1395 
   1396 	bool Shader::containsDefineInstruction() const
   1397 	{
   1398 		return containsDefine;
   1399 	}
   1400 
   1401 	bool Shader::usesSampler(int index) const
   1402 	{
   1403 		return (usedSamplers & (1 << index)) != 0;
   1404 	}
   1405 
   1406 	int Shader::getSerialID() const
   1407 	{
   1408 		return serialID;
   1409 	}
   1410 
   1411 	size_t Shader::getLength() const
   1412 	{
   1413 		return instruction.size();
   1414 	}
   1415 
   1416 	Shader::ShaderType Shader::getShaderType() const
   1417 	{
   1418 		return shaderType;
   1419 	}
   1420 
   1421 	unsigned short Shader::getVersion() const
   1422 	{
   1423 		return version;
   1424 	}
   1425 
   1426 	void Shader::print(const char *fileName, ...) const
   1427 	{
   1428 		char fullName[1024 + 1];
   1429 
   1430 		va_list vararg;
   1431 		va_start(vararg, fileName);
   1432 		vsnprintf(fullName, 1024, fileName, vararg);
   1433 		va_end(vararg);
   1434 
   1435 		std::ofstream file(fullName, std::ofstream::out);
   1436 
   1437 		for(unsigned int i = 0; i < instruction.size(); i++)
   1438 		{
   1439 			file << instruction[i]->string(shaderType, version) << std::endl;
   1440 		}
   1441 	}
   1442 
   1443 	void Shader::printInstruction(int index, const char *fileName) const
   1444 	{
   1445 		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
   1446 
   1447 		file << instruction[index]->string(shaderType, version) << std::endl;
   1448 	}
   1449 
   1450 	void Shader::append(Instruction *instruction)
   1451 	{
   1452 		this->instruction.push_back(instruction);
   1453 	}
   1454 
   1455 	void Shader::declareSampler(int i)
   1456 	{
   1457 		usedSamplers |= 1 << i;
   1458 	}
   1459 
   1460 	const Shader::Instruction *Shader::getInstruction(unsigned int i) const
   1461 	{
   1462 		ASSERT(i < instruction.size());
   1463 
   1464 		return instruction[i];
   1465 	}
   1466 
   1467 	void Shader::optimize()
   1468 	{
   1469 		optimizeLeave();
   1470 		optimizeCall();
   1471 		removeNull();
   1472 	}
   1473 
   1474 	void Shader::optimizeLeave()
   1475 	{
   1476 		// A return (leave) right before the end of a function or the shader can be removed
   1477 		for(unsigned int i = 0; i < instruction.size(); i++)
   1478 		{
   1479 			if(instruction[i]->opcode == OPCODE_LEAVE)
   1480 			{
   1481 				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
   1482 				{
   1483 					instruction[i]->opcode = OPCODE_NULL;
   1484 				}
   1485 			}
   1486 		}
   1487 	}
   1488 
   1489 	void Shader::optimizeCall()
   1490 	{
   1491 		// Eliminate uncalled functions
   1492 		std::set<int> calledFunctions;
   1493 		bool rescan = true;
   1494 
   1495 		while(rescan)
   1496 		{
   1497 			calledFunctions.clear();
   1498 			rescan = false;
   1499 
   1500 			for(unsigned int i = 0; i < instruction.size(); i++)
   1501 			{
   1502 				if(instruction[i]->isCall())
   1503 				{
   1504 					calledFunctions.insert(instruction[i]->dst.label);
   1505 				}
   1506 			}
   1507 
   1508 			if(!calledFunctions.empty())
   1509 			{
   1510 				for(unsigned int i = 0; i < instruction.size(); i++)
   1511 				{
   1512 					if(instruction[i]->opcode == OPCODE_LABEL)
   1513 					{
   1514 						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
   1515 						{
   1516 							for( ; i < instruction.size(); i++)
   1517 							{
   1518 								Opcode oldOpcode = instruction[i]->opcode;
   1519 								instruction[i]->opcode = OPCODE_NULL;
   1520 
   1521 								if(oldOpcode == OPCODE_RET)
   1522 								{
   1523 									rescan = true;
   1524 									break;
   1525 								}
   1526 							}
   1527 						}
   1528 					}
   1529 				}
   1530 			}
   1531 		}
   1532 
   1533 		// Optimize the entry call
   1534 		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
   1535 		{
   1536 			if(calledFunctions.size() == 1)
   1537 			{
   1538 				instruction[0]->opcode = OPCODE_NULL;
   1539 				instruction[1]->opcode = OPCODE_NULL;
   1540 
   1541 				for(size_t i = 2; i < instruction.size(); i++)
   1542 				{
   1543 					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
   1544 					{
   1545 						instruction[i]->opcode = OPCODE_NULL;
   1546 					}
   1547 				}
   1548 			}
   1549 		}
   1550 	}
   1551 
   1552 	void Shader::removeNull()
   1553 	{
   1554 		size_t size = 0;
   1555 		for(size_t i = 0; i < instruction.size(); i++)
   1556 		{
   1557 			if(instruction[i]->opcode != OPCODE_NULL)
   1558 			{
   1559 				instruction[size] = instruction[i];
   1560 				size++;
   1561 			}
   1562 			else
   1563 			{
   1564 				delete instruction[i];
   1565 			}
   1566 		}
   1567 
   1568 		instruction.resize(size);
   1569 	}
   1570 
   1571 	void Shader::analyzeDirtyConstants()
   1572 	{
   1573 		dirtyConstantsF = 0;
   1574 		dirtyConstantsI = 0;
   1575 		dirtyConstantsB = 0;
   1576 
   1577 		for(unsigned int i = 0; i < instruction.size(); i++)
   1578 		{
   1579 			switch(instruction[i]->opcode)
   1580 			{
   1581 			case OPCODE_DEF:
   1582 				if(instruction[i]->dst.index + 1 > dirtyConstantsF)
   1583 				{
   1584 					dirtyConstantsF = instruction[i]->dst.index + 1;
   1585 				}
   1586 				break;
   1587 			case OPCODE_DEFI:
   1588 				if(instruction[i]->dst.index + 1 > dirtyConstantsI)
   1589 				{
   1590 					dirtyConstantsI = instruction[i]->dst.index + 1;
   1591 				}
   1592 				break;
   1593 			case OPCODE_DEFB:
   1594 				if(instruction[i]->dst.index + 1 > dirtyConstantsB)
   1595 				{
   1596 					dirtyConstantsB = instruction[i]->dst.index + 1;
   1597 				}
   1598 				break;
   1599 			default:
   1600 				break;
   1601 			}
   1602 		}
   1603 	}
   1604 
   1605 	void Shader::analyzeDynamicBranching()
   1606 	{
   1607 		dynamicBranching = false;
   1608 		containsLeave = false;
   1609 		containsBreak = false;
   1610 		containsContinue = false;
   1611 		containsDefine = false;
   1612 
   1613 		// Determine global presence of branching instructions
   1614 		for(unsigned int i = 0; i < instruction.size(); i++)
   1615 		{
   1616 			switch(instruction[i]->opcode)
   1617 			{
   1618 			case OPCODE_CALLNZ:
   1619 			case OPCODE_IF:
   1620 			case OPCODE_IFC:
   1621 			case OPCODE_BREAK:
   1622 			case OPCODE_BREAKC:
   1623 			case OPCODE_CMP:
   1624 			case OPCODE_BREAKP:
   1625 			case OPCODE_LEAVE:
   1626 			case OPCODE_CONTINUE:
   1627 				if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
   1628 				{
   1629 					dynamicBranching = true;
   1630 				}
   1631 
   1632 				if(instruction[i]->opcode == OPCODE_LEAVE)
   1633 				{
   1634 					containsLeave = true;
   1635 				}
   1636 
   1637 				if(instruction[i]->isBreak())
   1638 				{
   1639 					containsBreak = true;
   1640 				}
   1641 
   1642 				if(instruction[i]->opcode == OPCODE_CONTINUE)
   1643 				{
   1644 					containsContinue = true;
   1645 				}
   1646 			case OPCODE_DEF:
   1647 			case OPCODE_DEFB:
   1648 			case OPCODE_DEFI:
   1649 				containsDefine = true;
   1650 			default:
   1651 				break;
   1652 			}
   1653 		}
   1654 
   1655 		// Conservatively determine which instructions are affected by dynamic branching
   1656 		int branchDepth = 0;
   1657 		int breakDepth = 0;
   1658 		int continueDepth = 0;
   1659 		bool leaveReturn = false;
   1660 
   1661 		for(unsigned int i = 0; i < instruction.size(); i++)
   1662 		{
   1663 			// If statements
   1664 			if(instruction[i]->isBranch())
   1665 			{
   1666 				branchDepth++;
   1667 			}
   1668 			else if(instruction[i]->opcode == OPCODE_ENDIF)
   1669 			{
   1670 				branchDepth--;
   1671 			}
   1672 
   1673 			if(branchDepth > 0)
   1674 			{
   1675 				instruction[i]->analysisBranch = true;
   1676 
   1677 				if(instruction[i]->isCall())
   1678 				{
   1679 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
   1680 				}
   1681 			}
   1682 
   1683 			// Break statemement
   1684 			if(instruction[i]->isBreak())
   1685 			{
   1686 				breakDepth++;
   1687 			}
   1688 
   1689 			if(breakDepth > 0)
   1690 			{
   1691 				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
   1692 				{
   1693 					breakDepth++;
   1694 				}
   1695 				else if(instruction[i]->isEndLoopOrSwitch())
   1696 				{
   1697 					breakDepth--;
   1698 				}
   1699 
   1700 				instruction[i]->analysisBreak = true;
   1701 
   1702 				if(instruction[i]->isCall())
   1703 				{
   1704 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
   1705 				}
   1706 			}
   1707 
   1708 			// Continue statement
   1709 			if(instruction[i]->opcode == OPCODE_CONTINUE)
   1710 			{
   1711 				continueDepth++;
   1712 			}
   1713 
   1714 			if(continueDepth > 0)
   1715 			{
   1716 				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
   1717 				{
   1718 					continueDepth++;
   1719 				}
   1720 				else if(instruction[i]->isEndLoopOrSwitch())
   1721 				{
   1722 					continueDepth--;
   1723 				}
   1724 
   1725 				instruction[i]->analysisContinue = true;
   1726 
   1727 				if(instruction[i]->isCall())
   1728 				{
   1729 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
   1730 				}
   1731 			}
   1732 
   1733 			// Return (leave) statement
   1734 			if(instruction[i]->opcode == OPCODE_LEAVE)
   1735 			{
   1736 				leaveReturn = true;
   1737 			}
   1738 			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
   1739 			{
   1740 				leaveReturn = false;
   1741 			}
   1742 
   1743 			if(leaveReturn)
   1744 			{
   1745 				instruction[i]->analysisLeave = true;
   1746 
   1747 				if(instruction[i]->isCall())
   1748 				{
   1749 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
   1750 				}
   1751 			}
   1752 		}
   1753 	}
   1754 
   1755 	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
   1756 	{
   1757 		bool marker = false;
   1758 		for(unsigned int i = 0; i < instruction.size(); i++)
   1759 		{
   1760 			if(!marker)
   1761 			{
   1762 				if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
   1763 				{
   1764 					marker = true;
   1765 				}
   1766 			}
   1767 			else
   1768 			{
   1769 				if(instruction[i]->opcode == OPCODE_RET)
   1770 				{
   1771 					break;
   1772 				}
   1773 				else if(instruction[i]->isCall())
   1774 				{
   1775 					markFunctionAnalysis(instruction[i]->dst.label, flag);
   1776 				}
   1777 
   1778 				instruction[i]->analysis |= flag;
   1779 			}
   1780 		}
   1781 	}
   1782 
   1783 	void Shader::analyzeSamplers()
   1784 	{
   1785 		for(unsigned int i = 0; i < instruction.size(); i++)
   1786 		{
   1787 			switch(instruction[i]->opcode)
   1788 			{
   1789 			case OPCODE_TEX:
   1790 			case OPCODE_TEXBEM:
   1791 			case OPCODE_TEXBEML:
   1792 			case OPCODE_TEXREG2AR:
   1793 			case OPCODE_TEXREG2GB:
   1794 			case OPCODE_TEXM3X2TEX:
   1795 			case OPCODE_TEXM3X3TEX:
   1796 			case OPCODE_TEXM3X3SPEC:
   1797 			case OPCODE_TEXM3X3VSPEC:
   1798 			case OPCODE_TEXREG2RGB:
   1799 			case OPCODE_TEXDP3TEX:
   1800 			case OPCODE_TEXM3X2DEPTH:
   1801 			case OPCODE_TEXLDD:
   1802 			case OPCODE_TEXLDL:
   1803 			case OPCODE_TEXOFFSET:
   1804 			case OPCODE_TEXLDLOFFSET:
   1805 			case OPCODE_TEXELFETCH:
   1806 			case OPCODE_TEXELFETCHOFFSET:
   1807 			case OPCODE_TEXGRAD:
   1808 			case OPCODE_TEXGRADOFFSET:
   1809 				{
   1810 					Parameter &dst = instruction[i]->dst;
   1811 					Parameter &src1 = instruction[i]->src[1];
   1812 
   1813 					if(majorVersion >= 2)
   1814 					{
   1815 						usedSamplers |= 1 << src1.index;
   1816 					}
   1817 					else
   1818 					{
   1819 						usedSamplers |= 1 << dst.index;
   1820 					}
   1821 				}
   1822 				break;
   1823 			default:
   1824 				break;
   1825 			}
   1826 		}
   1827 	}
   1828 
   1829 	// Assigns a unique index to each call instruction, on a per label basis.
   1830 	// This is used to know what basic block to return to.
   1831 	void Shader::analyzeCallSites()
   1832 	{
   1833 		int callSiteIndex[2048] = {0};
   1834 
   1835 		for(unsigned int i = 0; i < instruction.size(); i++)
   1836 		{
   1837 			if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
   1838 			{
   1839 				int label = instruction[i]->dst.label;
   1840 
   1841 				instruction[i]->dst.callSite = callSiteIndex[label]++;
   1842 			}
   1843 		}
   1844 	}
   1845 
   1846 	void Shader::analyzeDynamicIndexing()
   1847 	{
   1848 		dynamicallyIndexedTemporaries = false;
   1849 		dynamicallyIndexedInput = false;
   1850 		dynamicallyIndexedOutput = false;
   1851 
   1852 		for(unsigned int i = 0; i < instruction.size(); i++)
   1853 		{
   1854 			if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
   1855 			   instruction[i]->dst.rel.type == PARAMETER_LOOP ||
   1856 			   instruction[i]->dst.rel.type == PARAMETER_TEMP ||
   1857 			   instruction[i]->dst.rel.type == PARAMETER_CONST)
   1858 			{
   1859 				switch(instruction[i]->dst.type)
   1860 				{
   1861 				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
   1862 				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
   1863 				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
   1864 				default: break;
   1865 				}
   1866 			}
   1867 
   1868 			for(int j = 0; j < 3; j++)
   1869 			{
   1870 				if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
   1871 				   instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
   1872 				   instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
   1873 				   instruction[i]->src[j].rel.type == PARAMETER_CONST)
   1874 				{
   1875 					switch(instruction[i]->src[j].type)
   1876 					{
   1877 					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
   1878 					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
   1879 					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
   1880 					default: break;
   1881 					}
   1882 				}
   1883 			}
   1884 		}
   1885 	}
   1886 }
   1887