Home | History | Annotate | Download | only in compiler
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "OutputASM.h"
     16 #include "Common/Math.hpp"
     17 
     18 #include "common/debug.h"
     19 #include "InfoSink.h"
     20 
     21 #include "libGLESv2/Shader.h"
     22 
     23 #include <GLES2/gl2.h>
     24 #include <GLES2/gl2ext.h>
     25 #include <GLES3/gl3.h>
     26 
     27 #include <stdlib.h>
     28 
     29 namespace glsl
     30 {
     31 	// Integer to TString conversion
     32 	TString str(int i)
     33 	{
     34 		char buffer[20];
     35 		sprintf(buffer, "%d", i);
     36 		return buffer;
     37 	}
     38 
     39 	class Temporary : public TIntermSymbol
     40 	{
     41 	public:
     42 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
     43 		{
     44 		}
     45 
     46 		~Temporary()
     47 		{
     48 			assembler->freeTemporary(this);
     49 		}
     50 
     51 	private:
     52 		OutputASM *const assembler;
     53 	};
     54 
     55 	class Constant : public TIntermConstantUnion
     56 	{
     57 	public:
     58 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
     59 		{
     60 			constants[0].setFConst(x);
     61 			constants[1].setFConst(y);
     62 			constants[2].setFConst(z);
     63 			constants[3].setFConst(w);
     64 		}
     65 
     66 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
     67 		{
     68 			constants[0].setBConst(b);
     69 		}
     70 
     71 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
     72 		{
     73 			constants[0].setIConst(i);
     74 		}
     75 
     76 		~Constant()
     77 		{
     78 		}
     79 
     80 	private:
     81 		ConstantUnion constants[4];
     82 	};
     83 
     84 	Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
     85 		type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
     86 	{
     87 	}
     88 
     89 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
     90 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
     91 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
     92 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
     93 	{
     94 	}
     95 
     96 	BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)
     97 		: mCurrentOffset(0), isRowMajor(rowMajor)
     98 	{
     99 	}
    100 
    101 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
    102 	{
    103 		int arrayStride;
    104 		int matrixStride;
    105 
    106 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
    107 
    108 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
    109 		                                 static_cast<int>(arrayStride * BytesPerComponent),
    110 		                                 static_cast<int>(matrixStride * BytesPerComponent),
    111 		                                 (matrixStride > 0) && isRowMajor);
    112 
    113 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
    114 
    115 		return memberInfo;
    116 	}
    117 
    118 	// static
    119 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
    120 	{
    121 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
    122 	}
    123 
    124 	// static
    125 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
    126 	{
    127 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
    128 	}
    129 
    130 	void BlockLayoutEncoder::nextRegister()
    131 	{
    132 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
    133 	}
    134 
    135 	Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)
    136 	{
    137 	}
    138 
    139 	void Std140BlockEncoder::enterAggregateType()
    140 	{
    141 		nextRegister();
    142 	}
    143 
    144 	void Std140BlockEncoder::exitAggregateType()
    145 	{
    146 		nextRegister();
    147 	}
    148 
    149 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
    150 	{
    151 		size_t baseAlignment = 0;
    152 		int matrixStride = 0;
    153 		int arrayStride = 0;
    154 
    155 		if(type.isMatrix())
    156 		{
    157 			baseAlignment = ComponentsPerRegister;
    158 			matrixStride = ComponentsPerRegister;
    159 
    160 			if(arraySize > 0)
    161 			{
    162 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    163 				arrayStride = ComponentsPerRegister * numRegisters;
    164 			}
    165 		}
    166 		else if(arraySize > 0)
    167 		{
    168 			baseAlignment = ComponentsPerRegister;
    169 			arrayStride = ComponentsPerRegister;
    170 		}
    171 		else
    172 		{
    173 			const size_t numComponents = type.getElementSize();
    174 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
    175 		}
    176 
    177 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
    178 
    179 		*matrixStrideOut = matrixStride;
    180 		*arrayStrideOut = arrayStride;
    181 	}
    182 
    183 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
    184 	{
    185 		if(arraySize > 0)
    186 		{
    187 			mCurrentOffset += arrayStride * arraySize;
    188 		}
    189 		else if(type.isMatrix())
    190 		{
    191 			ASSERT(matrixStride == ComponentsPerRegister);
    192 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    193 			mCurrentOffset += ComponentsPerRegister * numRegisters;
    194 		}
    195 		else
    196 		{
    197 			mCurrentOffset += type.getElementSize();
    198 		}
    199 	}
    200 
    201 	Attribute::Attribute()
    202 	{
    203 		type = GL_NONE;
    204 		arraySize = 0;
    205 		registerIndex = 0;
    206 	}
    207 
    208 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
    209 	{
    210 		this->type = type;
    211 		this->name = name;
    212 		this->arraySize = arraySize;
    213 		this->location = location;
    214 		this->registerIndex = registerIndex;
    215 	}
    216 
    217 	sw::PixelShader *Shader::getPixelShader() const
    218 	{
    219 		return 0;
    220 	}
    221 
    222 	sw::VertexShader *Shader::getVertexShader() const
    223 	{
    224 		return 0;
    225 	}
    226 
    227 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
    228 	{
    229 		TString name = TFunction::unmangleName(nodeName);
    230 
    231 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")
    232 		{
    233 			method = IMPLICIT;
    234 		}
    235 		else if(name == "texture2DProj" || name == "textureProj")
    236 		{
    237 			method = IMPLICIT;
    238 			proj = true;
    239 		}
    240 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
    241 		{
    242 			method = LOD;
    243 		}
    244 		else if(name == "texture2DProjLod" || name == "textureProjLod")
    245 		{
    246 			method = LOD;
    247 			proj = true;
    248 		}
    249 		else if(name == "textureSize")
    250 		{
    251 			method = SIZE;
    252 		}
    253 		else if(name == "textureOffset")
    254 		{
    255 			method = IMPLICIT;
    256 			offset = true;
    257 		}
    258 		else if(name == "textureProjOffset")
    259 		{
    260 			method = IMPLICIT;
    261 			offset = true;
    262 			proj = true;
    263 		}
    264 		else if(name == "textureLodOffset")
    265 		{
    266 			method = LOD;
    267 			offset = true;
    268 		}
    269 		else if(name == "textureProjLodOffset")
    270 		{
    271 			method = LOD;
    272 			proj = true;
    273 			offset = true;
    274 		}
    275 		else if(name == "texelFetch")
    276 		{
    277 			method = FETCH;
    278 		}
    279 		else if(name == "texelFetchOffset")
    280 		{
    281 			method = FETCH;
    282 			offset = true;
    283 		}
    284 		else if(name == "textureGrad")
    285 		{
    286 			method = GRAD;
    287 		}
    288 		else if(name == "textureGradOffset")
    289 		{
    290 			method = GRAD;
    291 			offset = true;
    292 		}
    293 		else if(name == "textureProjGrad")
    294 		{
    295 			method = GRAD;
    296 			proj = true;
    297 		}
    298 		else if(name == "textureProjGradOffset")
    299 		{
    300 			method = GRAD;
    301 			proj = true;
    302 			offset = true;
    303 		}
    304 		else UNREACHABLE(0);
    305 	}
    306 
    307 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
    308 	{
    309 		shader = 0;
    310 		pixelShader = 0;
    311 		vertexShader = 0;
    312 
    313 		if(shaderObject)
    314 		{
    315 			shader = shaderObject->getShader();
    316 			pixelShader = shaderObject->getPixelShader();
    317 			vertexShader = shaderObject->getVertexShader();
    318 		}
    319 
    320 		functionArray.push_back(Function(0, "main(", 0, 0));
    321 		currentFunction = 0;
    322 		outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData
    323 	}
    324 
    325 	OutputASM::~OutputASM()
    326 	{
    327 	}
    328 
    329 	void OutputASM::output()
    330 	{
    331 		if(shader)
    332 		{
    333 			emitShader(GLOBAL);
    334 
    335 			if(functionArray.size() > 1)   // Only call main() when there are other functions
    336 			{
    337 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
    338 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
    339 				callMain->dst.index = 0;   // main()
    340 
    341 				emit(sw::Shader::OPCODE_RET);
    342 			}
    343 
    344 			emitShader(FUNCTION);
    345 		}
    346 	}
    347 
    348 	void OutputASM::emitShader(Scope scope)
    349 	{
    350 		emitScope = scope;
    351 		currentScope = GLOBAL;
    352 		mContext.getTreeRoot()->traverse(this);
    353 	}
    354 
    355 	void OutputASM::freeTemporary(Temporary *temporary)
    356 	{
    357 		free(temporaries, temporary);
    358 	}
    359 
    360 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
    361 	{
    362 		TBasicType baseType = in->getType().getBasicType();
    363 
    364 		switch(op)
    365 		{
    366 		case sw::Shader::OPCODE_NEG:
    367 			switch(baseType)
    368 			{
    369 			case EbtInt:
    370 			case EbtUInt:
    371 				return sw::Shader::OPCODE_INEG;
    372 			case EbtFloat:
    373 			default:
    374 				return op;
    375 			}
    376 		case sw::Shader::OPCODE_ABS:
    377 			switch(baseType)
    378 			{
    379 			case EbtInt:
    380 				return sw::Shader::OPCODE_IABS;
    381 			case EbtFloat:
    382 			default:
    383 				return op;
    384 			}
    385 		case sw::Shader::OPCODE_SGN:
    386 			switch(baseType)
    387 			{
    388 			case EbtInt:
    389 				return sw::Shader::OPCODE_ISGN;
    390 			case EbtFloat:
    391 			default:
    392 				return op;
    393 			}
    394 		case sw::Shader::OPCODE_ADD:
    395 			switch(baseType)
    396 			{
    397 			case EbtInt:
    398 			case EbtUInt:
    399 				return sw::Shader::OPCODE_IADD;
    400 			case EbtFloat:
    401 			default:
    402 				return op;
    403 			}
    404 		case sw::Shader::OPCODE_SUB:
    405 			switch(baseType)
    406 			{
    407 			case EbtInt:
    408 			case EbtUInt:
    409 				return sw::Shader::OPCODE_ISUB;
    410 			case EbtFloat:
    411 			default:
    412 				return op;
    413 			}
    414 		case sw::Shader::OPCODE_MUL:
    415 			switch(baseType)
    416 			{
    417 			case EbtInt:
    418 			case EbtUInt:
    419 				return sw::Shader::OPCODE_IMUL;
    420 			case EbtFloat:
    421 			default:
    422 				return op;
    423 			}
    424 		case sw::Shader::OPCODE_DIV:
    425 			switch(baseType)
    426 			{
    427 			case EbtInt:
    428 				return sw::Shader::OPCODE_IDIV;
    429 			case EbtUInt:
    430 				return sw::Shader::OPCODE_UDIV;
    431 			case EbtFloat:
    432 			default:
    433 				return op;
    434 			}
    435 		case sw::Shader::OPCODE_IMOD:
    436 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
    437 		case sw::Shader::OPCODE_ISHR:
    438 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
    439 		case sw::Shader::OPCODE_MIN:
    440 			switch(baseType)
    441 			{
    442 			case EbtInt:
    443 				return sw::Shader::OPCODE_IMIN;
    444 			case EbtUInt:
    445 				return sw::Shader::OPCODE_UMIN;
    446 			case EbtFloat:
    447 			default:
    448 				return op;
    449 			}
    450 		case sw::Shader::OPCODE_MAX:
    451 			switch(baseType)
    452 			{
    453 			case EbtInt:
    454 				return sw::Shader::OPCODE_IMAX;
    455 			case EbtUInt:
    456 				return sw::Shader::OPCODE_UMAX;
    457 			case EbtFloat:
    458 			default:
    459 				return op;
    460 			}
    461 		default:
    462 			return op;
    463 		}
    464 	}
    465 
    466 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
    467 	{
    468 		// Vertex varyings don't have to be actively used to successfully link
    469 		// against pixel shaders that use them. So make sure they're declared.
    470 		if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)
    471 		{
    472 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
    473 			{
    474 				declareVarying(symbol, -1);
    475 			}
    476 		}
    477 
    478 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
    479 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
    480 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
    481 		// are considered active, even if they are not referenced in any shader in the program.
    482 		// The uniform block itself is also considered active, even if no member of the block is referenced."
    483 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
    484 		{
    485 			uniformRegister(symbol);
    486 		}
    487 	}
    488 
        	// Emits shader instructions for a binary expression node.
        	//
        	// Most operators emit only when visit == PostVisit. The short-circuit forms of
        	// logical OR / AND additionally emit during InVisit.
        	//
        	// Returns false, before emitting anything, when the current scope is not the
        	// scope being emitted or when the node is a sampler register. Returns true
        	// in every other case.
    489 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
    490 	{
    491 		if(currentScope != emitScope)
    492 		{
    493 			return false;
    494 		}
    495 
        		// 'result' is the node itself: it is used as the destination operand below
    496 		TIntermTyped *result = node;
    497 		TIntermTyped *left = node->getLeft();
    498 		TIntermTyped *right = node->getRight();
    499 		const TType &leftType = left->getType();
    500 		const TType &rightType = right->getType();
    501 
    502 		if(isSamplerRegister(result))
    503 		{
    504 			return false;   // Don't traverse, the register index is determined statically
    505 		}
    506 
    507 		switch(node->getOp())
    508 		{
    509 		case EOpAssign:
    510 			if(visit == PostVisit)
    511 			{
        				// Store into the l-value, then copy the assigned value into the node's own result
    512 				assignLvalue(left, right);
    513 				copy(result, right);
    514 			}
    515 			break;
    516 		case EOpInitialize:
    517 			if(visit == PostVisit)
    518 			{
    519 				copy(left, right);
    520 			}
    521 			break;
    522 		case EOpMatrixTimesScalarAssign:
    523 			if(visit == PostVisit)
    524 			{
        				// One MUL per index below left's nominal size, then store result back into left
    525 				for(int i = 0; i < leftType.getNominalSize(); i++)
    526 				{
    527 					emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
    528 				}
    529 
    530 				assignLvalue(left, result);
    531 			}
    532 			break;
    533 		case EOpVectorTimesMatrixAssign:
    534 			if(visit == PostVisit)
    535 			{
    536 				int size = leftType.getNominalSize();
    537 
    538 				for(int i = 0; i < size; i++)
    539 				{
        					// Dot product of left with register i of right; the mask restricts the write to component i
    540 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
    541 					dot->dst.mask = 1 << i;
    542 				}
    543 
    544 				assignLvalue(left, result);
    545 			}
    546 			break;
    547 		case EOpMatrixTimesMatrixAssign:
    548 			if(visit == PostVisit)
    549 			{
    550 				int dim = leftType.getNominalSize();
    551 
    552 				for(int i = 0; i < dim; i++)
    553 				{
        					// Register i of result starts as left[0] * right[i] with swizzle 0 on the second source,
        					// then accumulates left[j] * right[i] through MAD for the remaining j
    554 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    555 					mul->src[1].swizzle = 0x00;
    556 
    557 					for(int j = 1; j < dim; j++)
    558 					{
    559 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
        						// 0x55 is 01010101b, so j * 0x55 repeats the 2-bit value j in all four swizzle fields
    560 						mad->src[1].swizzle = j * 0x55;
    561 					}
    562 				}
    563 
    564 				assignLvalue(left, result);
    565 			}
    566 			break;
    567 		case EOpIndexDirect:
    568 			if(visit == PostVisit)
    569 			{
        				// The index is a constant, so it is turned into a fixed register offset or swizzle
    570 				int index = right->getAsConstantUnion()->getIConst(0);
    571 
    572 				if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())
    573 				{
    574 					ASSERT(left->isArray());
    575 					copy(result, left, index * left->elementRegisterCount());
    576 				}
    577 				else if(result->isRegister())
    578 				{
    579 					int srcIndex = 0;
    580 					if(left->isRegister())
    581 					{
    582 						srcIndex = 0;
    583 					}
    584 					else if(left->isArray())
    585 					{
    586 						srcIndex = index * left->elementRegisterCount();
    587 					}
    588 					else if(left->isMatrix())
    589 					{
    590 						ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error
    591 						srcIndex = index;
    592 					}
    593 					else UNREACHABLE(0);
    594 
    595 					Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);
    596 
        					// When left is itself a register, the index is applied through the source swizzle
    597 					if(left->isRegister())
    598 					{
    599 						mov->src[0].swizzle = index;
    600 					}
    601 				}
    602 				else UNREACHABLE(0);
    603 			}
    604 			break;
    605 		case EOpIndexIndirect:
    606 			if(visit == PostVisit)
    607 			{
    608 				if(left->isArray() || left->isMatrix())
    609 				{
        					// One MOV per register of the result
    610 					for(int index = 0; index < result->totalRegisterCount(); index++)
    611 					{
    612 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);
    613 						mov->dst.mask = writeMask(result, index);
    614 
        						// Relative addressing is only set up when left spans more than one register
    615 						if(left->totalRegisterCount() > 1)
    616 						{
    617 							sw::Shader::SourceParameter relativeRegister;
    618 							argument(relativeRegister, right);
    619 
    620 							mov->src[0].rel.type = relativeRegister.type;
    621 							mov->src[0].rel.index = relativeRegister.index;
    622 							mov->src[0].rel.scale =	result->totalRegisterCount();
        							// Cleared only when indexing a uniform in a vertex shader
    623 							mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
    624 						}
    625 					}
    626 				}
    627 				else if(left->isRegister())
    628 				{
    629 					emit(sw::Shader::OPCODE_EXTRACT, result, left, right);
    630 				}
    631 				else UNREACHABLE(0);
    632 			}
    633 			break;
    634 		case EOpIndexDirectStruct:
    635 		case EOpIndexDirectInterfaceBlock:
    636 			if(visit == PostVisit)
    637 			{
    638 				ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));
    639 
    640 				const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?
    641 				                           leftType.getStruct()->fields() :
    642 				                           leftType.getInterfaceBlock()->fields();
    643 				int index = right->getAsConstantUnion()->getIConst(0);
    644 				int fieldOffset = 0;
    645 
        				// The selected field starts after the registers of all fields that precede it
    646 				for(int i = 0; i < index; i++)
    647 				{
    648 					fieldOffset += fields[i]->type()->totalRegisterCount();
    649 				}
    650 
    651 				copy(result, left, fieldOffset);
    652 			}
    653 			break;
    654 		case EOpVectorSwizzle:
    655 			if(visit == PostVisit)
    656 			{
    657 				int swizzle = 0;
    658 				TIntermAggregate *components = right->getAsAggregate();
    659 
    660 				if(components)
    661 				{
    662 					TIntermSequence &sequence = components->getSequence();
    663 					int component = 0;
    664 
        					// Pack each selected component index into successive 2-bit fields of the swizzle
    665 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
    666 					{
    667 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
    668 
    669 						if(element)
    670 						{
    671 							int i = element->getUnionArrayPointer()[0].getIConst();
    672 							swizzle |= i << (component * 2);
    673 							component++;
    674 						}
    675 						else UNREACHABLE(0);
    676 					}
    677 				}
    678 				else UNREACHABLE(0);
    679 
    680 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
    681 				mov->src[0].swizzle = swizzle;
    682 			}
    683 			break;
        		// Arithmetic and bitwise operators: the *Assign forms go through emitAssign with left as the l-value,
        		// the plain forms through emitBinary. getOpcode() picks the integer opcode variant where one exists.
    684 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
    685 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
    686 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
    687 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
    688 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
    689 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
    690 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
    691 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
    692 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
    693 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
    694 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
    695 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
    696 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
    697 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
    698 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
    699 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
    700 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
    701 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
    702 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
    703 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
    704 		case EOpEqual:
    705 			if(visit == PostVisit)
    706 			{
    707 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
    708 
        				// Registers 1 and up of the operands are compared separately and ANDed into result
    709 				for(int index = 1; index < left->totalRegisterCount(); index++)
    710 				{
    711 					Temporary equal(this);
    712 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
    713 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
    714 				}
    715 			}
    716 			break;
    717 		case EOpNotEqual:
    718 			if(visit == PostVisit)
    719 			{
    720 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
    721 
        				// Registers 1 and up of the operands are compared separately and ORed into result
    722 				for(int index = 1; index < left->totalRegisterCount(); index++)
    723 				{
    724 					Temporary notEqual(this);
    725 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
    726 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
    727 				}
    728 			}
    729 			break;
    730 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
    731 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
    732 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
    733 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
    734 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
    735 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
    736 		case EOpMatrixTimesScalar:
    737 			if(visit == PostVisit)
    738 			{
        				// Either operand may be the matrix; one MUL is emitted per index below its nominal size
    739 				if(left->isMatrix())
    740 				{
    741 					for(int i = 0; i < leftType.getNominalSize(); i++)
    742 					{
    743 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
    744 					}
    745 				}
    746 				else if(right->isMatrix())
    747 				{
    748 					for(int i = 0; i < rightType.getNominalSize(); i++)
    749 					{
    750 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    751 					}
    752 				}
    753 				else UNREACHABLE(0);
    754 			}
    755 			break;
    756 		case EOpVectorTimesMatrix:
    757 			if(visit == PostVisit)
    758 			{
        				// The dot product width comes from the vector (left); the number of dot products from the matrix (right)
    759 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
    760 
    761 				int size = rightType.getNominalSize();
    762 				for(int i = 0; i < size; i++)
    763 				{
    764 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
    765 					dot->dst.mask = 1 << i;
    766 				}
    767 			}
    768 			break;
    769 		case EOpMatrixTimesVector:
    770 			if(visit == PostVisit)
    771 			{
        				// result = left[0] * right with swizzle 0 on the vector, then MAD in left[i] * right with
        				// swizzle i * 0x55 (the 2-bit value i repeated in all four fields)
    772 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
    773 				mul->src[1].swizzle = 0x00;
    774 
    775 				int size = rightType.getNominalSize();
    776 				for(int i = 1; i < size; i++)
    777 				{
    778 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
    779 					mad->src[1].swizzle = i * 0x55;
    780 				}
    781 			}
    782 			break;
    783 		case EOpMatrixTimesMatrix:
    784 			if(visit == PostVisit)
    785 			{
        				// Same MUL + MAD accumulation as EOpMatrixTimesMatrixAssign, but the outer loop count comes
        				// from right's nominal size and nothing is written back to left
    786 				int dim = leftType.getNominalSize();
    787 
    788 				int size = rightType.getNominalSize();
    789 				for(int i = 0; i < size; i++)
    790 				{
    791 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    792 					mul->src[1].swizzle = 0x00;
    793 
    794 					for(int j = 1; j < dim; j++)
    795 					{
    796 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
    797 						mad->src[1].swizzle = j * 0x55;
    798 					}
    799 				}
    800 			}
    801 			break;
    802 		case EOpLogicalOr:
        			// NOTE(review): trivial(right, 6) presumably decides whether right is cheap enough to
        			// evaluate unconditionally - confirm against its definition
    803 			if(trivial(right, 6))
    804 			{
    805 				if(visit == PostVisit)
    806 				{
    807 					emit(sw::Shader::OPCODE_OR, result, left, right);
    808 				}
    809 			}
    810 			else   // Short-circuit evaluation
    811 			{
    812 				if(visit == InVisit)
    813 				{
        					// result = left, then open an IF on NOT result. The matching ENDIF is emitted at PostVisit,
        					// so anything emitted in between is inside the IF block.
    814 					emit(sw::Shader::OPCODE_MOV, result, left);
    815 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
    816 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
    817 				}
    818 				else if(visit == PostVisit)
    819 				{
    820 					emit(sw::Shader::OPCODE_MOV, result, right);
    821 					emit(sw::Shader::OPCODE_ENDIF);
    822 				}
    823 			}
    824 			break;
    825 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
    826 		case EOpLogicalAnd:
    827 			if(trivial(right, 6))
    828 			{
    829 				if(visit == PostVisit)
    830 				{
    831 					emit(sw::Shader::OPCODE_AND, result, left, right);
    832 				}
    833 			}
    834 			else   // Short-circuit evaluation
    835 			{
    836 				if(visit == InVisit)
    837 				{
        					// Same structure as the OR case, but the IF tests result without the NOT modifier
    838 					emit(sw::Shader::OPCODE_MOV, result, left);
    839 					emit(sw::Shader::OPCODE_IF, 0, result);
    840 				}
    841 				else if(visit == PostVisit)
    842 				{
    843 					emit(sw::Shader::OPCODE_MOV, result, right);
    844 					emit(sw::Shader::OPCODE_ENDIF);
    845 				}
    846 			}
    847 			break;
    848 		default: UNREACHABLE(node->getOp());
    849 		}
    850 
    851 		return true;
    852 	}
    853 
        	// Emits a single instruction that writes a determinant or cofactor of 'arg'
        	// into component 'outRow' (via the destination mask) of register 'outCol' of 'result'.
        	//
        	// 'size' selects the instruction: 1 -> MOV/NEG, 2 -> DET2, 3 -> DET3, 4 -> DET4.
        	// Any other size reaches UNREACHABLE.
        	//
        	// For sizes 2 and 3, passing non-negative 'col' and 'row' selects cofactor mode:
        	// the source registers are chosen so that register 'col' is skipped, the swizzle
        	// table entry for 'row' is used, and two of the source operands are swapped when
        	// col and row have different parity. Otherwise the first 'size' registers and
        	// the last swizzle table entry are used.
        	// Size 4 ignores 'col' and 'row'.
    854 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
    855 	{
    856 		switch(size)
    857 		{
    858 		case 1: // Used for cofactor computation only
    859 			{
    860 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
    861 				bool isMov = (row == col);
    862 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
        				// On the diagonal the opposite register and component (1 - row, 1 - col) are read;
        				// off the diagonal register 'row' and component 'col' are read and negated
    863 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
    864 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
    865 				mov->dst.mask = 1 << outRow;
    866 			}
    867 			break;
    868 		case 2:
    869 			{
    870 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
    871 
    872 				bool isCofactor = (col >= 0) && (row >= 0);
        				// In cofactor mode, source register indices at or after 'col' shift up by one so that 'col' is skipped
    873 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
    874 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
        				// True when exactly one of col and row is odd
    875 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
    876 
        				// 'negate' is applied by swapping the two source operands
    877 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
    878 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
    879 				det->dst.mask = 1 << outRow;
    880 			}
    881 			break;
    882 		case 3:
    883 			{
    884 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
    885 
    886 				bool isCofactor = (col >= 0) && (row >= 0);
        				// In cofactor mode, source register indices at or after 'col' shift up by one so that 'col' is skipped
    887 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
    888 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
    889 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
        				// True when exactly one of col and row is odd
    890 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
    891 
        				// 'negate' is applied by swapping the last two source operands
    892 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
    893 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
    894 				det->dst.mask = 1 << outRow;
    895 			}
    896 			break;
    897 		case 4:
    898 			{
        				// Full 4x4 determinant over registers 0..3 of arg
    899 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
    900 				det->dst.mask = 1 << outRow;
    901 			}
    902 			break;
    903 		default:
    904 			UNREACHABLE(size);
    905 			break;
    906 		}
    907 	}
    908 
    909 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
    910 	{
        		// Emits the instruction(s) for a unary operator or a single-argument built-in.
        		// Nothing is emitted unless currentScope matches emitScope, and every case
        		// only acts on the PostVisit pass. The node itself serves as the destination.
    911 		if(currentScope != emitScope)
    912 		{
    913 			return false;
    914 		}
    915 
    916 		TIntermTyped *result = node;
    917 		TIntermTyped *arg = node->getOperand();
    918 		TBasicType basicType = arg->getType().getBasicType();
    919 
        		// Step used by the increment/decrement operators: the integer 1 for int/uint
        		// operands (its bit pattern is passed through the float-typed Constant below),
        		// and 1.0f for everything else.
    920 		union
    921 		{
    922 			float f;
    923 			int i;
    924 		} one_value;
    925 
    926 		if(basicType == EbtInt || basicType == EbtUInt)
    927 		{
    928 			one_value.i = 1;
    929 		}
    930 		else
    931 		{
    932 			one_value.f = 1.0f;
    933 		}
    934 
    935 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
        		// Conversion factors: pi / 180 (degrees to radians) and 180 / pi (radians to degrees)
    936 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
    937 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
    938 
    939 		switch(node->getOp())
    940 		{
    941 		case EOpNegative:
    942 			if(visit == PostVisit)
    943 			{
        				// One instruction per register of the operand
    944 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
    945 				for(int index = 0; index < arg->totalRegisterCount(); index++)
    946 				{
    947 					emit(negOpcode, result, index, arg, index);
    948 				}
    949 			}
    950 			break;
    951 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
    952 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
    953 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
    954 		case EOpPostIncrement:
    955 			if(visit == PostVisit)
    956 			{
        				// The value of the expression is the operand before it is modified
    957 				copy(result, arg);
    958 
    959 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
    960 				for(int index = 0; index < arg->totalRegisterCount(); index++)
    961 				{
    962 					emit(addOpcode, arg, index, arg, index, &one);
    963 				}
    964 
    965 				assignLvalue(arg, arg);
    966 			}
    967 			break;
    968 		case EOpPostDecrement:
    969 			if(visit == PostVisit)
    970 			{
        				// The value of the expression is the operand before it is modified
    971 				copy(result, arg);
    972 
    973 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
    974 				for(int index = 0; index < arg->totalRegisterCount(); index++)
    975 				{
    976 					emit(subOpcode, arg, index, arg, index, &one);
    977 				}
    978 
    979 				assignLvalue(arg, arg);
    980 			}
    981 			break;
    982 		case EOpPreIncrement:
    983 			if(visit == PostVisit)
    984 			{
        				// The updated value is computed into the result and then stored to the operand
    985 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
    986 				for(int index = 0; index < arg->totalRegisterCount(); index++)
    987 				{
    988 					emit(addOpcode, result, index, arg, index, &one);
    989 				}
    990 
    991 				assignLvalue(arg, result);
    992 			}
    993 			break;
    994 		case EOpPreDecrement:
    995 			if(visit == PostVisit)
    996 			{
        				// The updated value is computed into the result and then stored to the operand
    997 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
    998 				for(int index = 0; index < arg->totalRegisterCount(); index++)
    999 				{
   1000 					emit(subOpcode, result, index, arg, index, &one);
   1001 				}
   1002 
   1003 				assignLvalue(arg, result);
   1004 			}
   1005 			break;
   1006 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
   1007 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
   1008 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
   1009 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
   1010 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
   1011 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
   1012 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
   1013 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
   1014 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
   1015 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
   1016 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
   1017 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
   1018 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
   1019 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
   1020 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
   1021 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
   1022 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
   1023 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
   1024 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
   1025 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
   1026 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
   1027 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
   1028 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
   1029 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
   1030 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
   1031 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
   1032 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg); break;
   1033 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
   1034 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
   1035 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
   1036 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
   1037 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
   1038 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
   1039 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
   1040 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
   1041 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
   1042 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
   1043 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
   1044 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
   1045 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
   1046 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
   1047 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
   1048 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
   1049 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
   1050 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
   1051 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
   1052 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
   1053 		case EOpTranspose:
   1054 			if(visit == PostVisit)
   1055 			{
        				// Element j of source register i is moved to component i of result register j
   1056 				int numCols = arg->getNominalSize();
   1057 				int numRows = arg->getSecondarySize();
   1058 				for(int i = 0; i < numCols; ++i)
   1059 				{
   1060 					for(int j = 0; j < numRows; ++j)
   1061 					{
   1062 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
   1063 						mov->src[0].swizzle = 0x55 * j;
   1064 						mov->dst.mask = 1 << i;
   1065 					}
   1066 				}
   1067 			}
   1068 			break;
   1069 		case EOpDeterminant:
   1070 			if(visit == PostVisit)
   1071 			{
   1072 				int size = arg->getNominalSize();
   1073 				ASSERT(size == arg->getSecondarySize());
   1074 
   1075 				emitDeterminant(result, arg, size);
   1076 			}
   1077 			break;
   1078 		case EOpInverse:
   1079 			if(visit == PostVisit)
   1080 			{
   1081 				int size = arg->getNominalSize();
   1082 				ASSERT(size == arg->getSecondarySize());
   1083 
   1084 				// Compute transposed matrix of cofactors
   1085 				for(int i = 0; i < size; ++i)
   1086 				{
   1087 					for(int j = 0; j < size; ++j)
   1088 					{
   1089 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
   1090 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
   1091 						emitDeterminant(result, arg, size - 1, j, i, i, j);
   1092 					}
   1093 				}
   1094 
   1095 				// Compute 1 / determinant
   1096 				Temporary invDet(this);
   1097 				emitDeterminant(&invDet, arg, size);
   1098 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
   1099 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
   1100 				div->src[1].swizzle = 0x00; // xxxx
   1101 
   1102 				// Divide transposed matrix of cofactors by determinant
   1103 				for(int i = 0; i < size; ++i)
   1104 				{
   1105 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
   1106 				}
   1107 			}
   1108 			break;
   1109 		default: UNREACHABLE(node->getOp());
   1110 		}
   1111 
   1112 		return true;
   1113 	}
   1114 
   1115 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
   1116 	{
        		// Emits code for aggregate nodes: function definitions and calls, constructors
        		// and the multi-argument built-ins. When currentScope differs from emitScope,
        		// only EOpFunction and EOpSequence nodes are processed.
   1117 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
   1118 		{
   1119 			return false;
   1120 		}
   1121 
   1122 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
   1123 
   1124 		TIntermTyped *result = node;
   1125 		const TType &resultType = node->getType();
   1126 		TIntermSequence &arg = node->getSequence();
   1127 		size_t argumentCount = arg.size();
   1128 
   1129 		switch(node->getOp())
   1130 		{
   1131 		case EOpSequence:             break;
   1132 		case EOpDeclaration:          break;
   1133 		case EOpInvariantDeclaration: break;
   1134 		case EOpPrototype:            break;
   1135 		case EOpComma:
   1136 			if(visit == PostVisit)
   1137 			{
        				// The value of the expression is that of the second operand
   1138 				copy(result, arg[1]);
   1139 			}
   1140 			break;
   1141 		case EOpFunction:
   1142 			if(visit == PreVisit)
   1143 			{
   1144 				const TString &name = node->getName();
   1145 
   1146 				if(emitScope == FUNCTION)
   1147 				{
   1148 					if(functionArray.size() > 1)   // No need for a label when there's only main()
   1149 					{
   1150 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
   1151 						label->dst.type = sw::Shader::PARAMETER_LABEL;
   1152 
   1153 						const Function *function = findFunction(name);
   1154 						ASSERT(function);   // Should have been added during global pass
   1155 						label->dst.index = function->label;
   1156 						currentFunction = function->label;
   1157 					}
   1158 				}
   1159 				else if(emitScope == GLOBAL)
   1160 				{
        					// Record every function other than main(), together with the sequence
        					// of its first child, for lookup through findFunction() later on
   1161 					if(name != "main(")
   1162 					{
   1163 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
   1164 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
   1165 					}
   1166 				}
   1167 				else UNREACHABLE(emitScope);
   1168 
   1169 				currentScope = FUNCTION;
   1170 			}
   1171 			else if(visit == PostVisit)
   1172 			{
   1173 				if(emitScope == FUNCTION)
   1174 				{
   1175 					if(functionArray.size() > 1)   // No need to return when there's only main()
   1176 					{
   1177 						emit(sw::Shader::OPCODE_RET);
   1178 					}
   1179 				}
   1180 
   1181 				currentScope = GLOBAL;
   1182 			}
   1183 			break;
   1184 		case EOpFunctionCall:
   1185 			if(visit == PostVisit)
   1186 			{
   1187 				if(node->isUserDefined())
   1188 				{
   1189 					const TString &name = node->getName();
   1190 					const Function *function = findFunction(name);
   1191 
   1192 					if(!function)
   1193 					{
   1194 						mContext.error(node->getLine(), "function definition not found", name.c_str());
   1195 						return false;
   1196 					}
   1197 
   1198 					TIntermSequence &arguments = *function->arg;
   1199 
        					// Copy the call's arguments into the in, inout and const parameters
   1200 					for(size_t i = 0; i < argumentCount; i++)
   1201 					{
   1202 						TIntermTyped *in = arguments[i]->getAsTyped();
   1203 
   1204 						if(in->getQualifier() == EvqIn ||
   1205 						   in->getQualifier() == EvqInOut ||
   1206 						   in->getQualifier() == EvqConstReadOnly)
   1207 						{
   1208 							copy(in, arg[i]);
   1209 						}
   1210 					}
   1211 
   1212 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
   1213 					call->dst.type = sw::Shader::PARAMETER_LABEL;
   1214 					call->dst.index = function->label;
   1215 
        					// Fetch the return value, if the function has a non-void one
   1216 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
   1217 					{
   1218 						copy(result, function->ret);
   1219 					}
   1220 
        					// Write the out and inout parameters back to the call's arguments
   1221 					for(size_t i = 0; i < argumentCount; i++)
   1222 					{
   1223 						TIntermTyped *argument = arguments[i]->getAsTyped();
   1224 						TIntermTyped *out = arg[i]->getAsTyped();
   1225 
   1226 						if(argument->getQualifier() == EvqOut ||
   1227 						   argument->getQualifier() == EvqInOut)
   1228 						{
   1229 							assignLvalue(out, argument);
   1230 						}
   1231 					}
   1232 				}
   1233 				else
   1234 				{
        					// Calls that are not user-defined are handled as texture functions,
        					// classified from the function name by TextureFunction
   1235 					const TextureFunction textureFunction(node->getName());
   1236 					TIntermTyped *t = arg[1]->getAsTyped();
   1237 
   1238 					Temporary coord(this);
   1239 
   1240 					if(textureFunction.proj)
   1241 					{
        						// The first three components of arg[1] are multiplied by the RCPX
        						// (presumably the reciprocal) of its last component
   1242 						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
   1243 						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
   1244 						rcp->dst.mask = 0x7;
   1245 
   1246 						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
   1247 						mul->dst.mask = 0x7;
   1248 					}
   1249 					else
   1250 					{
   1251 						emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
   1252 					}
   1253 
   1254 					switch(textureFunction.method)
   1255 					{
   1256 					case TextureFunction::IMPLICIT:
   1257 						{
   1258 							TIntermNode* offset = textureFunction.offset ? arg[2] : 0;
   1259 
   1260 							if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))
   1261 							{
   1262 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
   1263 								     result, &coord, arg[0], offset);
   1264 							}
   1265 							else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias
   1266 							{
        								// The bias is written to the fourth component of coord (mask 0x8)
   1267 								Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);
   1268 								bias->dst.mask = 0x8;
   1269 
   1270 								Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
   1271 								                        result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction
   1272 								tex->bias = true;
   1273 							}
   1274 							else UNREACHABLE(argumentCount);
   1275 						}
   1276 						break;
   1277 					case TextureFunction::LOD:
   1278 						{
        							// The level of detail is written to the fourth component of coord (mask 0x8)
   1279 							Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
   1280 							lod->dst.mask = 0x8;
   1281 
   1282 							emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,
   1283 							     result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);
   1284 						}
   1285 						break;
   1286 					case TextureFunction::FETCH:
   1287 						{
   1288 							if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))
   1289 							{
        								// The level of detail is written to the fourth component of coord (mask 0x8)
   1290 								Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
   1291 								lod->dst.mask = 0x8;
   1292 
   1293 								TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;
   1294 
   1295 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,
   1296 								     result, &coord, arg[0], offset);
   1297 							}
   1298 							else UNREACHABLE(argumentCount);
   1299 						}
   1300 						break;
   1301 					case TextureFunction::GRAD:
   1302 						{
   1303 							if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))
   1304 							{
   1305 								TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;
   1306 
   1307 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,
   1308 								     result, &coord, arg[0], arg[2], arg[3], offset);
   1309 							}
   1310 							else UNREACHABLE(argumentCount);
   1311 						}
   1312 						break;
   1313 					case TextureFunction::SIZE:
        						// Uses arg[1] directly; the coord temporary set up above is not read here
   1314 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);
   1315 						break;
   1316 					default:
   1317 						UNREACHABLE(textureFunction.method);
   1318 					}
   1319 				}
   1320 			}
   1321 			break;
   1322 		case EOpParameters:
   1323 			break;
   1324 		case EOpConstructFloat:
   1325 		case EOpConstructVec2:
   1326 		case EOpConstructVec3:
   1327 		case EOpConstructVec4:
   1328 		case EOpConstructBool:
   1329 		case EOpConstructBVec2:
   1330 		case EOpConstructBVec3:
   1331 		case EOpConstructBVec4:
   1332 		case EOpConstructInt:
   1333 		case EOpConstructIVec2:
   1334 		case EOpConstructIVec3:
   1335 		case EOpConstructIVec4:
   1336 		case EOpConstructUInt:
   1337 		case EOpConstructUVec2:
   1338 		case EOpConstructUVec3:
   1339 		case EOpConstructUVec4:
   1340 			if(visit == PostVisit)
   1341 			{
        				// The arguments are packed one after another into the result;
        				// 'component' counts the components consumed so far
   1342 				int component = 0;
   1343 				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
   1344 				int arrayComponents = result->getType().getElementSize();
   1345 				for(size_t i = 0; i < argumentCount; i++)
   1346 				{
   1347 					TIntermTyped *argi = arg[i]->getAsTyped();
   1348 					int size = argi->getNominalSize();
   1349 					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
   1350 					int swizzle = component - (arrayIndex * arrayComponents);
   1351 
   1352 					if(!argi->isMatrix())
   1353 					{
        						// Write starting at component 'swizzle', shifting the source swizzle to match
   1354 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
   1355 						mov->dst.mask = (0xF << swizzle) & 0xF;
   1356 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1357 
   1358 						component += size;
   1359 					}
   1360 					else   // Matrix
   1361 					{
   1362 						int column = 0;
   1363 
        						// Take successive columns until the result has all its components
   1364 						while(component < resultType.getNominalSize())
   1365 						{
   1366 							Instruction *mov = emitCast(result, arrayIndex, argi, column);
   1367 							mov->dst.mask = (0xF << swizzle) & 0xF;
   1368 							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1369 
   1370 							column++;
   1371 							component += size;
   1372 						}
   1373 					}
   1374 				}
   1375 			}
   1376 			break;
   1377 		case EOpConstructMat2:
   1378 		case EOpConstructMat2x3:
   1379 		case EOpConstructMat2x4:
   1380 		case EOpConstructMat3x2:
   1381 		case EOpConstructMat3:
   1382 		case EOpConstructMat3x4:
   1383 		case EOpConstructMat4x2:
   1384 		case EOpConstructMat4x3:
   1385 		case EOpConstructMat4:
   1386 			if(visit == PostVisit)
   1387 			{
   1388 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1389 				const int outCols = result->getNominalSize();
   1390 				const int outRows = result->getSecondarySize();
   1391 
   1392 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
   1393 				{
        					// Each column is zeroed first, then the scalar is written to component i of column i
   1394 					for(int i = 0; i < outCols; i++)
   1395 					{
   1396 						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
   1397 						Instruction *mov = emitCast(result, i, arg0, 0);
   1398 						mov->dst.mask = 1 << i;
   1399 						ASSERT(mov->src[0].swizzle == 0x00);
   1400 					}
   1401 				}
   1402 				else if(arg0->isMatrix())
   1403 				{
        					// Matrix from matrix (one source matrix per array element of the result)
   1404 					int arraySize = result->isArray() ? result->getArraySize() : 1;
   1405 
   1406 					for(int n = 0; n < arraySize; n++)
   1407 					{
   1408 						TIntermTyped *argi = arg[n]->getAsTyped();
   1409 						const int inCols = argi->getNominalSize();
   1410 						const int inRows = argi->getSecondarySize();
   1411 
   1412 						for(int i = 0; i < outCols; i++)
   1413 						{
   1414 							if(i >= inCols || outRows > inRows)
   1415 							{
   1416 								// Initialize to identity matrix
   1417 								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
   1418 								emitCast(result, i + n * outCols, &col, 0);
   1419 							}
   1420 
   1421 							if(i < inCols)
   1422 							{
        								// Overwrite only the first inRows components with the source column
   1423 								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
   1424 								mov->dst.mask = 0xF >> (4 - inRows);
   1425 							}
   1426 						}
   1427 					}
   1428 				}
   1429 				else
   1430 				{
        					// Matrix from a list of scalars/vectors: the elements fill the result column
        					// by column, and an argument may straddle two columns
   1431 					int column = 0;
   1432 					int row = 0;
   1433 
   1434 					for(size_t i = 0; i < argumentCount; i++)
   1435 					{
   1436 						TIntermTyped *argi = arg[i]->getAsTyped();
   1437 						int size = argi->getNominalSize();
   1438 						int element = 0;
   1439 
   1440 						while(element < size)
   1441 						{
   1442 							Instruction *mov = emitCast(result, column, argi, 0);
   1443 							mov->dst.mask = (0xF << row) & 0xF;
   1444 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
   1445 
        							// Advance to the next column once the current one is filled
   1446 							int end = row + size - element;
   1447 							column = end >= outRows ? column + 1 : column;
   1448 							element = element + outRows - row;
   1449 							row = end >= outRows ? 0 : end;
   1450 						}
   1451 					}
   1452 				}
   1453 			}
   1454 			break;
   1455 		case EOpConstructStruct:
   1456 			if(visit == PostVisit)
   1457 			{
        				// The members are copied register by register into consecutive registers of the result
   1458 				int offset = 0;
   1459 				for(size_t i = 0; i < argumentCount; i++)
   1460 				{
   1461 					TIntermTyped *argi = arg[i]->getAsTyped();
   1462 					int size = argi->totalRegisterCount();
   1463 
   1464 					for(int index = 0; index < size; index++)
   1465 					{
   1466 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
   1467 						mov->dst.mask = writeMask(result, offset + index);
   1468 					}
   1469 
   1470 					offset += size;
   1471 				}
   1472 			}
   1473 			break;
   1474 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
   1475 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
   1476 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
   1477 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
   1478 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
   1479 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
   1480 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
   1481 		case EOpModf:
   1482 			if(visit == PostVisit)
   1483 			{
        				// The truncated value is stored through the second argument; the result is
        				// the first argument minus that truncated value
   1484 				TIntermTyped* arg1 = arg[1]->getAsTyped();
   1485 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
   1486 				assignLvalue(arg1, arg1);
   1487 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
   1488 			}
   1489 			break;
   1490 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
   1491 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
   1492 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
   1493 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
   1494 		case EOpClamp:
   1495 			if(visit == PostVisit)
   1496 			{
        				// min(max(arg[0], arg[1]), arg[2])
   1497 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
   1498 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
   1499 			}
   1500 			break;
        		// Note that LRP receives the arguments of mix() in reverse order
   1501 		case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;
   1502 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
   1503 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
   1504 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
   1505 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
   1506 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
   1507 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1508 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
   1509 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1510 		case EOpMul:
   1511 			if(visit == PostVisit)
   1512 			{
        				// Multiplies two equally sized matrices register by register
   1513 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1514 				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
   1515 				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
   1516 
   1517 				int size = arg0->getNominalSize();
   1518 				for(int i = 0; i < size; i++)
   1519 				{
   1520 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
   1521 				}
   1522 			}
   1523 			break;
   1524 		case EOpOuterProduct:
   1525 			if(visit == PostVisit)
   1526 			{
        				// Result register i is arg[0] multiplied by component i of arg[1]
   1527 				for(int i = 0; i < dim(arg[1]); i++)
   1528 				{
   1529 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
   1530 					mul->src[1].swizzle = 0x55 * i;
   1531 				}
   1532 			}
   1533 			break;
   1534 		default: UNREACHABLE(node->getOp());
   1535 		}
   1536 
   1537 		return true;
   1538 	}
   1539 
   1540 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
   1541 	{
   1542 		// Generates code for a selection node: either a ternary operator expression
   1543 		// or an if/else statement. The condition and the branches are traversed
   1544 		// explicitly below and false is returned on every path, presumably so that
   1545 		// the traverser does not visit the children a second time.
   1546 		if(currentScope != emitScope)
   1547 		{
   1548 			return false;
   1549 		}
   1550 
   1551 		TIntermTyped *cond = node->getCondition();
   1552 		TIntermNode *thenNode = node->getTrueBlock();
   1553 		TIntermNode *elseNode = node->getFalseBlock();
   1554 		TIntermConstantUnion *constCond = cond->getAsConstantUnion();
   1555 
   1556 		cond->traverse(this);
   1557 
   1558 		const bool ternary = node->usesTernaryOperator();
   1559 
   1560 		if(constCond)
   1561 		{
   1562 			// The condition is a constant: only the selected branch is emitted
   1563 			// and no conditional instructions are generated.
   1564 			TIntermNode *taken = constCond->getUnionArrayPointer()->getBConst() ? thenNode : elseNode;
   1565 
   1566 			if(ternary)
   1567 			{
   1568 				taken->traverse(this);
   1569 				copy(node, taken);
   1570 			}
   1571 			else if(taken)   // A block of an if/else statement can be absent
   1572 			{
   1573 				taken->traverse(this);
   1574 			}
   1575 
   1576 			return false;
   1577 		}
   1578 
   1579 		if(ternary && trivial(node, 6))   // Fast to compute both potential results and no side effects
   1580 		{
   1581 			thenNode->traverse(this);
   1582 			elseNode->traverse(this);
   1583 			emit(sw::Shader::OPCODE_SELECT, node, cond, thenNode, elseNode);
   1584 
   1585 			return false;
   1586 		}
   1587 
   1588 		// General case: IF / ELSE / ENDIF around the branches. For the ternary
   1589 		// operator each branch also copies its value into the selection node.
   1590 		emit(sw::Shader::OPCODE_IF, 0, cond);
   1591 
   1592 		if(thenNode)
   1593 		{
   1594 			thenNode->traverse(this);
   1595 
   1596 			if(ternary)
   1597 			{
   1598 				copy(node, thenNode);
   1599 			}
   1600 		}
   1601 
   1602 		if(elseNode)
   1603 		{
   1604 			emit(sw::Shader::OPCODE_ELSE);
   1605 			elseNode->traverse(this);
   1606 
   1607 			if(ternary)
   1608 			{
   1609 				copy(node, elseNode);
   1610 			}
   1611 		}
   1612 
   1613 		emit(sw::Shader::OPCODE_ENDIF);
   1614 
   1615 		return false;
   1616 	}
   1639 
   1640 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
   1641 	{
   1642 		if(currentScope != emitScope)
   1643 		{
   1644 			return false;
   1645 		}
   1646 
   1647 		unsigned int iterations = loopCount(node);
   1648 
   1649 		if(iterations == 0)
   1650 		{
   1651 			return false;
   1652 		}
   1653 
   1654 		bool unroll = (iterations <= 4);
   1655 
   1656 		if(unroll)
   1657 		{
   1658 			LoopUnrollable loopUnrollable;
   1659 			unroll = loopUnrollable.traverse(node);
   1660 		}
   1661 
   1662 		TIntermNode *init = node->getInit();
   1663 		TIntermTyped *condition = node->getCondition();
   1664 		TIntermTyped *expression = node->getExpression();
   1665 		TIntermNode *body = node->getBody();
   1666 		Constant True(true);
   1667 
   1668 		if(node->getType() == ELoopDoWhile)
   1669 		{
   1670 			Temporary iterate(this);
   1671 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
   1672 
   1673 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
   1674 
   1675 			if(body)
   1676 			{
   1677 				body->traverse(this);
   1678 			}
   1679 
   1680 			emit(sw::Shader::OPCODE_TEST);
   1681 
   1682 			condition->traverse(this);
   1683 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
   1684 
   1685 			emit(sw::Shader::OPCODE_ENDWHILE);
   1686 		}
   1687 		else
   1688 		{
   1689 			if(init)
   1690 			{
   1691 				init->traverse(this);
   1692 			}
   1693 
   1694 			if(unroll)
   1695 			{
   1696 				for(unsigned int i = 0; i < iterations; i++)
   1697 				{
   1698 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
   1699 
   1700 					if(body)
   1701 					{
   1702 						body->traverse(this);
   1703 					}
   1704 
   1705 					if(expression)
   1706 					{
   1707 						expression->traverse(this);
   1708 					}
   1709 				}
   1710 			}
   1711 			else
   1712 			{
   1713 				if(condition)
   1714 				{
   1715 					condition->traverse(this);
   1716 				}
   1717 				else
   1718 				{
   1719 					condition = &True;
   1720 				}
   1721 
   1722 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
   1723 
   1724 				if(body)
   1725 				{
   1726 					body->traverse(this);
   1727 				}
   1728 
   1729 				emit(sw::Shader::OPCODE_TEST);
   1730 
   1731 				if(expression)
   1732 				{
   1733 					expression->traverse(this);
   1734 				}
   1735 
   1736 				if(condition)
   1737 				{
   1738 					condition->traverse(this);
   1739 				}
   1740 
   1741 				emit(sw::Shader::OPCODE_ENDWHILE);
   1742 			}
   1743 		}
   1744 
   1745 		return false;
   1746 	}
   1747 
   1748 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
   1749 	{
   1750 		if(currentScope != emitScope)
   1751 		{
   1752 			return false;
   1753 		}
   1754 
   1755 		switch(node->getFlowOp())
   1756 		{
   1757 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
   1758 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
   1759 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
   1760 		case EOpReturn:
   1761 			if(visit == PostVisit)
   1762 			{
   1763 				TIntermTyped *value = node->getExpression();
   1764 
   1765 				if(value)
   1766 				{
   1767 					copy(functionArray[currentFunction].ret, value);
   1768 				}
   1769 
   1770 				emit(sw::Shader::OPCODE_LEAVE);
   1771 			}
   1772 			break;
   1773 		default: UNREACHABLE(node->getFlowOp());
   1774 		}
   1775 
   1776 		return true;
   1777 	}
   1778 
   1779 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
   1780 	{
   1781 		if(currentScope != emitScope)
   1782 		{
   1783 			return false;
   1784 		}
   1785 
   1786 		TIntermTyped* switchValue = node->getInit();
   1787 		TIntermAggregate* opList = node->getStatementList();
   1788 
   1789 		if(!switchValue || !opList)
   1790 		{
   1791 			return false;
   1792 		}
   1793 
   1794 		switchValue->traverse(this);
   1795 
   1796 		emit(sw::Shader::OPCODE_SWITCH);
   1797 
   1798 		TIntermSequence& sequence = opList->getSequence();
   1799 		TIntermSequence::iterator it = sequence.begin();
   1800 		TIntermSequence::iterator defaultIt = sequence.end();
   1801 		int nbCases = 0;
   1802 		for(; it != sequence.end(); ++it)
   1803 		{
   1804 			TIntermCase* currentCase = (*it)->getAsCaseNode();
   1805 			if(currentCase)
   1806 			{
   1807 				TIntermSequence::iterator caseIt = it;
   1808 
   1809 				TIntermTyped* condition = currentCase->getCondition();
   1810 				if(condition) // non default case
   1811 				{
   1812 					if(nbCases != 0)
   1813 					{
   1814 						emit(sw::Shader::OPCODE_ELSE);
   1815 					}
   1816 
   1817 					condition->traverse(this);
   1818 					Temporary result(this);
   1819 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
   1820 					emit(sw::Shader::OPCODE_IF, 0, &result);
   1821 					nbCases++;
   1822 
   1823 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
   1824 					{
   1825 						(*caseIt)->traverse(this);
   1826 						if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return
   1827 						{
   1828 							break;
   1829 						}
   1830 					}
   1831 				}
   1832 				else
   1833 				{
   1834 					defaultIt = it; // The default case might not be the last case, keep it for last
   1835 				}
   1836 			}
   1837 		}
   1838 
   1839 		// If there's a default case, traverse it here
   1840 		if(defaultIt != sequence.end())
   1841 		{
   1842 			emit(sw::Shader::OPCODE_ELSE);
   1843 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
   1844 			{
   1845 				(*defaultIt)->traverse(this);
   1846 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
   1847 				{
   1848 					break;
   1849 				}
   1850 			}
   1851 		}
   1852 
   1853 		for(int i = 0; i < nbCases; ++i)
   1854 		{
   1855 			emit(sw::Shader::OPCODE_ENDIF);
   1856 		}
   1857 
   1858 		emit(sw::Shader::OPCODE_ENDSWITCH);
   1859 
   1860 		return false;
   1861 	}
   1862 
   1863 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
   1864 	{
   1865 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
   1866 	}
   1867 
   1868 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
   1869 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
   1870 	{
   1871 		Instruction *instruction = new Instruction(op);
   1872 
   1873 		if(dst)
   1874 		{
   1875 			instruction->dst.type = registerType(dst);
   1876 			instruction->dst.index = registerIndex(dst) + dstIndex;
   1877 			instruction->dst.mask = writeMask(dst);
   1878 			instruction->dst.integer = (dst->getBasicType() == EbtInt);
   1879 		}
   1880 
   1881 		argument(instruction->src[0], src0, index0);
   1882 		argument(instruction->src[1], src1, index1);
   1883 		argument(instruction->src[2], src2, index2);
   1884 		argument(instruction->src[3], src3, index3);
   1885 		argument(instruction->src[4], src4, index4);
   1886 
   1887 		shader->append(instruction);
   1888 
   1889 		return instruction;
   1890 	}
   1891 
   1892 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
   1893 	{
   1894 		return emitCast(dst, 0, src, 0);
   1895 	}
   1896 
   1897 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
   1898 	{
   1899 		switch(src->getBasicType())
   1900 		{
   1901 		case EbtBool:
   1902 			switch(dst->getBasicType())
   1903 			{
   1904 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   1905 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   1906 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
   1907 			default:       break;
   1908 			}
   1909 			break;
   1910 		case EbtInt:
   1911 			switch(dst->getBasicType())
   1912 			{
   1913 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   1914 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
   1915 			default:       break;
   1916 			}
   1917 			break;
   1918 		case EbtUInt:
   1919 			switch(dst->getBasicType())
   1920 			{
   1921 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   1922 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
   1923 			default:       break;
   1924 			}
   1925 			break;
   1926 		case EbtFloat:
   1927 			switch(dst->getBasicType())
   1928 			{
   1929 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
   1930 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
   1931 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
   1932 			default:      break;
   1933 			}
   1934 			break;
   1935 		default:
   1936 			break;
   1937 		}
   1938 
   1939 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
   1940 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
   1941 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
   1942 
   1943 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
   1944 	}
   1945 
   1946 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
   1947 	{
   1948 		for(int index = 0; index < dst->elementRegisterCount(); index++)
   1949 		{
   1950 			emit(op, dst, index, src0, index, src1, index, src2, index);
   1951 		}
   1952 	}
   1953 
   1954 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
   1955 	{
   1956 		emitBinary(op, result, src0, src1);
   1957 		assignLvalue(lhs, result);
   1958 	}
   1959 
   1960 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
   1961 	{
   1962 		sw::Shader::Opcode opcode;
   1963 		switch(left->getAsTyped()->getBasicType())
   1964 		{
   1965 		case EbtBool:
   1966 		case EbtInt:
   1967 			opcode = sw::Shader::OPCODE_ICMP;
   1968 			break;
   1969 		case EbtUInt:
   1970 			opcode = sw::Shader::OPCODE_UCMP;
   1971 			break;
   1972 		default:
   1973 			opcode = sw::Shader::OPCODE_CMP;
   1974 			break;
   1975 		}
   1976 
   1977 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
   1978 		cmp->control = cmpOp;
   1979 	}
   1980 
   1981 	int componentCount(const TType &type, int registers)
   1982 	{
   1983 		if(registers == 0)
   1984 		{
   1985 			return 0;
   1986 		}
   1987 
   1988 		if(type.isArray() && registers >= type.elementRegisterCount())
   1989 		{
   1990 			int index = registers / type.elementRegisterCount();
   1991 			registers -= index * type.elementRegisterCount();
   1992 			return index * type.getElementSize() + componentCount(type, registers);
   1993 		}
   1994 
   1995 		if(type.isStruct() || type.isInterfaceBlock())
   1996 		{
   1997 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   1998 			int elements = 0;
   1999 
   2000 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
   2001 			{
   2002 				const TType &fieldType = *((*field)->type());
   2003 
   2004 				if(fieldType.totalRegisterCount() <= registers)
   2005 				{
   2006 					registers -= fieldType.totalRegisterCount();
   2007 					elements += fieldType.getObjectSize();
   2008 				}
   2009 				else   // Register within this field
   2010 				{
   2011 					return elements + componentCount(fieldType, registers);
   2012 				}
   2013 			}
   2014 		}
   2015 		else if(type.isMatrix())
   2016 		{
   2017 			return registers * type.registerSize();
   2018 		}
   2019 
   2020 		UNREACHABLE(0);
   2021 		return 0;
   2022 	}
   2023 
   2024 	int registerSize(const TType &type, int registers)
   2025 	{
   2026 		if(registers == 0)
   2027 		{
   2028 			if(type.isStruct())
   2029 			{
   2030 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
   2031 			}
   2032 			else if(type.isInterfaceBlock())
   2033 			{
   2034 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
   2035 			}
   2036 
   2037 			return type.registerSize();
   2038 		}
   2039 
   2040 		if(type.isArray() && registers >= type.elementRegisterCount())
   2041 		{
   2042 			int index = registers / type.elementRegisterCount();
   2043 			registers -= index * type.elementRegisterCount();
   2044 			return registerSize(type, registers);
   2045 		}
   2046 
   2047 		if(type.isStruct() || type.isInterfaceBlock())
   2048 		{
   2049 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   2050 			int elements = 0;
   2051 
   2052 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
   2053 			{
   2054 				const TType &fieldType = *((*field)->type());
   2055 
   2056 				if(fieldType.totalRegisterCount() <= registers)
   2057 				{
   2058 					registers -= fieldType.totalRegisterCount();
   2059 					elements += fieldType.getObjectSize();
   2060 				}
   2061 				else   // Register within this field
   2062 				{
   2063 					return registerSize(fieldType, registers);
   2064 				}
   2065 			}
   2066 		}
   2067 		else if(type.isMatrix())
   2068 		{
   2069 			return registerSize(type, 0);
   2070 		}
   2071 
   2072 		UNREACHABLE(0);
   2073 		return 0;
   2074 	}
   2075 
   2076 	int OutputASM::getBlockId(TIntermTyped *arg)
   2077 	{
   2078 		if(arg)
   2079 		{
   2080 			const TType &type = arg->getType();
   2081 			TInterfaceBlock* block = type.getInterfaceBlock();
   2082 			if(block && (type.getQualifier() == EvqUniform))
   2083 			{
   2084 				// Make sure the uniform block is declared
   2085 				uniformRegister(arg);
   2086 
   2087 				const char* blockName = block->name().c_str();
   2088 
   2089 				// Fetch uniform block index from array of blocks
   2090 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
   2091 				{
   2092 					if(blockName == it->name)
   2093 					{
   2094 						return it->blockId;
   2095 					}
   2096 				}
   2097 
   2098 				ASSERT(false);
   2099 			}
   2100 		}
   2101 
   2102 		return -1;
   2103 	}
   2104 
   2105 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
   2106 	{
   2107 		const TType &type = arg->getType();
   2108 		int blockId = getBlockId(arg);
   2109 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
   2110 		if(blockId != -1)
   2111 		{
   2112 			argumentInfo.bufferIndex = 0;
   2113 			for(int i = 0; i < blockId; ++i)
   2114 			{
   2115 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
   2116 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
   2117 			}
   2118 
   2119 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
   2120 
   2121 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
   2122 			BlockDefinitionIndexMap::const_iterator it = itEnd;
   2123 
   2124 			argumentInfo.clampedIndex = index;
   2125 			if(type.isInterfaceBlock())
   2126 			{
   2127 				// Offset index to the beginning of the selected instance
   2128 				int blockRegisters = type.elementRegisterCount();
   2129 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
   2130 				argumentInfo.bufferIndex += bufferOffset;
   2131 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
   2132 			}
   2133 
   2134 			int regIndex = registerIndex(arg);
   2135 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
   2136 			{
   2137 				it = blockDefinition.find(i);
   2138 				if(it != itEnd)
   2139 				{
   2140 					argumentInfo.clampedIndex -= (i - regIndex);
   2141 					break;
   2142 				}
   2143 			}
   2144 			ASSERT(it != itEnd);
   2145 
   2146 			argumentInfo.typedMemberInfo = it->second;
   2147 
   2148 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
   2149 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
   2150 		}
   2151 		else
   2152 		{
   2153 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
   2154 		}
   2155 
   2156 		return argumentInfo;
   2157 	}
   2158 
   2159 	void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
   2160 	{
   2161 		if(argument)
   2162 		{
   2163 			TIntermTyped *arg = argument->getAsTyped();
   2164 			Temporary unpackedUniform(this);
   2165 
   2166 			const TType& srcType = arg->getType();
   2167 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
   2168 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
   2169 			{
   2170 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2171 				const TType &memberType = argumentInfo.typedMemberInfo.type;
   2172 
   2173 				if(memberType.getBasicType() == EbtBool)
   2174 				{
   2175 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
   2176 
   2177 					// Convert the packed bool, which is currently an int, to a true bool
   2178 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
   2179 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2180 					instruction->dst.index = registerIndex(&unpackedUniform);
   2181 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2182 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2183 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
   2184 
   2185 					shader->append(instruction);
   2186 
   2187 					arg = &unpackedUniform;
   2188 					index = 0;
   2189 				}
   2190 				else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())
   2191 				{
   2192 					int numCols = memberType.getNominalSize();
   2193 					int numRows = memberType.getSecondarySize();
   2194 
   2195 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
   2196 
   2197 					unsigned int dstIndex = registerIndex(&unpackedUniform);
   2198 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
   2199 					int arrayIndex = argumentInfo.clampedIndex / numCols;
   2200 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
   2201 
   2202 					for(int j = 0; j < numRows; ++j)
   2203 					{
   2204 						// Transpose the row major matrix
   2205 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
   2206 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2207 						instruction->dst.index = dstIndex;
   2208 						instruction->dst.mask = 1 << j;
   2209 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2210 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2211 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
   2212 						instruction->src[0].swizzle = srcSwizzle;
   2213 
   2214 						shader->append(instruction);
   2215 					}
   2216 
   2217 					arg = &unpackedUniform;
   2218 					index = 0;
   2219 				}
   2220 			}
   2221 
   2222 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2223 			const TType &type = argumentInfo.typedMemberInfo.type;
   2224 
   2225 			int size = registerSize(type, argumentInfo.clampedIndex);
   2226 
   2227 			parameter.type = registerType(arg);
   2228 			parameter.bufferIndex = argumentInfo.bufferIndex;
   2229 
   2230 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
   2231 			{
   2232 				int component = componentCount(type, argumentInfo.clampedIndex);
   2233 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
   2234 
   2235 				for(int i = 0; i < 4; i++)
   2236 				{
   2237 					if(size == 1)   // Replicate
   2238 					{
   2239 						parameter.value[i] = constants[component + 0].getAsFloat();
   2240 					}
   2241 					else if(i < size)
   2242 					{
   2243 						parameter.value[i] = constants[component + i].getAsFloat();
   2244 					}
   2245 					else
   2246 					{
   2247 						parameter.value[i] = 0.0f;
   2248 					}
   2249 				}
   2250 			}
   2251 			else
   2252 			{
   2253 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
   2254 
   2255 				if(parameter.bufferIndex != -1)
   2256 				{
   2257 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
   2258 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
   2259 				}
   2260 			}
   2261 
   2262 			if(!IsSampler(arg->getBasicType()))
   2263 			{
   2264 				parameter.swizzle = readSwizzle(arg, size);
   2265 			}
   2266 		}
   2267 	}
   2268 
   2269 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
   2270 	{
   2271 		for(int index = 0; index < dst->totalRegisterCount(); index++)
   2272 		{
   2273 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
   2274 			mov->dst.mask = writeMask(dst, index);
   2275 		}
   2276 	}
   2277 
   2278 	int swizzleElement(int swizzle, int index)
   2279 	{
   2280 		return (swizzle >> (index * 2)) & 0x03;
   2281 	}
   2282 
   2283 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
   2284 	{
   2285 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
   2286 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
   2287 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
   2288 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
   2289 	}
   2290 
   2291 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
   2292 	{
   2293 		if(src &&
   2294 			((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
   2295 			 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))
   2296 		{
   2297 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
   2298 		}
   2299 
   2300 		TIntermBinary *binary = dst->getAsBinaryNode();
   2301 
   2302 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
   2303 		{
   2304 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
   2305 
   2306 			Temporary address(this);
   2307 			lvalue(insert->dst, address, dst);
   2308 
   2309 			insert->src[0].type = insert->dst.type;
   2310 			insert->src[0].index = insert->dst.index;
   2311 			insert->src[0].rel = insert->dst.rel;
   2312 			argument(insert->src[1], src);
   2313 			argument(insert->src[2], binary->getRight());
   2314 
   2315 			shader->append(insert);
   2316 		}
   2317 		else
   2318 		{
   2319 			for(int offset = 0; offset < dst->totalRegisterCount(); offset++)
   2320 			{
   2321 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
   2322 
   2323 				Temporary address(this);
   2324 				int swizzle = lvalue(mov->dst, address, dst);
   2325 				mov->dst.index += offset;
   2326 
   2327 				if(offset > 0)
   2328 				{
   2329 					mov->dst.mask = writeMask(dst, offset);
   2330 				}
   2331 
   2332 				argument(mov->src[0], src, offset);
   2333 				mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);
   2334 
   2335 				shader->append(mov);
   2336 			}
   2337 		}
   2338 	}
   2339 
   2340 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)
   2341 	{
   2342 		TIntermTyped *result = node;
   2343 		TIntermBinary *binary = node->getAsBinaryNode();
   2344 		TIntermSymbol *symbol = node->getAsSymbolNode();
   2345 
   2346 		if(binary)
   2347 		{
   2348 			TIntermTyped *left = binary->getLeft();
   2349 			TIntermTyped *right = binary->getRight();
   2350 
   2351 			int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side
   2352 
   2353 			switch(binary->getOp())
   2354 			{
   2355 			case EOpIndexDirect:
   2356 				{
   2357 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
   2358 
   2359 					if(left->isRegister())
   2360 					{
   2361 						int leftMask = dst.mask;
   2362 
   2363 						dst.mask = 1;
   2364 						while((leftMask & dst.mask) == 0)
   2365 						{
   2366 							dst.mask = dst.mask << 1;
   2367 						}
   2368 
   2369 						int element = swizzleElement(leftSwizzle, rightIndex);
   2370 						dst.mask = 1 << element;
   2371 
   2372 						return element;
   2373 					}
   2374 					else if(left->isArray() || left->isMatrix())
   2375 					{
   2376 						dst.index += rightIndex * result->totalRegisterCount();
   2377 						return 0xE4;
   2378 					}
   2379 					else UNREACHABLE(0);
   2380 				}
   2381 				break;
   2382 			case EOpIndexIndirect:
   2383 				{
   2384 					if(left->isRegister())
   2385 					{
   2386 						// Requires INSERT instruction (handled by calling function)
   2387 					}
   2388 					else if(left->isArray() || left->isMatrix())
   2389 					{
   2390 						int scale = result->totalRegisterCount();
   2391 
   2392 						if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
   2393 						{
   2394 							if(left->totalRegisterCount() > 1)
   2395 							{
   2396 								sw::Shader::SourceParameter relativeRegister;
   2397 								argument(relativeRegister, right);
   2398 
   2399 								dst.rel.index = relativeRegister.index;
   2400 								dst.rel.type = relativeRegister.type;
   2401 								dst.rel.scale = scale;
   2402 								dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
   2403 							}
   2404 						}
   2405 						else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register
   2406 						{
   2407 							if(scale == 1)
   2408 							{
   2409 								Constant oldScale((int)dst.rel.scale);
   2410 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
   2411 								mad->src[0].index = dst.rel.index;
   2412 								mad->src[0].type = dst.rel.type;
   2413 							}
   2414 							else
   2415 							{
   2416 								Constant oldScale((int)dst.rel.scale);
   2417 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
   2418 								mul->src[0].index = dst.rel.index;
   2419 								mul->src[0].type = dst.rel.type;
   2420 
   2421 								Constant newScale(scale);
   2422 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2423 							}
   2424 
   2425 							dst.rel.type = sw::Shader::PARAMETER_TEMP;
   2426 							dst.rel.index = registerIndex(&address);
   2427 							dst.rel.scale = 1;
   2428 						}
   2429 						else   // Just add the new index to the address register
   2430 						{
   2431 							if(scale == 1)
   2432 							{
   2433 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
   2434 							}
   2435 							else
   2436 							{
   2437 								Constant newScale(scale);
   2438 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2439 							}
   2440 						}
   2441 					}
   2442 					else UNREACHABLE(0);
   2443 				}
   2444 				break;
   2445 			case EOpIndexDirectStruct:
   2446 			case EOpIndexDirectInterfaceBlock:
   2447 				{
   2448 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
   2449 					                           left->getType().getStruct()->fields() :
   2450 					                           left->getType().getInterfaceBlock()->fields();
   2451 					int index = right->getAsConstantUnion()->getIConst(0);
   2452 					int fieldOffset = 0;
   2453 
   2454 					for(int i = 0; i < index; i++)
   2455 					{
   2456 						fieldOffset += fields[i]->type()->totalRegisterCount();
   2457 					}
   2458 
   2459 					dst.type = registerType(left);
   2460 					dst.index += fieldOffset;
   2461 					dst.mask = writeMask(result);
   2462 
   2463 					return 0xE4;
   2464 				}
   2465 				break;
   2466 			case EOpVectorSwizzle:
   2467 				{
   2468 					ASSERT(left->isRegister());
   2469 
   2470 					int leftMask = dst.mask;
   2471 
   2472 					int swizzle = 0;
   2473 					int rightMask = 0;
   2474 
   2475 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
   2476 
   2477 					for(unsigned int i = 0; i < sequence.size(); i++)
   2478 					{
   2479 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
   2480 
   2481 						int element = swizzleElement(leftSwizzle, index);
   2482 						rightMask = rightMask | (1 << element);
   2483 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
   2484 					}
   2485 
   2486 					dst.mask = leftMask & rightMask;
   2487 
   2488 					return swizzle;
   2489 				}
   2490 				break;
   2491 			default:
   2492 				UNREACHABLE(binary->getOp());   // Not an l-value operator
   2493 				break;
   2494 			}
   2495 		}
   2496 		else if(symbol)
   2497 		{
   2498 			dst.type = registerType(symbol);
   2499 			dst.index = registerIndex(symbol);
   2500 			dst.mask = writeMask(symbol);
   2501 			return 0xE4;
   2502 		}
   2503 
   2504 		return 0xE4;
   2505 	}
   2506 
   2507 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
   2508 	{
   2509 		if(isSamplerRegister(operand))
   2510 		{
   2511 			return sw::Shader::PARAMETER_SAMPLER;
   2512 		}
   2513 
   2514 		const TQualifier qualifier = operand->getQualifier();
   2515 		if((EvqFragColor == qualifier) || (EvqFragData == qualifier))
   2516 		{
   2517 			if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||
   2518 			   ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))
   2519 			{
   2520 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
   2521 			}
   2522 			outputQualifier = qualifier;
   2523 		}
   2524 
   2525 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
   2526 		{
   2527 			return sw::Shader::PARAMETER_TEMP;
   2528 		}
   2529 
   2530 		switch(qualifier)
   2531 		{
   2532 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
   2533 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
   2534 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
   2535 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
   2536 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
   2537 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
   2538 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
   2539 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
   2540 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
   2541 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
   2542 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
   2543 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
   2544 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
   2545 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
   2546 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
   2547 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
   2548 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
   2549 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
   2550 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
   2551 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
   2552 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
   2553 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
   2554 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
   2555 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
   2556 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
   2557 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
   2558 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
   2559 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
   2560 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
   2561 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
   2562 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
   2563 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
   2564 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
   2565 		default: UNREACHABLE(qualifier);
   2566 		}
   2567 
   2568 		return sw::Shader::PARAMETER_VOID;
   2569 	}
   2570 
   2571 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
   2572 	{
   2573 		const TQualifier qualifier = operand->getQualifier();
   2574 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
   2575 	}
   2576 
   2577 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
   2578 	{
   2579 		if(isSamplerRegister(operand))
   2580 		{
   2581 			return samplerRegister(operand);
   2582 		}
   2583 
   2584 		switch(operand->getQualifier())
   2585 		{
   2586 		case EvqTemporary:           return temporaryRegister(operand);
   2587 		case EvqGlobal:              return temporaryRegister(operand);
   2588 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
   2589 		case EvqAttribute:           return attributeRegister(operand);
   2590 		case EvqVaryingIn:           return varyingRegister(operand);
   2591 		case EvqVaryingOut:          return varyingRegister(operand);
   2592 		case EvqVertexIn:            return attributeRegister(operand);
   2593 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
   2594 		case EvqVertexOut:           return varyingRegister(operand);
   2595 		case EvqFragmentIn:          return varyingRegister(operand);
   2596 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
   2597 		case EvqInvariantVaryingOut: return varyingRegister(operand);
   2598 		case EvqSmooth:              return varyingRegister(operand);
   2599 		case EvqFlat:                return varyingRegister(operand);
   2600 		case EvqCentroidOut:         return varyingRegister(operand);
   2601 		case EvqSmoothIn:            return varyingRegister(operand);
   2602 		case EvqFlatIn:              return varyingRegister(operand);
   2603 		case EvqCentroidIn:          return varyingRegister(operand);
   2604 		case EvqUniform:             return uniformRegister(operand);
   2605 		case EvqIn:                  return temporaryRegister(operand);
   2606 		case EvqOut:                 return temporaryRegister(operand);
   2607 		case EvqInOut:               return temporaryRegister(operand);
   2608 		case EvqConstReadOnly:       return temporaryRegister(operand);
   2609 		case EvqPosition:            return varyingRegister(operand);
   2610 		case EvqPointSize:           return varyingRegister(operand);
   2611 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
   2612 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
   2613 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
   2614 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
   2615 		case EvqPointCoord:          return varyingRegister(operand);
   2616 		case EvqFragColor:           return 0;
   2617 		case EvqFragData:            return fragmentOutputRegister(operand);
   2618 		case EvqFragDepth:           return 0;
   2619 		default: UNREACHABLE(operand->getQualifier());
   2620 		}
   2621 
   2622 		return 0;
   2623 	}
   2624 
   2625 	int OutputASM::writeMask(TIntermTyped *destination, int index)
   2626 	{
   2627 		if(destination->getQualifier() == EvqPointSize)
   2628 		{
   2629 			return 0x2;   // Point size stored in the y component
   2630 		}
   2631 
   2632 		return 0xF >> (4 - registerSize(destination->getType(), index));
   2633 	}
   2634 
   2635 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
   2636 	{
   2637 		if(argument->getQualifier() == EvqPointSize)
   2638 		{
   2639 			return 0x55;   // Point size stored in the y component
   2640 		}
   2641 
   2642 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
   2643 
   2644 		return swizzleSize[size];
   2645 	}
   2646 
   2647 	// Conservatively checks whether an expression is fast to compute and has no side effects
   2648 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
   2649 	{
   2650 		if(!expression->isRegister())
   2651 		{
   2652 			return false;
   2653 		}
   2654 
   2655 		return cost(expression, budget) >= 0;
   2656 	}
   2657 
   2658 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
   2659 	int OutputASM::cost(TIntermNode *expression, int budget)
   2660 	{
   2661 		if(budget < 0)
   2662 		{
   2663 			return budget;
   2664 		}
   2665 
   2666 		if(expression->getAsSymbolNode())
   2667 		{
   2668 			return budget;
   2669 		}
   2670 		else if(expression->getAsConstantUnion())
   2671 		{
   2672 			return budget;
   2673 		}
   2674 		else if(expression->getAsBinaryNode())
   2675 		{
   2676 			TIntermBinary *binary = expression->getAsBinaryNode();
   2677 
   2678 			switch(binary->getOp())
   2679 			{
   2680 			case EOpVectorSwizzle:
   2681 			case EOpIndexDirect:
   2682 			case EOpIndexDirectStruct:
   2683 			case EOpIndexDirectInterfaceBlock:
   2684 				return cost(binary->getLeft(), budget - 0);
   2685 			case EOpAdd:
   2686 			case EOpSub:
   2687 			case EOpMul:
   2688 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
   2689 			default:
   2690 				return -1;
   2691 			}
   2692 		}
   2693 		else if(expression->getAsUnaryNode())
   2694 		{
   2695 			TIntermUnary *unary = expression->getAsUnaryNode();
   2696 
   2697 			switch(unary->getOp())
   2698 			{
   2699 			case EOpAbs:
   2700 			case EOpNegative:
   2701 				return cost(unary->getOperand(), budget - 1);
   2702 			default:
   2703 				return -1;
   2704 			}
   2705 		}
   2706 		else if(expression->getAsSelectionNode())
   2707 		{
   2708 			TIntermSelection *selection = expression->getAsSelectionNode();
   2709 
   2710 			if(selection->usesTernaryOperator())
   2711 			{
   2712 				TIntermTyped *condition = selection->getCondition();
   2713 				TIntermNode *trueBlock = selection->getTrueBlock();
   2714 				TIntermNode *falseBlock = selection->getFalseBlock();
   2715 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
   2716 
   2717 				if(constantCondition)
   2718 				{
   2719 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   2720 
   2721 					if(trueCondition)
   2722 					{
   2723 						return cost(trueBlock, budget - 0);
   2724 					}
   2725 					else
   2726 					{
   2727 						return cost(falseBlock, budget - 0);
   2728 					}
   2729 				}
   2730 				else
   2731 				{
   2732 					return cost(trueBlock, cost(falseBlock, budget - 2));
   2733 				}
   2734 			}
   2735 		}
   2736 
   2737 		return -1;
   2738 	}
   2739 
   2740 	const Function *OutputASM::findFunction(const TString &name)
   2741 	{
   2742 		for(unsigned int f = 0; f < functionArray.size(); f++)
   2743 		{
   2744 			if(functionArray[f].name == name)
   2745 			{
   2746 				return &functionArray[f];
   2747 			}
   2748 		}
   2749 
   2750 		return 0;
   2751 	}
   2752 
   2753 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
   2754 	{
   2755 		return allocate(temporaries, temporary);
   2756 	}
   2757 
   2758 	int OutputASM::varyingRegister(TIntermTyped *varying)
   2759 	{
   2760 		int var = lookup(varyings, varying);
   2761 
   2762 		if(var == -1)
   2763 		{
   2764 			var = allocate(varyings, varying);
   2765 			int componentCount = varying->registerSize();
   2766 			int registerCount = varying->totalRegisterCount();
   2767 
   2768 			if(pixelShader)
   2769 			{
   2770 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
   2771 				{
   2772 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
   2773 					return 0;
   2774 				}
   2775 
   2776 				if(varying->getQualifier() == EvqPointCoord)
   2777 				{
   2778 					ASSERT(varying->isRegister());
   2779 					pixelShader->setInput(var, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
   2780 				}
   2781 				else
   2782 				{
   2783 					for(int i = 0; i < varying->totalRegisterCount(); i++)
   2784 					{
   2785 						bool flat = hasFlatQualifier(varying);
   2786 
   2787 						pixelShader->setInput(var + i, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
   2788 					}
   2789 				}
   2790 			}
   2791 			else if(vertexShader)
   2792 			{
   2793 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
   2794 				{
   2795 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
   2796 					return 0;
   2797 				}
   2798 
   2799 				if(varying->getQualifier() == EvqPosition)
   2800 				{
   2801 					ASSERT(varying->isRegister());
   2802 					vertexShader->setPositionRegister(var);
   2803 				}
   2804 				else if(varying->getQualifier() == EvqPointSize)
   2805 				{
   2806 					ASSERT(varying->isRegister());
   2807 					vertexShader->setPointSizeRegister(var);
   2808 				}
   2809 				else
   2810 				{
   2811 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
   2812 				}
   2813 			}
   2814 			else UNREACHABLE(0);
   2815 
   2816 			declareVarying(varying, var);
   2817 		}
   2818 
   2819 		return var;
   2820 	}
   2821 
   2822 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
   2823 	{
   2824 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
   2825 		{
   2826 			const TType &type = varying->getType();
   2827 			const char *name = varying->getAsSymbolNode()->getSymbol().c_str();
   2828 			VaryingList &activeVaryings = shaderObject->varyings;
   2829 
   2830 			// Check if this varying has been declared before without having a register assigned
   2831 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
   2832 			{
   2833 				if(v->name == name)
   2834 				{
   2835 					if(reg >= 0)
   2836 					{
   2837 						ASSERT(v->reg < 0 || v->reg == reg);
   2838 						v->reg = reg;
   2839 					}
   2840 
   2841 					return;
   2842 				}
   2843 			}
   2844 
   2845 			activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));
   2846 		}
   2847 	}
   2848 
   2849 	int OutputASM::uniformRegister(TIntermTyped *uniform)
   2850 	{
   2851 		const TType &type = uniform->getType();
   2852 		ASSERT(!IsSampler(type.getBasicType()));
   2853 		TInterfaceBlock *block = type.getAsInterfaceBlock();
   2854 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
   2855 		ASSERT(symbol || block);
   2856 
   2857 		if(symbol || block)
   2858 		{
   2859 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
   2860 			bool isBlockMember = (!block && parentBlock);
   2861 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
   2862 
   2863 			if(index == -1 || isBlockMember)
   2864 			{
   2865 				if(index == -1)
   2866 				{
   2867 					index = allocate(uniforms, uniform);
   2868 				}
   2869 
   2870 				// Verify if the current uniform is a member of an already declared block
   2871 				const TString &name = symbol ? symbol->getSymbol() : block->name();
   2872 				int blockMemberIndex = blockMemberLookup(type, name, index);
   2873 				if(blockMemberIndex == -1)
   2874 				{
   2875 					declareUniform(type, name, index);
   2876 				}
   2877 				else
   2878 				{
   2879 					index = blockMemberIndex;
   2880 				}
   2881 			}
   2882 
   2883 			return index;
   2884 		}
   2885 
   2886 		return 0;
   2887 	}
   2888 
   2889 	int OutputASM::attributeRegister(TIntermTyped *attribute)
   2890 	{
   2891 		ASSERT(!attribute->isArray());
   2892 
   2893 		int index = lookup(attributes, attribute);
   2894 
   2895 		if(index == -1)
   2896 		{
   2897 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
   2898 			ASSERT(symbol);
   2899 
   2900 			if(symbol)
   2901 			{
   2902 				index = allocate(attributes, attribute);
   2903 				const TType &type = attribute->getType();
   2904 				int registerCount = attribute->totalRegisterCount();
   2905 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
   2906 				switch(type.getBasicType())
   2907 				{
   2908 				case EbtInt:
   2909 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
   2910 					break;
   2911 				case EbtUInt:
   2912 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
   2913 					break;
   2914 				case EbtFloat:
   2915 				default:
   2916 					break;
   2917 				}
   2918 
   2919 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
   2920 				{
   2921 					for(int i = 0; i < registerCount; i++)
   2922 					{
   2923 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
   2924 					}
   2925 				}
   2926 
   2927 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
   2928 
   2929 				const char *name = symbol->getSymbol().c_str();
   2930 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
   2931 			}
   2932 		}
   2933 
   2934 		return index;
   2935 	}
   2936 
   2937 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
   2938 	{
   2939 		return allocate(fragmentOutputs, fragmentOutput);
   2940 	}
   2941 
   2942 	int OutputASM::samplerRegister(TIntermTyped *sampler)
   2943 	{
   2944 		const TType &type = sampler->getType();
   2945 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   2946 
   2947 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
   2948 		TIntermBinary *binary = sampler->getAsBinaryNode();
   2949 
   2950 		if(symbol)
   2951 		{
   2952 			switch(type.getQualifier())
   2953 			{
   2954 			case EvqUniform:
   2955 				return samplerRegister(symbol);
   2956 			case EvqIn:
   2957 			case EvqConstReadOnly:
   2958 				// Function arguments are not (uniform) sampler registers
   2959 				return -1;
   2960 			default:
   2961 				UNREACHABLE(type.getQualifier());
   2962 			}
   2963 		}
   2964 		else if(binary)
   2965 		{
   2966 			TIntermTyped *left = binary->getLeft();
   2967 			TIntermTyped *right = binary->getRight();
   2968 			const TType &leftType = left->getType();
   2969 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
   2970 			int offset = 0;
   2971 
   2972 			switch(binary->getOp())
   2973 			{
   2974 			case EOpIndexDirect:
   2975 				ASSERT(left->isArray());
   2976 				offset = index * leftType.elementRegisterCount();
   2977 				break;
   2978 			case EOpIndexDirectStruct:
   2979 				ASSERT(leftType.isStruct());
   2980 				{
   2981 					const TFieldList &fields = leftType.getStruct()->fields();
   2982 
   2983 					for(int i = 0; i < index; i++)
   2984 					{
   2985 						offset += fields[i]->type()->totalRegisterCount();
   2986 					}
   2987 				}
   2988 				break;
   2989 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
   2990 				return -1;
   2991 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
   2992 			default:
   2993 				UNREACHABLE(binary->getOp());
   2994 				return -1;
   2995 			}
   2996 
   2997 			int base = samplerRegister(left);
   2998 
   2999 			if(base < 0)
   3000 			{
   3001 				return -1;
   3002 			}
   3003 
   3004 			return base + offset;
   3005 		}
   3006 
   3007 		UNREACHABLE(0);
   3008 		return -1;   // Not a (uniform) sampler register
   3009 	}
   3010 
   3011 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
   3012 	{
   3013 		const TType &type = sampler->getType();
   3014 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   3015 
   3016 		int index = lookup(samplers, sampler);
   3017 
   3018 		if(index == -1)
   3019 		{
   3020 			index = allocate(samplers, sampler);
   3021 
   3022 			if(sampler->getQualifier() == EvqUniform)
   3023 			{
   3024 				const char *name = sampler->getSymbol().c_str();
   3025 				declareUniform(type, name, index);
   3026 			}
   3027 		}
   3028 
   3029 		return index;
   3030 	}
   3031 
   3032 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
   3033 	{
   3034 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
   3035 	}
   3036 
   3037 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
   3038 	{
   3039 		for(unsigned int i = 0; i < list.size(); i++)
   3040 		{
   3041 			if(list[i] == variable)
   3042 			{
   3043 				return i;   // Pointer match
   3044 			}
   3045 		}
   3046 
   3047 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
   3048 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
   3049 
   3050 		if(varBlock)
   3051 		{
   3052 			for(unsigned int i = 0; i < list.size(); i++)
   3053 			{
   3054 				if(list[i])
   3055 				{
   3056 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
   3057 
   3058 					if(listBlock)
   3059 					{
   3060 						if(listBlock->name() == varBlock->name())
   3061 						{
   3062 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
   3063 							ASSERT(listBlock->fields() == varBlock->fields());
   3064 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
   3065 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
   3066 
   3067 							return i;
   3068 						}
   3069 					}
   3070 				}
   3071 			}
   3072 		}
   3073 		else if(varSymbol)
   3074 		{
   3075 			for(unsigned int i = 0; i < list.size(); i++)
   3076 			{
   3077 				if(list[i])
   3078 				{
   3079 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
   3080 
   3081 					if(listSymbol)
   3082 					{
   3083 						if(listSymbol->getId() == varSymbol->getId())
   3084 						{
   3085 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
   3086 							ASSERT(listSymbol->getType() == varSymbol->getType());
   3087 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
   3088 
   3089 							return i;
   3090 						}
   3091 					}
   3092 				}
   3093 			}
   3094 		}
   3095 
   3096 		return -1;
   3097 	}
   3098 
   3099 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
   3100 	{
   3101 		for(unsigned int i = 0; i < list.size(); i++)
   3102 		{
   3103 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
   3104 			{
   3105 				return i;   // Pointer match
   3106 			}
   3107 		}
   3108 		return -1;
   3109 	}
   3110 
   3111 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)
   3112 	{
   3113 		int index = lookup(list, variable);
   3114 
   3115 		if(index == -1)
   3116 		{
   3117 			unsigned int registerCount = variable->blockRegisterCount();
   3118 
   3119 			for(unsigned int i = 0; i < list.size(); i++)
   3120 			{
   3121 				if(list[i] == 0)
   3122 				{
   3123 					unsigned int j = 1;
   3124 					for( ; j < registerCount && (i + j) < list.size(); j++)
   3125 					{
   3126 						if(list[i + j] != 0)
   3127 						{
   3128 							break;
   3129 						}
   3130 					}
   3131 
   3132 					if(j == registerCount)   // Found free slots
   3133 					{
   3134 						for(unsigned int j = 0; j < registerCount; j++)
   3135 						{
   3136 							list[i + j] = variable;
   3137 						}
   3138 
   3139 						return i;
   3140 					}
   3141 				}
   3142 			}
   3143 
   3144 			index = list.size();
   3145 
   3146 			for(unsigned int i = 0; i < registerCount; i++)
   3147 			{
   3148 				list.push_back(variable);
   3149 			}
   3150 		}
   3151 
   3152 		return index;
   3153 	}
   3154 
   3155 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
   3156 	{
   3157 		int index = lookup(list, variable);
   3158 
   3159 		if(index >= 0)
   3160 		{
   3161 			list[index] = 0;
   3162 		}
   3163 	}
   3164 
   3165 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
   3166 	{
   3167 		const TInterfaceBlock *block = type.getInterfaceBlock();
   3168 
   3169 		if(block)
   3170 		{
   3171 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3172 			const TFieldList& fields = block->fields();
   3173 			const TString &blockName = block->name();
   3174 			int fieldRegisterIndex = registerIndex;
   3175 
   3176 			if(!type.isInterfaceBlock())
   3177 			{
   3178 				// This is a uniform that's part of a block, let's see if the block is already defined
   3179 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
   3180 				{
   3181 					if(activeUniformBlocks[i].name == blockName.c_str())
   3182 					{
   3183 						// The block is already defined, find the register for the current uniform and return it
   3184 						for(size_t j = 0; j < fields.size(); j++)
   3185 						{
   3186 							const TString &fieldName = fields[j]->name();
   3187 							if(fieldName == name)
   3188 							{
   3189 								return fieldRegisterIndex;
   3190 							}
   3191 
   3192 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
   3193 						}
   3194 
   3195 						ASSERT(false);
   3196 						return fieldRegisterIndex;
   3197 					}
   3198 				}
   3199 			}
   3200 		}
   3201 
   3202 		return -1;
   3203 	}
   3204 
   3205 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)
   3206 	{
   3207 		const TStructure *structure = type.getStruct();
   3208 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
   3209 
   3210 		if(!structure && !block)
   3211 		{
   3212 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
   3213 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
   3214 			if(blockId >= 0)
   3215 			{
   3216 				blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);
   3217 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
   3218 			}
   3219 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
   3220 			activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),
   3221 			                                 fieldRegisterIndex, blockId, blockInfo));
   3222 			if(IsSampler(type.getBasicType()))
   3223 			{
   3224 				for(int i = 0; i < type.totalRegisterCount(); i++)
   3225 				{
   3226 					shader->declareSampler(fieldRegisterIndex + i);
   3227 				}
   3228 			}
   3229 		}
   3230 		else if(block)
   3231 		{
   3232 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3233 			const TFieldList& fields = block->fields();
   3234 			const TString &blockName = block->name();
   3235 			int fieldRegisterIndex = registerIndex;
   3236 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
   3237 
   3238 			blockId = activeUniformBlocks.size();
   3239 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
   3240 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
   3241 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
   3242 			blockDefinitions.push_back(BlockDefinitionIndexMap());
   3243 
   3244 			Std140BlockEncoder currentBlockEncoder(isRowMajor);
   3245 			currentBlockEncoder.enterAggregateType();
   3246 			for(size_t i = 0; i < fields.size(); i++)
   3247 			{
   3248 				const TType &fieldType = *(fields[i]->type());
   3249 				const TString &fieldName = fields[i]->name();
   3250 				if(isUniformBlockMember && (fieldName == name))
   3251 				{
   3252 					registerIndex = fieldRegisterIndex;
   3253 				}
   3254 
   3255 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
   3256 
   3257 				declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);
   3258 				fieldRegisterIndex += fieldType.totalRegisterCount();
   3259 			}
   3260 			currentBlockEncoder.exitAggregateType();
   3261 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
   3262 		}
   3263 		else
   3264 		{
   3265 			int fieldRegisterIndex = registerIndex;
   3266 
   3267 			const TFieldList& fields = structure->fields();
   3268 			if(type.isArray() && (structure || type.isInterfaceBlock()))
   3269 			{
   3270 				for(int i = 0; i < type.getArraySize(); i++)
   3271 				{
   3272 					if(encoder)
   3273 					{
   3274 						encoder->enterAggregateType();
   3275 					}
   3276 					for(size_t j = 0; j < fields.size(); j++)
   3277 					{
   3278 						const TType &fieldType = *(fields[j]->type());
   3279 						const TString &fieldName = fields[j]->name();
   3280 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
   3281 
   3282 						declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
   3283 						fieldRegisterIndex += fieldType.totalRegisterCount();
   3284 					}
   3285 					if(encoder)
   3286 					{
   3287 						encoder->exitAggregateType();
   3288 					}
   3289 				}
   3290 			}
   3291 			else
   3292 			{
   3293 				if(encoder)
   3294 				{
   3295 					encoder->enterAggregateType();
   3296 				}
   3297 				for(size_t i = 0; i < fields.size(); i++)
   3298 				{
   3299 					const TType &fieldType = *(fields[i]->type());
   3300 					const TString &fieldName = fields[i]->name();
   3301 					const TString uniformName = name + "." + fieldName;
   3302 
   3303 					declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
   3304 					fieldRegisterIndex += fieldType.totalRegisterCount();
   3305 				}
   3306 				if(encoder)
   3307 				{
   3308 					encoder->exitAggregateType();
   3309 				}
   3310 			}
   3311 		}
   3312 	}
   3313 
   3314 	GLenum OutputASM::glVariableType(const TType &type)
   3315 	{
   3316 		switch(type.getBasicType())
   3317 		{
   3318 		case EbtFloat:
   3319 			if(type.isScalar())
   3320 			{
   3321 				return GL_FLOAT;
   3322 			}
   3323 			else if(type.isVector())
   3324 			{
   3325 				switch(type.getNominalSize())
   3326 				{
   3327 				case 2: return GL_FLOAT_VEC2;
   3328 				case 3: return GL_FLOAT_VEC3;
   3329 				case 4: return GL_FLOAT_VEC4;
   3330 				default: UNREACHABLE(type.getNominalSize());
   3331 				}
   3332 			}
   3333 			else if(type.isMatrix())
   3334 			{
   3335 				switch(type.getNominalSize())
   3336 				{
   3337 				case 2:
   3338 					switch(type.getSecondarySize())
   3339 					{
   3340 					case 2: return GL_FLOAT_MAT2;
   3341 					case 3: return GL_FLOAT_MAT2x3;
   3342 					case 4: return GL_FLOAT_MAT2x4;
   3343 					default: UNREACHABLE(type.getSecondarySize());
   3344 					}
   3345 				case 3:
   3346 					switch(type.getSecondarySize())
   3347 					{
   3348 					case 2: return GL_FLOAT_MAT3x2;
   3349 					case 3: return GL_FLOAT_MAT3;
   3350 					case 4: return GL_FLOAT_MAT3x4;
   3351 					default: UNREACHABLE(type.getSecondarySize());
   3352 					}
   3353 				case 4:
   3354 					switch(type.getSecondarySize())
   3355 					{
   3356 					case 2: return GL_FLOAT_MAT4x2;
   3357 					case 3: return GL_FLOAT_MAT4x3;
   3358 					case 4: return GL_FLOAT_MAT4;
   3359 					default: UNREACHABLE(type.getSecondarySize());
   3360 					}
   3361 				default: UNREACHABLE(type.getNominalSize());
   3362 				}
   3363 			}
   3364 			else UNREACHABLE(0);
   3365 			break;
   3366 		case EbtInt:
   3367 			if(type.isScalar())
   3368 			{
   3369 				return GL_INT;
   3370 			}
   3371 			else if(type.isVector())
   3372 			{
   3373 				switch(type.getNominalSize())
   3374 				{
   3375 				case 2: return GL_INT_VEC2;
   3376 				case 3: return GL_INT_VEC3;
   3377 				case 4: return GL_INT_VEC4;
   3378 				default: UNREACHABLE(type.getNominalSize());
   3379 				}
   3380 			}
   3381 			else UNREACHABLE(0);
   3382 			break;
   3383 		case EbtUInt:
   3384 			if(type.isScalar())
   3385 			{
   3386 				return GL_UNSIGNED_INT;
   3387 			}
   3388 			else if(type.isVector())
   3389 			{
   3390 				switch(type.getNominalSize())
   3391 				{
   3392 				case 2: return GL_UNSIGNED_INT_VEC2;
   3393 				case 3: return GL_UNSIGNED_INT_VEC3;
   3394 				case 4: return GL_UNSIGNED_INT_VEC4;
   3395 				default: UNREACHABLE(type.getNominalSize());
   3396 				}
   3397 			}
   3398 			else UNREACHABLE(0);
   3399 			break;
   3400 		case EbtBool:
   3401 			if(type.isScalar())
   3402 			{
   3403 				return GL_BOOL;
   3404 			}
   3405 			else if(type.isVector())
   3406 			{
   3407 				switch(type.getNominalSize())
   3408 				{
   3409 				case 2: return GL_BOOL_VEC2;
   3410 				case 3: return GL_BOOL_VEC3;
   3411 				case 4: return GL_BOOL_VEC4;
   3412 				default: UNREACHABLE(type.getNominalSize());
   3413 				}
   3414 			}
   3415 			else UNREACHABLE(0);
   3416 			break;
   3417 		case EbtSampler2D:
   3418 			return GL_SAMPLER_2D;
   3419 		case EbtISampler2D:
   3420 			return GL_INT_SAMPLER_2D;
   3421 		case EbtUSampler2D:
   3422 			return GL_UNSIGNED_INT_SAMPLER_2D;
   3423 		case EbtSamplerCube:
   3424 			return GL_SAMPLER_CUBE;
   3425 		case EbtISamplerCube:
   3426 			return GL_INT_SAMPLER_CUBE;
   3427 		case EbtUSamplerCube:
   3428 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
   3429 		case EbtSamplerExternalOES:
   3430 			return GL_SAMPLER_EXTERNAL_OES;
   3431 		case EbtSampler3D:
   3432 			return GL_SAMPLER_3D_OES;
   3433 		case EbtISampler3D:
   3434 			return GL_INT_SAMPLER_3D;
   3435 		case EbtUSampler3D:
   3436 			return GL_UNSIGNED_INT_SAMPLER_3D;
   3437 		case EbtSampler2DArray:
   3438 			return GL_SAMPLER_2D_ARRAY;
   3439 		case EbtISampler2DArray:
   3440 			return GL_INT_SAMPLER_2D_ARRAY;
   3441 		case EbtUSampler2DArray:
   3442 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
   3443 		case EbtSampler2DShadow:
   3444 			return GL_SAMPLER_2D_SHADOW;
   3445 		case EbtSamplerCubeShadow:
   3446 			return GL_SAMPLER_CUBE_SHADOW;
   3447 		case EbtSampler2DArrayShadow:
   3448 			return GL_SAMPLER_2D_ARRAY_SHADOW;
   3449 		default:
   3450 			UNREACHABLE(type.getBasicType());
   3451 			break;
   3452 		}
   3453 
   3454 		return GL_NONE;
   3455 	}
   3456 
   3457 	GLenum OutputASM::glVariablePrecision(const TType &type)
   3458 	{
   3459 		if(type.getBasicType() == EbtFloat)
   3460 		{
   3461 			switch(type.getPrecision())
   3462 			{
   3463 			case EbpHigh:   return GL_HIGH_FLOAT;
   3464 			case EbpMedium: return GL_MEDIUM_FLOAT;
   3465 			case EbpLow:    return GL_LOW_FLOAT;
   3466 			case EbpUndefined:
   3467 				// Should be defined as the default precision by the parser
   3468 			default: UNREACHABLE(type.getPrecision());
   3469 			}
   3470 		}
   3471 		else if(type.getBasicType() == EbtInt)
   3472 		{
   3473 			switch(type.getPrecision())
   3474 			{
   3475 			case EbpHigh:   return GL_HIGH_INT;
   3476 			case EbpMedium: return GL_MEDIUM_INT;
   3477 			case EbpLow:    return GL_LOW_INT;
   3478 			case EbpUndefined:
   3479 				// Should be defined as the default precision by the parser
   3480 			default: UNREACHABLE(type.getPrecision());
   3481 			}
   3482 		}
   3483 
   3484 		// Other types (boolean, sampler) don't have a precision
   3485 		return GL_NONE;
   3486 	}
   3487 
   3488 	int OutputASM::dim(TIntermNode *v)
   3489 	{
   3490 		TIntermTyped *vector = v->getAsTyped();
   3491 		ASSERT(vector && vector->isRegister());
   3492 		return vector->getNominalSize();
   3493 	}
   3494 
   3495 	int OutputASM::dim2(TIntermNode *m)
   3496 	{
   3497 		TIntermTyped *matrix = m->getAsTyped();
   3498 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
   3499 		return matrix->getSecondarySize();
   3500 	}
   3501 
   3502 	// Returns ~0u if no loop count could be determined
   3503 	unsigned int OutputASM::loopCount(TIntermLoop *node)
   3504 	{
   3505 		// Parse loops of the form:
   3506 		// for(int index = initial; index [comparator] limit; index += increment)
   3507 		TIntermSymbol *index = 0;
   3508 		TOperator comparator = EOpNull;
   3509 		int initial = 0;
   3510 		int limit = 0;
   3511 		int increment = 0;
   3512 
   3513 		// Parse index name and intial value
   3514 		if(node->getInit())
   3515 		{
   3516 			TIntermAggregate *init = node->getInit()->getAsAggregate();
   3517 
   3518 			if(init)
   3519 			{
   3520 				TIntermSequence &sequence = init->getSequence();
   3521 				TIntermTyped *variable = sequence[0]->getAsTyped();
   3522 
   3523 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
   3524 				{
   3525 					TIntermBinary *assign = variable->getAsBinaryNode();
   3526 
   3527 					if(assign && assign->getOp() == EOpInitialize)
   3528 					{
   3529 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
   3530 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
   3531 
   3532 						if(symbol && constant)
   3533 						{
   3534 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3535 							{
   3536 								index = symbol;
   3537 								initial = constant->getUnionArrayPointer()[0].getIConst();
   3538 							}
   3539 						}
   3540 					}
   3541 				}
   3542 			}
   3543 		}
   3544 
   3545 		// Parse comparator and limit value
   3546 		if(index && node->getCondition())
   3547 		{
   3548 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
   3549 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
   3550 
   3551 			if(left && (left->getId() == index->getId()))
   3552 			{
   3553 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
   3554 
   3555 				if(constant)
   3556 				{
   3557 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3558 					{
   3559 						comparator = test->getOp();
   3560 						limit = constant->getUnionArrayPointer()[0].getIConst();
   3561 					}
   3562 				}
   3563 			}
   3564 		}
   3565 
   3566 		// Parse increment
   3567 		if(index && comparator != EOpNull && node->getExpression())
   3568 		{
   3569 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
   3570 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
   3571 
   3572 			if(binaryTerminal)
   3573 			{
   3574 				TOperator op = binaryTerminal->getOp();
   3575 				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
   3576 
   3577 				if(constant)
   3578 				{
   3579 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3580 					{
   3581 						int value = constant->getUnionArrayPointer()[0].getIConst();
   3582 
   3583 						switch(op)
   3584 						{
   3585 						case EOpAddAssign: increment = value;  break;
   3586 						case EOpSubAssign: increment = -value; break;
   3587 						default: UNIMPLEMENTED();
   3588 						}
   3589 					}
   3590 				}
   3591 			}
   3592 			else if(unaryTerminal)
   3593 			{
   3594 				TOperator op = unaryTerminal->getOp();
   3595 
   3596 				switch(op)
   3597 				{
   3598 				case EOpPostIncrement: increment = 1;  break;
   3599 				case EOpPostDecrement: increment = -1; break;
   3600 				case EOpPreIncrement:  increment = 1;  break;
   3601 				case EOpPreDecrement:  increment = -1; break;
   3602 				default: UNIMPLEMENTED();
   3603 				}
   3604 			}
   3605 		}
   3606 
   3607 		if(index && comparator != EOpNull && increment != 0)
   3608 		{
   3609 			if(comparator == EOpLessThanEqual)
   3610 			{
   3611 				comparator = EOpLessThan;
   3612 				limit += 1;
   3613 			}
   3614 			else if(comparator == EOpGreaterThanEqual)
   3615 			{
   3616 				comparator = EOpLessThan;
   3617 				limit -= 1;
   3618 				std::swap(initial, limit);
   3619 				increment = -increment;
   3620 			}
   3621 			else if(comparator == EOpGreaterThan)
   3622 			{
   3623 				comparator = EOpLessThan;
   3624 				std::swap(initial, limit);
   3625 				increment = -increment;
   3626 			}
   3627 
   3628 			if(comparator == EOpLessThan)
   3629 			{
   3630 				if(!(initial < limit))   // Never loops
   3631 				{
   3632 					return 0;
   3633 				}
   3634 
   3635 				int iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
   3636 
   3637 				if(iterations < 0)
   3638 				{
   3639 					return ~0u;
   3640 				}
   3641 
   3642 				return iterations;
   3643 			}
   3644 			else UNIMPLEMENTED();   // Falls through
   3645 		}
   3646 
   3647 		return ~0u;
   3648 	}
   3649 
   3650 	bool LoopUnrollable::traverse(TIntermNode *node)
   3651 	{
   3652 		loopDepth = 0;
   3653 		loopUnrollable = true;
   3654 
   3655 		node->traverse(this);
   3656 
   3657 		return loopUnrollable;
   3658 	}
   3659 
   3660 	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
   3661 	{
   3662 		if(visit == PreVisit)
   3663 		{
   3664 			loopDepth++;
   3665 		}
   3666 		else if(visit == PostVisit)
   3667 		{
   3668 			loopDepth++;
   3669 		}
   3670 
   3671 		return true;
   3672 	}
   3673 
   3674 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
   3675 	{
   3676 		if(!loopUnrollable)
   3677 		{
   3678 			return false;
   3679 		}
   3680 
   3681 		if(!loopDepth)
   3682 		{
   3683 			return true;
   3684 		}
   3685 
   3686 		switch(node->getFlowOp())
   3687 		{
   3688 		case EOpKill:
   3689 		case EOpReturn:
   3690 			break;
   3691 		case EOpBreak:
   3692 		case EOpContinue:
   3693 			loopUnrollable = false;
   3694 			break;
   3695 		default: UNREACHABLE(node->getFlowOp());
   3696 		}
   3697 
   3698 		return loopUnrollable;
   3699 	}
   3700 
   3701 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
   3702 	{
   3703 		return loopUnrollable;
   3704 	}
   3705 }
   3706