Home | History | Annotate | Download | only in compiler
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "OutputASM.h"
     16 #include "Common/Math.hpp"
     17 
     18 #include "common/debug.h"
     19 #include "InfoSink.h"
     20 
     21 #include "libGLESv2/Shader.h"
     22 
     23 #include <GLES2/gl2.h>
     24 #include <GLES2/gl2ext.h>
     25 #include <GLES3/gl3.h>
     26 
     27 #include <stdlib.h>
     28 
     29 namespace
     30 {
     31 	GLenum glVariableType(const TType &type)
     32 	{
     33 		switch(type.getBasicType())
     34 		{
     35 		case EbtFloat:
     36 			if(type.isScalar())
     37 			{
     38 				return GL_FLOAT;
     39 			}
     40 			else if(type.isVector())
     41 			{
     42 				switch(type.getNominalSize())
     43 				{
     44 				case 2: return GL_FLOAT_VEC2;
     45 				case 3: return GL_FLOAT_VEC3;
     46 				case 4: return GL_FLOAT_VEC4;
     47 				default: UNREACHABLE(type.getNominalSize());
     48 				}
     49 			}
     50 			else if(type.isMatrix())
     51 			{
     52 				switch(type.getNominalSize())
     53 				{
     54 				case 2:
     55 					switch(type.getSecondarySize())
     56 					{
     57 					case 2: return GL_FLOAT_MAT2;
     58 					case 3: return GL_FLOAT_MAT2x3;
     59 					case 4: return GL_FLOAT_MAT2x4;
     60 					default: UNREACHABLE(type.getSecondarySize());
     61 					}
     62 				case 3:
     63 					switch(type.getSecondarySize())
     64 					{
     65 					case 2: return GL_FLOAT_MAT3x2;
     66 					case 3: return GL_FLOAT_MAT3;
     67 					case 4: return GL_FLOAT_MAT3x4;
     68 					default: UNREACHABLE(type.getSecondarySize());
     69 					}
     70 				case 4:
     71 					switch(type.getSecondarySize())
     72 					{
     73 					case 2: return GL_FLOAT_MAT4x2;
     74 					case 3: return GL_FLOAT_MAT4x3;
     75 					case 4: return GL_FLOAT_MAT4;
     76 					default: UNREACHABLE(type.getSecondarySize());
     77 					}
     78 				default: UNREACHABLE(type.getNominalSize());
     79 				}
     80 			}
     81 			else UNREACHABLE(0);
     82 			break;
     83 		case EbtInt:
     84 			if(type.isScalar())
     85 			{
     86 				return GL_INT;
     87 			}
     88 			else if(type.isVector())
     89 			{
     90 				switch(type.getNominalSize())
     91 				{
     92 				case 2: return GL_INT_VEC2;
     93 				case 3: return GL_INT_VEC3;
     94 				case 4: return GL_INT_VEC4;
     95 				default: UNREACHABLE(type.getNominalSize());
     96 				}
     97 			}
     98 			else UNREACHABLE(0);
     99 			break;
    100 		case EbtUInt:
    101 			if(type.isScalar())
    102 			{
    103 				return GL_UNSIGNED_INT;
    104 			}
    105 			else if(type.isVector())
    106 			{
    107 				switch(type.getNominalSize())
    108 				{
    109 				case 2: return GL_UNSIGNED_INT_VEC2;
    110 				case 3: return GL_UNSIGNED_INT_VEC3;
    111 				case 4: return GL_UNSIGNED_INT_VEC4;
    112 				default: UNREACHABLE(type.getNominalSize());
    113 				}
    114 			}
    115 			else UNREACHABLE(0);
    116 			break;
    117 		case EbtBool:
    118 			if(type.isScalar())
    119 			{
    120 				return GL_BOOL;
    121 			}
    122 			else if(type.isVector())
    123 			{
    124 				switch(type.getNominalSize())
    125 				{
    126 				case 2: return GL_BOOL_VEC2;
    127 				case 3: return GL_BOOL_VEC3;
    128 				case 4: return GL_BOOL_VEC4;
    129 				default: UNREACHABLE(type.getNominalSize());
    130 				}
    131 			}
    132 			else UNREACHABLE(0);
    133 			break;
    134 		case EbtSampler2D:
    135 			return GL_SAMPLER_2D;
    136 		case EbtISampler2D:
    137 			return GL_INT_SAMPLER_2D;
    138 		case EbtUSampler2D:
    139 			return GL_UNSIGNED_INT_SAMPLER_2D;
    140 		case EbtSamplerCube:
    141 			return GL_SAMPLER_CUBE;
    142 		case EbtSampler2DRect:
    143 			return GL_SAMPLER_2D_RECT_ARB;
    144 		case EbtISamplerCube:
    145 			return GL_INT_SAMPLER_CUBE;
    146 		case EbtUSamplerCube:
    147 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
    148 		case EbtSamplerExternalOES:
    149 			return GL_SAMPLER_EXTERNAL_OES;
    150 		case EbtSampler3D:
    151 			return GL_SAMPLER_3D_OES;
    152 		case EbtISampler3D:
    153 			return GL_INT_SAMPLER_3D;
    154 		case EbtUSampler3D:
    155 			return GL_UNSIGNED_INT_SAMPLER_3D;
    156 		case EbtSampler2DArray:
    157 			return GL_SAMPLER_2D_ARRAY;
    158 		case EbtISampler2DArray:
    159 			return GL_INT_SAMPLER_2D_ARRAY;
    160 		case EbtUSampler2DArray:
    161 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
    162 		case EbtSampler2DShadow:
    163 			return GL_SAMPLER_2D_SHADOW;
    164 		case EbtSamplerCubeShadow:
    165 			return GL_SAMPLER_CUBE_SHADOW;
    166 		case EbtSampler2DArrayShadow:
    167 			return GL_SAMPLER_2D_ARRAY_SHADOW;
    168 		default:
    169 			UNREACHABLE(type.getBasicType());
    170 			break;
    171 		}
    172 
    173 		return GL_NONE;
    174 	}
    175 
    176 	GLenum glVariablePrecision(const TType &type)
    177 	{
    178 		if(type.getBasicType() == EbtFloat)
    179 		{
    180 			switch(type.getPrecision())
    181 			{
    182 			case EbpHigh:   return GL_HIGH_FLOAT;
    183 			case EbpMedium: return GL_MEDIUM_FLOAT;
    184 			case EbpLow:    return GL_LOW_FLOAT;
    185 			case EbpUndefined:
    186 				// Should be defined as the default precision by the parser
    187 			default: UNREACHABLE(type.getPrecision());
    188 			}
    189 		}
    190 		else if(type.getBasicType() == EbtInt)
    191 		{
    192 			switch(type.getPrecision())
    193 			{
    194 			case EbpHigh:   return GL_HIGH_INT;
    195 			case EbpMedium: return GL_MEDIUM_INT;
    196 			case EbpLow:    return GL_LOW_INT;
    197 			case EbpUndefined:
    198 				// Should be defined as the default precision by the parser
    199 			default: UNREACHABLE(type.getPrecision());
    200 			}
    201 		}
    202 
    203 		// Other types (boolean, sampler) don't have a precision
    204 		return GL_NONE;
    205 	}
    206 }
    207 
    208 namespace glsl
    209 {
    210 	// Integer to TString conversion
    211 	TString str(int i)
    212 	{
    213 		char buffer[20];
    214 		sprintf(buffer, "%d", i);
    215 		return buffer;
    216 	}
    217 
    218 	class Temporary : public TIntermSymbol
    219 	{
    220 	public:
    221 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
    222 		{
    223 		}
    224 
    225 		~Temporary()
    226 		{
    227 			assembler->freeTemporary(this);
    228 		}
    229 
    230 	private:
    231 		OutputASM *const assembler;
    232 	};
    233 
    234 	class Constant : public TIntermConstantUnion
    235 	{
    236 	public:
    237 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
    238 		{
    239 			constants[0].setFConst(x);
    240 			constants[1].setFConst(y);
    241 			constants[2].setFConst(z);
    242 			constants[3].setFConst(w);
    243 		}
    244 
    245 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
    246 		{
    247 			constants[0].setBConst(b);
    248 		}
    249 
    250 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
    251 		{
    252 			constants[0].setIConst(i);
    253 		}
    254 
    255 		~Constant()
    256 		{
    257 		}
    258 
    259 	private:
    260 		ConstantUnion constants[4];
    261 	};
    262 
    263 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
    264 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
    265 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
    266 	{
    267 		if(type.isStruct())
    268 		{
    269 			for(const auto& field : type.getStruct()->fields())
    270 			{
    271 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
    272 			}
    273 		}
    274 	}
    275 
    276 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
    277 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
    278 	{
    279 	}
    280 
    281 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
    282 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
    283 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
    284 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
    285 	{
    286 	}
    287 
	// Starts encoding with the running offset at zero.
	BlockLayoutEncoder::BlockLayoutEncoder()
		: mCurrentOffset(0)
	{
	}
    292 
    293 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
    294 	{
    295 		int arrayStride;
    296 		int matrixStride;
    297 
    298 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
    299 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
    300 
    301 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
    302 		                                 static_cast<int>(arrayStride * BytesPerComponent),
    303 		                                 static_cast<int>(matrixStride * BytesPerComponent),
    304 		                                 (matrixStride > 0) && isRowMajor);
    305 
    306 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
    307 
    308 		return memberInfo;
    309 	}
    310 
    311 	// static
    312 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
    313 	{
    314 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
    315 	}
    316 
    317 	// static
    318 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
    319 	{
    320 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
    321 	}
    322 
	// Replaces the running offset with sw::align(offset, ComponentsPerRegister)
	// (presumably rounding it up to the start of the next register — confirm against sw::align).
	void BlockLayoutEncoder::nextRegister()
	{
		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
	}
    327 
	// No state of its own; defers to the BlockLayoutEncoder constructor.
	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
	{
	}
    331 
	// Entering an aggregate re-aligns the running offset through nextRegister().
	void Std140BlockEncoder::enterAggregateType()
	{
		nextRegister();
	}
    336 
	// Leaving an aggregate re-aligns the running offset through nextRegister().
	void Std140BlockEncoder::exitAggregateType()
	{
		nextRegister();
	}
    341 
    342 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
    343 	{
    344 		size_t baseAlignment = 0;
    345 		int matrixStride = 0;
    346 		int arrayStride = 0;
    347 
    348 		if(type.isMatrix())
    349 		{
    350 			baseAlignment = ComponentsPerRegister;
    351 			matrixStride = ComponentsPerRegister;
    352 
    353 			if(arraySize > 0)
    354 			{
    355 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    356 				arrayStride = ComponentsPerRegister * numRegisters;
    357 			}
    358 		}
    359 		else if(arraySize > 0)
    360 		{
    361 			baseAlignment = ComponentsPerRegister;
    362 			arrayStride = ComponentsPerRegister;
    363 		}
    364 		else
    365 		{
    366 			const size_t numComponents = type.getElementSize();
    367 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
    368 		}
    369 
    370 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
    371 
    372 		*matrixStrideOut = matrixStride;
    373 		*arrayStrideOut = arrayStride;
    374 	}
    375 
    376 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
    377 	{
    378 		if(arraySize > 0)
    379 		{
    380 			mCurrentOffset += arrayStride * arraySize;
    381 		}
    382 		else if(type.isMatrix())
    383 		{
    384 			ASSERT(matrixStride == ComponentsPerRegister);
    385 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    386 			mCurrentOffset += ComponentsPerRegister * numRegisters;
    387 		}
    388 		else
    389 		{
    390 			mCurrentOffset += type.getElementSize();
    391 		}
    392 	}
    393 
    394 	Attribute::Attribute()
    395 	{
    396 		type = GL_NONE;
    397 		arraySize = 0;
    398 		registerIndex = 0;
    399 	}
    400 
    401 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
    402 	{
    403 		this->type = type;
    404 		this->name = name;
    405 		this->arraySize = arraySize;
    406 		this->location = location;
    407 		this->registerIndex = registerIndex;
    408 	}
    409 
	// This implementation has no pixel shader to offer and always returns nullptr.
	sw::PixelShader *Shader::getPixelShader() const
	{
		return nullptr;
	}
    414 
	// This implementation has no vertex shader to offer and always returns nullptr.
	sw::VertexShader *Shader::getVertexShader() const
	{
		return nullptr;
	}
    419 
    420 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
    421 	{
    422 		TString name = TFunction::unmangleName(nodeName);
    423 
    424 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
    425 		{
    426 			method = IMPLICIT;
    427 		}
    428 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
    429 		{
    430 			method = IMPLICIT;
    431 			proj = true;
    432 		}
    433 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
    434 		{
    435 			method = LOD;
    436 		}
    437 		else if(name == "texture2DProjLod" || name == "textureProjLod")
    438 		{
    439 			method = LOD;
    440 			proj = true;
    441 		}
    442 		else if(name == "textureSize")
    443 		{
    444 			method = SIZE;
    445 		}
    446 		else if(name == "textureOffset")
    447 		{
    448 			method = IMPLICIT;
    449 			offset = true;
    450 		}
    451 		else if(name == "textureProjOffset")
    452 		{
    453 			method = IMPLICIT;
    454 			offset = true;
    455 			proj = true;
    456 		}
    457 		else if(name == "textureLodOffset")
    458 		{
    459 			method = LOD;
    460 			offset = true;
    461 		}
    462 		else if(name == "textureProjLodOffset")
    463 		{
    464 			method = LOD;
    465 			proj = true;
    466 			offset = true;
    467 		}
    468 		else if(name == "texelFetch")
    469 		{
    470 			method = FETCH;
    471 		}
    472 		else if(name == "texelFetchOffset")
    473 		{
    474 			method = FETCH;
    475 			offset = true;
    476 		}
    477 		else if(name == "textureGrad")
    478 		{
    479 			method = GRAD;
    480 		}
    481 		else if(name == "textureGradOffset")
    482 		{
    483 			method = GRAD;
    484 			offset = true;
    485 		}
    486 		else if(name == "textureProjGrad")
    487 		{
    488 			method = GRAD;
    489 			proj = true;
    490 		}
    491 		else if(name == "textureProjGradOffset")
    492 		{
    493 			method = GRAD;
    494 			proj = true;
    495 			offset = true;
    496 		}
    497 		else UNREACHABLE(0);
    498 	}
    499 
    500 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
    501 	{
    502 		shader = nullptr;
    503 		pixelShader = nullptr;
    504 		vertexShader = nullptr;
    505 
    506 		if(shaderObject)
    507 		{
    508 			shader = shaderObject->getShader();
    509 			pixelShader = shaderObject->getPixelShader();
    510 			vertexShader = shaderObject->getVertexShader();
    511 		}
    512 
    513 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
    514 		currentFunction = 0;
    515 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
    516 	}
    517 
	// Nothing to release explicitly here.
	OutputASM::~OutputASM()
	{
	}
    521 
    522 	void OutputASM::output()
    523 	{
    524 		if(shader)
    525 		{
    526 			emitShader(GLOBAL);
    527 
    528 			if(functionArray.size() > 1)   // Only call main() when there are other functions
    529 			{
    530 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
    531 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
    532 				callMain->dst.index = 0;   // main()
    533 
    534 				emit(sw::Shader::OPCODE_RET);
    535 			}
    536 
    537 			emitShader(FUNCTION);
    538 		}
    539 	}
    540 
	// Traverses the whole parse tree once, emitting for the requested scope.
	// The traversal always starts out with the current scope set to GLOBAL.
	void OutputASM::emitShader(Scope scope)
	{
		emitScope = scope;
		currentScope = GLOBAL;
		mContext.getTreeRoot()->traverse(this);
	}
    547 
	// Hands the temporary to free() together with the 'temporaries' collection
	// (presumably releasing whatever was allocated for it there — see free()).
	void OutputASM::freeTemporary(Temporary *temporary)
	{
		free(temporaries, temporary);
	}
    552 
    553 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
    554 	{
    555 		TBasicType baseType = in->getType().getBasicType();
    556 
    557 		switch(op)
    558 		{
    559 		case sw::Shader::OPCODE_NEG:
    560 			switch(baseType)
    561 			{
    562 			case EbtInt:
    563 			case EbtUInt:
    564 				return sw::Shader::OPCODE_INEG;
    565 			case EbtFloat:
    566 			default:
    567 				return op;
    568 			}
    569 		case sw::Shader::OPCODE_ABS:
    570 			switch(baseType)
    571 			{
    572 			case EbtInt:
    573 				return sw::Shader::OPCODE_IABS;
    574 			case EbtFloat:
    575 			default:
    576 				return op;
    577 			}
    578 		case sw::Shader::OPCODE_SGN:
    579 			switch(baseType)
    580 			{
    581 			case EbtInt:
    582 				return sw::Shader::OPCODE_ISGN;
    583 			case EbtFloat:
    584 			default:
    585 				return op;
    586 			}
    587 		case sw::Shader::OPCODE_ADD:
    588 			switch(baseType)
    589 			{
    590 			case EbtInt:
    591 			case EbtUInt:
    592 				return sw::Shader::OPCODE_IADD;
    593 			case EbtFloat:
    594 			default:
    595 				return op;
    596 			}
    597 		case sw::Shader::OPCODE_SUB:
    598 			switch(baseType)
    599 			{
    600 			case EbtInt:
    601 			case EbtUInt:
    602 				return sw::Shader::OPCODE_ISUB;
    603 			case EbtFloat:
    604 			default:
    605 				return op;
    606 			}
    607 		case sw::Shader::OPCODE_MUL:
    608 			switch(baseType)
    609 			{
    610 			case EbtInt:
    611 			case EbtUInt:
    612 				return sw::Shader::OPCODE_IMUL;
    613 			case EbtFloat:
    614 			default:
    615 				return op;
    616 			}
    617 		case sw::Shader::OPCODE_DIV:
    618 			switch(baseType)
    619 			{
    620 			case EbtInt:
    621 				return sw::Shader::OPCODE_IDIV;
    622 			case EbtUInt:
    623 				return sw::Shader::OPCODE_UDIV;
    624 			case EbtFloat:
    625 			default:
    626 				return op;
    627 			}
    628 		case sw::Shader::OPCODE_IMOD:
    629 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
    630 		case sw::Shader::OPCODE_ISHR:
    631 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
    632 		case sw::Shader::OPCODE_MIN:
    633 			switch(baseType)
    634 			{
    635 			case EbtInt:
    636 				return sw::Shader::OPCODE_IMIN;
    637 			case EbtUInt:
    638 				return sw::Shader::OPCODE_UMIN;
    639 			case EbtFloat:
    640 			default:
    641 				return op;
    642 			}
    643 		case sw::Shader::OPCODE_MAX:
    644 			switch(baseType)
    645 			{
    646 			case EbtInt:
    647 				return sw::Shader::OPCODE_IMAX;
    648 			case EbtUInt:
    649 				return sw::Shader::OPCODE_UMAX;
    650 			case EbtFloat:
    651 			default:
    652 				return op;
    653 			}
    654 		default:
    655 			return op;
    656 		}
    657 	}
    658 
    659 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
    660 	{
    661 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
    662 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
    663 		switch(symbol->getQualifier())
    664 		{
    665 		case EvqVaryingIn:
    666 		case EvqVaryingOut:
    667 		case EvqInvariantVaryingIn:
    668 		case EvqInvariantVaryingOut:
    669 		case EvqVertexOut:
    670 		case EvqFragmentIn:
    671 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
    672 			{
    673 				declareVarying(symbol, -1);
    674 			}
    675 			break;
    676 		case EvqFragmentOut:
    677 			declareFragmentOutput(symbol);
    678 			break;
    679 		default:
    680 			break;
    681 		}
    682 
    683 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
    684 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
    685 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
    686 		// are considered active, even if they are not referenced in any shader in the program.
    687 		// The uniform block itself is also considered active, even if no member of the block is referenced."
    688 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
    689 		{
    690 			uniformRegister(symbol);
    691 		}
    692 	}
    693 
    694 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
    695 	{
    696 		if(currentScope != emitScope)
    697 		{
    698 			return false;
    699 		}
    700 
    701 		TIntermTyped *result = node;
    702 		TIntermTyped *left = node->getLeft();
    703 		TIntermTyped *right = node->getRight();
    704 		const TType &leftType = left->getType();
    705 		const TType &rightType = right->getType();
    706 
    707 		if(isSamplerRegister(result))
    708 		{
    709 			return false;   // Don't traverse, the register index is determined statically
    710 		}
    711 
    712 		switch(node->getOp())
    713 		{
    714 		case EOpAssign:
    715 			assert(visit == PreVisit);
    716 			right->traverse(this);
    717 			assignLvalue(left, right);
    718 			copy(result, right);
    719 			return false;
    720 		case EOpInitialize:
    721 			assert(visit == PreVisit);
    722 			// Constant arrays go into the constant register file.
    723 			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
    724 			{
    725 				for(int i = 0; i < left->totalRegisterCount(); i++)
    726 				{
    727 					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
    728 				}
    729 			}
    730 			else
    731 			{
    732 				right->traverse(this);
    733 				copy(left, right);
    734 			}
    735 			return false;
    736 		case EOpMatrixTimesScalarAssign:
    737 			assert(visit == PreVisit);
    738 			right->traverse(this);
    739 			for(int i = 0; i < leftType.getNominalSize(); i++)
    740 			{
    741 				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
    742 			}
    743 
    744 			assignLvalue(left, result);
    745 			return false;
    746 		case EOpVectorTimesMatrixAssign:
    747 			assert(visit == PreVisit);
    748 			{
    749 				right->traverse(this);
    750 				int size = leftType.getNominalSize();
    751 
    752 				for(int i = 0; i < size; i++)
    753 				{
    754 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
    755 					dot->dst.mask = 1 << i;
    756 				}
    757 
    758 				assignLvalue(left, result);
    759 			}
    760 			return false;
    761 		case EOpMatrixTimesMatrixAssign:
    762 			assert(visit == PreVisit);
    763 			{
    764 				right->traverse(this);
    765 				int dim = leftType.getNominalSize();
    766 
    767 				for(int i = 0; i < dim; i++)
    768 				{
    769 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    770 					mul->src[1].swizzle = 0x00;
    771 
    772 					for(int j = 1; j < dim; j++)
    773 					{
    774 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
    775 						mad->src[1].swizzle = j * 0x55;
    776 					}
    777 				}
    778 
    779 				assignLvalue(left, result);
    780 			}
    781 			return false;
    782 		case EOpIndexDirect:
    783 		case EOpIndexIndirect:
    784 		case EOpIndexDirectStruct:
    785 		case EOpIndexDirectInterfaceBlock:
    786 			assert(visit == PreVisit);
    787 			evaluateRvalue(node);
    788 			return false;
    789 		case EOpVectorSwizzle:
    790 			if(visit == PostVisit)
    791 			{
    792 				int swizzle = 0;
    793 				TIntermAggregate *components = right->getAsAggregate();
    794 
    795 				if(components)
    796 				{
    797 					TIntermSequence &sequence = components->getSequence();
    798 					int component = 0;
    799 
    800 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
    801 					{
    802 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
    803 
    804 						if(element)
    805 						{
    806 							int i = element->getUnionArrayPointer()[0].getIConst();
    807 							swizzle |= i << (component * 2);
    808 							component++;
    809 						}
    810 						else UNREACHABLE(0);
    811 					}
    812 				}
    813 				else UNREACHABLE(0);
    814 
    815 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
    816 				mov->src[0].swizzle = swizzle;
    817 			}
    818 			break;
    819 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
    820 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
    821 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
    822 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
    823 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
    824 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
    825 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
    826 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
    827 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
    828 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
    829 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
    830 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
    831 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
    832 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
    833 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
    834 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
    835 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
    836 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
    837 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
    838 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
    839 		case EOpEqual:
    840 			if(visit == PostVisit)
    841 			{
    842 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
    843 
    844 				for(int index = 1; index < left->totalRegisterCount(); index++)
    845 				{
    846 					Temporary equal(this);
    847 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
    848 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
    849 				}
    850 			}
    851 			break;
    852 		case EOpNotEqual:
    853 			if(visit == PostVisit)
    854 			{
    855 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
    856 
    857 				for(int index = 1; index < left->totalRegisterCount(); index++)
    858 				{
    859 					Temporary notEqual(this);
    860 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
    861 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
    862 				}
    863 			}
    864 			break;
    865 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
    866 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
    867 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
    868 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
    869 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
    870 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
    871 		case EOpMatrixTimesScalar:
    872 			if(visit == PostVisit)
    873 			{
    874 				if(left->isMatrix())
    875 				{
    876 					for(int i = 0; i < leftType.getNominalSize(); i++)
    877 					{
    878 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
    879 					}
    880 				}
    881 				else if(right->isMatrix())
    882 				{
    883 					for(int i = 0; i < rightType.getNominalSize(); i++)
    884 					{
    885 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    886 					}
    887 				}
    888 				else UNREACHABLE(0);
    889 			}
    890 			break;
    891 		case EOpVectorTimesMatrix:
    892 			if(visit == PostVisit)
    893 			{
    894 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
    895 
    896 				int size = rightType.getNominalSize();
    897 				for(int i = 0; i < size; i++)
    898 				{
    899 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
    900 					dot->dst.mask = 1 << i;
    901 				}
    902 			}
    903 			break;
    904 		case EOpMatrixTimesVector:
    905 			if(visit == PostVisit)
    906 			{
    907 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
    908 				mul->src[1].swizzle = 0x00;
    909 
    910 				int size = rightType.getNominalSize();
    911 				for(int i = 1; i < size; i++)
    912 				{
    913 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
    914 					mad->src[1].swizzle = i * 0x55;
    915 				}
    916 			}
    917 			break;
    918 		case EOpMatrixTimesMatrix:
    919 			if(visit == PostVisit)
    920 			{
    921 				int dim = leftType.getNominalSize();
    922 
    923 				int size = rightType.getNominalSize();
    924 				for(int i = 0; i < size; i++)
    925 				{
    926 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    927 					mul->src[1].swizzle = 0x00;
    928 
    929 					for(int j = 1; j < dim; j++)
    930 					{
    931 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
    932 						mad->src[1].swizzle = j * 0x55;
    933 					}
    934 				}
    935 			}
    936 			break;
    937 		case EOpLogicalOr:
    938 			if(trivial(right, 6))
    939 			{
    940 				if(visit == PostVisit)
    941 				{
    942 					emit(sw::Shader::OPCODE_OR, result, left, right);
    943 				}
    944 			}
    945 			else   // Short-circuit evaluation
    946 			{
    947 				if(visit == InVisit)
    948 				{
    949 					emit(sw::Shader::OPCODE_MOV, result, left);
    950 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
    951 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
    952 				}
    953 				else if(visit == PostVisit)
    954 				{
    955 					emit(sw::Shader::OPCODE_MOV, result, right);
    956 					emit(sw::Shader::OPCODE_ENDIF);
    957 				}
    958 			}
    959 			break;
    960 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
    961 		case EOpLogicalAnd:
    962 			if(trivial(right, 6))
    963 			{
    964 				if(visit == PostVisit)
    965 				{
    966 					emit(sw::Shader::OPCODE_AND, result, left, right);
    967 				}
    968 			}
    969 			else   // Short-circuit evaluation
    970 			{
    971 				if(visit == InVisit)
    972 				{
    973 					emit(sw::Shader::OPCODE_MOV, result, left);
    974 					emit(sw::Shader::OPCODE_IF, 0, result);
    975 				}
    976 				else if(visit == PostVisit)
    977 				{
    978 					emit(sw::Shader::OPCODE_MOV, result, right);
    979 					emit(sw::Shader::OPCODE_ENDIF);
    980 				}
    981 			}
    982 			break;
    983 		default: UNREACHABLE(node->getOp());
    984 		}
    985 
    986 		return true;
    987 	}
    988 
    989 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
    990 	{
    991 		switch(size)
    992 		{
    993 		case 1: // Used for cofactor computation only
    994 			{
    995 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
    996 				bool isMov = (row == col);
    997 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
    998 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
    999 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
   1000 				mov->dst.mask = 1 << outRow;
   1001 			}
   1002 			break;
   1003 		case 2:
   1004 			{
   1005 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
   1006 
   1007 				bool isCofactor = (col >= 0) && (row >= 0);
   1008 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
   1009 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
   1010 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
   1011 
   1012 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
   1013 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
   1014 				det->dst.mask = 1 << outRow;
   1015 			}
   1016 			break;
   1017 		case 3:
   1018 			{
   1019 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
   1020 
   1021 				bool isCofactor = (col >= 0) && (row >= 0);
   1022 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
   1023 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
   1024 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
   1025 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
   1026 
   1027 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
   1028 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
   1029 				det->dst.mask = 1 << outRow;
   1030 			}
   1031 			break;
   1032 		case 4:
   1033 			{
   1034 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
   1035 				det->dst.mask = 1 << outRow;
   1036 			}
   1037 			break;
   1038 		default:
   1039 			UNREACHABLE(size);
   1040 			break;
   1041 		}
   1042 	}
   1043 
   1044 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
   1045 	{
   1046 		if(currentScope != emitScope)
   1047 		{
   1048 			return false;
   1049 		}
   1050 
   1051 		TIntermTyped *result = node;
   1052 		TIntermTyped *arg = node->getOperand();
   1053 		TBasicType basicType = arg->getType().getBasicType();
   1054 
   1055 		union
   1056 		{
   1057 			float f;
   1058 			int i;
   1059 		} one_value;
   1060 
   1061 		if(basicType == EbtInt || basicType == EbtUInt)
   1062 		{
   1063 			one_value.i = 1;
   1064 		}
   1065 		else
   1066 		{
   1067 			one_value.f = 1.0f;
   1068 		}
   1069 
   1070 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
   1071 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
   1072 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
   1073 
   1074 		switch(node->getOp())
   1075 		{
   1076 		case EOpNegative:
   1077 			if(visit == PostVisit)
   1078 			{
   1079 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
   1080 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1081 				{
   1082 					emit(negOpcode, result, index, arg, index);
   1083 				}
   1084 			}
   1085 			break;
   1086 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1087 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1088 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1089 		case EOpPostIncrement:
   1090 			if(visit == PostVisit)
   1091 			{
   1092 				copy(result, arg);
   1093 
   1094 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
   1095 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1096 				{
   1097 					emit(addOpcode, arg, index, arg, index, &one);
   1098 				}
   1099 
   1100 				assignLvalue(arg, arg);
   1101 			}
   1102 			break;
   1103 		case EOpPostDecrement:
   1104 			if(visit == PostVisit)
   1105 			{
   1106 				copy(result, arg);
   1107 
   1108 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
   1109 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1110 				{
   1111 					emit(subOpcode, arg, index, arg, index, &one);
   1112 				}
   1113 
   1114 				assignLvalue(arg, arg);
   1115 			}
   1116 			break;
   1117 		case EOpPreIncrement:
   1118 			if(visit == PostVisit)
   1119 			{
   1120 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
   1121 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1122 				{
   1123 					emit(addOpcode, result, index, arg, index, &one);
   1124 				}
   1125 
   1126 				assignLvalue(arg, result);
   1127 			}
   1128 			break;
   1129 		case EOpPreDecrement:
   1130 			if(visit == PostVisit)
   1131 			{
   1132 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
   1133 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1134 				{
   1135 					emit(subOpcode, result, index, arg, index, &one);
   1136 				}
   1137 
   1138 				assignLvalue(arg, result);
   1139 			}
   1140 			break;
   1141 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
   1142 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
   1143 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
   1144 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
   1145 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
   1146 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
   1147 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
   1148 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
   1149 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
   1150 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
   1151 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
   1152 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
   1153 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
   1154 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
   1155 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
   1156 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
   1157 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
   1158 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
   1159 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
   1160 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
   1161 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
   1162 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
   1163 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
   1164 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
   1165 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
   1166 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
   1167 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
   1168 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
   1169 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
   1170 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
   1171 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
   1172 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
   1173 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
   1174 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
   1175 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
   1176 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
   1177 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
   1178 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
   1179 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
   1180 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
   1181 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
   1182 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
   1183 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
   1184 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
   1185 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
   1186 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
   1187 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
   1188 		case EOpTranspose:
   1189 			if(visit == PostVisit)
   1190 			{
   1191 				int numCols = arg->getNominalSize();
   1192 				int numRows = arg->getSecondarySize();
   1193 				for(int i = 0; i < numCols; ++i)
   1194 				{
   1195 					for(int j = 0; j < numRows; ++j)
   1196 					{
   1197 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
   1198 						mov->src[0].swizzle = 0x55 * j;
   1199 						mov->dst.mask = 1 << i;
   1200 					}
   1201 				}
   1202 			}
   1203 			break;
   1204 		case EOpDeterminant:
   1205 			if(visit == PostVisit)
   1206 			{
   1207 				int size = arg->getNominalSize();
   1208 				ASSERT(size == arg->getSecondarySize());
   1209 
   1210 				emitDeterminant(result, arg, size);
   1211 			}
   1212 			break;
   1213 		case EOpInverse:
   1214 			if(visit == PostVisit)
   1215 			{
   1216 				int size = arg->getNominalSize();
   1217 				ASSERT(size == arg->getSecondarySize());
   1218 
   1219 				// Compute transposed matrix of cofactors
   1220 				for(int i = 0; i < size; ++i)
   1221 				{
   1222 					for(int j = 0; j < size; ++j)
   1223 					{
   1224 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
   1225 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
   1226 						emitDeterminant(result, arg, size - 1, j, i, i, j);
   1227 					}
   1228 				}
   1229 
   1230 				// Compute 1 / determinant
   1231 				Temporary invDet(this);
   1232 				emitDeterminant(&invDet, arg, size);
   1233 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
   1234 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
   1235 				div->src[1].swizzle = 0x00; // xxxx
   1236 
   1237 				// Divide transposed matrix of cofactors by determinant
   1238 				for(int i = 0; i < size; ++i)
   1239 				{
   1240 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
   1241 				}
   1242 			}
   1243 			break;
   1244 		default: UNREACHABLE(node->getOp());
   1245 		}
   1246 
   1247 		return true;
   1248 	}
   1249 
   1250 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
   1251 	{
   1252 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
   1253 		{
   1254 			return false;
   1255 		}
   1256 
   1257 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
   1258 
   1259 		TIntermTyped *result = node;
   1260 		const TType &resultType = node->getType();
   1261 		TIntermSequence &arg = node->getSequence();
   1262 		size_t argumentCount = arg.size();
   1263 
   1264 		switch(node->getOp())
   1265 		{
   1266 		case EOpSequence:             break;
   1267 		case EOpDeclaration:          break;
   1268 		case EOpInvariantDeclaration: break;
   1269 		case EOpPrototype:            break;
   1270 		case EOpComma:
   1271 			if(visit == PostVisit)
   1272 			{
   1273 				copy(result, arg[1]);
   1274 			}
   1275 			break;
   1276 		case EOpFunction:
   1277 			if(visit == PreVisit)
   1278 			{
   1279 				const TString &name = node->getName();
   1280 
   1281 				if(emitScope == FUNCTION)
   1282 				{
   1283 					if(functionArray.size() > 1)   // No need for a label when there's only main()
   1284 					{
   1285 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
   1286 						label->dst.type = sw::Shader::PARAMETER_LABEL;
   1287 
   1288 						const Function *function = findFunction(name);
   1289 						ASSERT(function);   // Should have been added during global pass
   1290 						label->dst.index = function->label;
   1291 						currentFunction = function->label;
   1292 					}
   1293 				}
   1294 				else if(emitScope == GLOBAL)
   1295 				{
   1296 					if(name != "main(")
   1297 					{
   1298 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
   1299 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
   1300 					}
   1301 				}
   1302 				else UNREACHABLE(emitScope);
   1303 
   1304 				currentScope = FUNCTION;
   1305 			}
   1306 			else if(visit == PostVisit)
   1307 			{
   1308 				if(emitScope == FUNCTION)
   1309 				{
   1310 					if(functionArray.size() > 1)   // No need to return when there's only main()
   1311 					{
   1312 						emit(sw::Shader::OPCODE_RET);
   1313 					}
   1314 				}
   1315 
   1316 				currentScope = GLOBAL;
   1317 			}
   1318 			break;
   1319 		case EOpFunctionCall:
   1320 			if(visit == PostVisit)
   1321 			{
   1322 				if(node->isUserDefined())
   1323 				{
   1324 					const TString &name = node->getName();
   1325 					const Function *function = findFunction(name);
   1326 
   1327 					if(!function)
   1328 					{
   1329 						mContext.error(node->getLine(), "function definition not found", name.c_str());
   1330 						return false;
   1331 					}
   1332 
   1333 					TIntermSequence &arguments = *function->arg;
   1334 
   1335 					for(size_t i = 0; i < argumentCount; i++)
   1336 					{
   1337 						TIntermTyped *in = arguments[i]->getAsTyped();
   1338 
   1339 						if(in->getQualifier() == EvqIn ||
   1340 						   in->getQualifier() == EvqInOut ||
   1341 						   in->getQualifier() == EvqConstReadOnly)
   1342 						{
   1343 							copy(in, arg[i]);
   1344 						}
   1345 					}
   1346 
   1347 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
   1348 					call->dst.type = sw::Shader::PARAMETER_LABEL;
   1349 					call->dst.index = function->label;
   1350 
   1351 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
   1352 					{
   1353 						copy(result, function->ret);
   1354 					}
   1355 
   1356 					for(size_t i = 0; i < argumentCount; i++)
   1357 					{
   1358 						TIntermTyped *argument = arguments[i]->getAsTyped();
   1359 						TIntermTyped *out = arg[i]->getAsTyped();
   1360 
   1361 						if(argument->getQualifier() == EvqOut ||
   1362 						   argument->getQualifier() == EvqInOut)
   1363 						{
   1364 							assignLvalue(out, argument);
   1365 						}
   1366 					}
   1367 				}
   1368 				else
   1369 				{
   1370 					const TextureFunction textureFunction(node->getName());
   1371 					TIntermTyped *s = arg[0]->getAsTyped();
   1372 					TIntermTyped *t = arg[1]->getAsTyped();
   1373 
   1374 					Temporary coord(this);
   1375 
   1376 					if(textureFunction.proj)
   1377 					{
   1378 						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
   1379 						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
   1380 						rcp->dst.mask = 0x7;
   1381 
   1382 						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
   1383 						mul->dst.mask = 0x7;
   1384 
   1385 						if(IsShadowSampler(s->getBasicType()))
   1386 						{
   1387 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
   1388 							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
   1389 							mov->src[0].swizzle = 0xA4;
   1390 						}
   1391 					}
   1392 					else
   1393 					{
   1394 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
   1395 
   1396 						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
   1397 						{
   1398 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
   1399 							mov->src[0].swizzle = 0xA4;
   1400 						}
   1401 					}
   1402 
   1403 					switch(textureFunction.method)
   1404 					{
   1405 					case TextureFunction::IMPLICIT:
   1406 						if(!textureFunction.offset)
   1407 						{
   1408 							if(argumentCount == 2)
   1409 							{
   1410 								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
   1411 							}
   1412 							else if(argumentCount == 3)   // Bias
   1413 							{
   1414 								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
   1415 							}
   1416 							else UNREACHABLE(argumentCount);
   1417 						}
   1418 						else   // Offset
   1419 						{
   1420 							if(argumentCount == 3)
   1421 							{
   1422 								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
   1423 							}
   1424 							else if(argumentCount == 4)   // Bias
   1425 							{
   1426 								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
   1427 							}
   1428 							else UNREACHABLE(argumentCount);
   1429 						}
   1430 						break;
   1431 					case TextureFunction::LOD:
   1432 						if(!textureFunction.offset && argumentCount == 3)
   1433 						{
   1434 							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
   1435 						}
   1436 						else if(argumentCount == 4)   // Offset
   1437 						{
   1438 							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
   1439 						}
   1440 						else UNREACHABLE(argumentCount);
   1441 						break;
   1442 					case TextureFunction::FETCH:
   1443 						if(!textureFunction.offset && argumentCount == 3)
   1444 						{
   1445 							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
   1446 						}
   1447 						else if(argumentCount == 4)   // Offset
   1448 						{
   1449 							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
   1450 						}
   1451 						else UNREACHABLE(argumentCount);
   1452 						break;
   1453 					case TextureFunction::GRAD:
   1454 						if(!textureFunction.offset && argumentCount == 4)
   1455 						{
   1456 							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
   1457 						}
   1458 						else if(argumentCount == 5)   // Offset
   1459 						{
   1460 							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
   1461 						}
   1462 						else UNREACHABLE(argumentCount);
   1463 						break;
   1464 					case TextureFunction::SIZE:
   1465 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
   1466 						break;
   1467 					default:
   1468 						UNREACHABLE(textureFunction.method);
   1469 					}
   1470 				}
   1471 			}
   1472 			break;
   1473 		case EOpParameters:
   1474 			break;
   1475 		case EOpConstructFloat:
   1476 		case EOpConstructVec2:
   1477 		case EOpConstructVec3:
   1478 		case EOpConstructVec4:
   1479 		case EOpConstructBool:
   1480 		case EOpConstructBVec2:
   1481 		case EOpConstructBVec3:
   1482 		case EOpConstructBVec4:
   1483 		case EOpConstructInt:
   1484 		case EOpConstructIVec2:
   1485 		case EOpConstructIVec3:
   1486 		case EOpConstructIVec4:
   1487 		case EOpConstructUInt:
   1488 		case EOpConstructUVec2:
   1489 		case EOpConstructUVec3:
   1490 		case EOpConstructUVec4:
   1491 			if(visit == PostVisit)
   1492 			{
   1493 				int component = 0;
   1494 				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
   1495 				int arrayComponents = result->getType().getElementSize();
   1496 				for(size_t i = 0; i < argumentCount; i++)
   1497 				{
   1498 					TIntermTyped *argi = arg[i]->getAsTyped();
   1499 					int size = argi->getNominalSize();
   1500 					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
   1501 					int swizzle = component - (arrayIndex * arrayComponents);
   1502 
   1503 					if(!argi->isMatrix())
   1504 					{
   1505 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
   1506 						mov->dst.mask = (0xF << swizzle) & 0xF;
   1507 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1508 
   1509 						component += size;
   1510 					}
   1511 					else if(!result->isMatrix()) // Construct a non matrix from a matrix
   1512 					{
   1513 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
   1514 						mov->dst.mask = (0xF << swizzle) & 0xF;
   1515 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1516 
   1517 						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
   1518 						if(result->getNominalSize() > size)
   1519 						{
   1520 							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
   1521 							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
   1522 							// mat2: xxxy (0x40), mat3: xxxx (0x00)
   1523 							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
   1524 						}
   1525 
   1526 						component += size;
   1527 					}
   1528 					else   // Matrix
   1529 					{
   1530 						int column = 0;
   1531 
   1532 						while(component < resultType.getNominalSize())
   1533 						{
   1534 							Instruction *mov = emitCast(result, arrayIndex, argi, column);
   1535 							mov->dst.mask = (0xF << swizzle) & 0xF;
   1536 							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1537 
   1538 							column++;
   1539 							component += size;
   1540 						}
   1541 					}
   1542 				}
   1543 			}
   1544 			break;
   1545 		case EOpConstructMat2:
   1546 		case EOpConstructMat2x3:
   1547 		case EOpConstructMat2x4:
   1548 		case EOpConstructMat3x2:
   1549 		case EOpConstructMat3:
   1550 		case EOpConstructMat3x4:
   1551 		case EOpConstructMat4x2:
   1552 		case EOpConstructMat4x3:
   1553 		case EOpConstructMat4:
   1554 			if(visit == PostVisit)
   1555 			{
   1556 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1557 				const int outCols = result->getNominalSize();
   1558 				const int outRows = result->getSecondarySize();
   1559 
   1560 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
   1561 				{
   1562 					for(int i = 0; i < outCols; i++)
   1563 					{
   1564 						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
   1565 						Instruction *mov = emitCast(result, i, arg0, 0);
   1566 						mov->dst.mask = 1 << i;
   1567 						ASSERT(mov->src[0].swizzle == 0x00);
   1568 					}
   1569 				}
   1570 				else if(arg0->isMatrix())
   1571 				{
   1572 					int arraySize = result->isArray() ? result->getArraySize() : 1;
   1573 
   1574 					for(int n = 0; n < arraySize; n++)
   1575 					{
   1576 						TIntermTyped *argi = arg[n]->getAsTyped();
   1577 						const int inCols = argi->getNominalSize();
   1578 						const int inRows = argi->getSecondarySize();
   1579 
   1580 						for(int i = 0; i < outCols; i++)
   1581 						{
   1582 							if(i >= inCols || outRows > inRows)
   1583 							{
   1584 								// Initialize to identity matrix
   1585 								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
   1586 								emitCast(result, i + n * outCols, &col, 0);
   1587 							}
   1588 
   1589 							if(i < inCols)
   1590 							{
   1591 								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
   1592 								mov->dst.mask = 0xF >> (4 - inRows);
   1593 							}
   1594 						}
   1595 					}
   1596 				}
   1597 				else
   1598 				{
   1599 					int column = 0;
   1600 					int row = 0;
   1601 
   1602 					for(size_t i = 0; i < argumentCount; i++)
   1603 					{
   1604 						TIntermTyped *argi = arg[i]->getAsTyped();
   1605 						int size = argi->getNominalSize();
   1606 						int element = 0;
   1607 
   1608 						while(element < size)
   1609 						{
   1610 							Instruction *mov = emitCast(result, column, argi, 0);
   1611 							mov->dst.mask = (0xF << row) & 0xF;
   1612 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
   1613 
   1614 							int end = row + size - element;
   1615 							column = end >= outRows ? column + 1 : column;
   1616 							element = element + outRows - row;
   1617 							row = end >= outRows ? 0 : end;
   1618 						}
   1619 					}
   1620 				}
   1621 			}
   1622 			break;
   1623 		case EOpConstructStruct:
   1624 			if(visit == PostVisit)
   1625 			{
   1626 				int offset = 0;
   1627 				for(size_t i = 0; i < argumentCount; i++)
   1628 				{
   1629 					TIntermTyped *argi = arg[i]->getAsTyped();
   1630 					int size = argi->totalRegisterCount();
   1631 
   1632 					for(int index = 0; index < size; index++)
   1633 					{
   1634 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
   1635 						mov->dst.mask = writeMask(result, offset + index);
   1636 					}
   1637 
   1638 					offset += size;
   1639 				}
   1640 			}
   1641 			break;
   1642 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
   1643 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
   1644 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
   1645 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
   1646 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
   1647 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
   1648 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
   1649 		case EOpModf:
   1650 			if(visit == PostVisit)
   1651 			{
   1652 				TIntermTyped* arg1 = arg[1]->getAsTyped();
   1653 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
   1654 				assignLvalue(arg1, arg1);
   1655 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
   1656 			}
   1657 			break;
   1658 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
   1659 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
   1660 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
   1661 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
   1662 		case EOpClamp:
   1663 			if(visit == PostVisit)
   1664 			{
   1665 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
   1666 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
   1667 			}
   1668 			break;
   1669 		case EOpMix:
   1670 			if(visit == PostVisit)
   1671 			{
   1672 				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
   1673 				{
   1674 					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
   1675 				}
   1676 				else
   1677 				{
   1678 					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
   1679 				}
   1680 			}
   1681 			break;
   1682 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
   1683 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
   1684 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
   1685 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
   1686 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
   1687 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1688 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
   1689 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1690 		case EOpMul:
   1691 			if(visit == PostVisit)
   1692 			{
   1693 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1694 				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
   1695 				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
   1696 
   1697 				int size = arg0->getNominalSize();
   1698 				for(int i = 0; i < size; i++)
   1699 				{
   1700 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
   1701 				}
   1702 			}
   1703 			break;
   1704 		case EOpOuterProduct:
   1705 			if(visit == PostVisit)
   1706 			{
   1707 				for(int i = 0; i < dim(arg[1]); i++)
   1708 				{
   1709 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
   1710 					mul->src[1].swizzle = 0x55 * i;
   1711 				}
   1712 			}
   1713 			break;
   1714 		default: UNREACHABLE(node->getOp());
   1715 		}
   1716 
   1717 		return true;
   1718 	}
   1719 
   1720 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
   1721 	{
   1722 		if(currentScope != emitScope)
   1723 		{
   1724 			return false;
   1725 		}
   1726 
   1727 		TIntermTyped *condition = node->getCondition();
   1728 		TIntermNode *trueBlock = node->getTrueBlock();
   1729 		TIntermNode *falseBlock = node->getFalseBlock();
   1730 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
   1731 
   1732 		condition->traverse(this);
   1733 
   1734 		if(node->usesTernaryOperator())
   1735 		{
   1736 			if(constantCondition)
   1737 			{
   1738 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   1739 
   1740 				if(trueCondition)
   1741 				{
   1742 					trueBlock->traverse(this);
   1743 					copy(node, trueBlock);
   1744 				}
   1745 				else
   1746 				{
   1747 					falseBlock->traverse(this);
   1748 					copy(node, falseBlock);
   1749 				}
   1750 			}
   1751 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
   1752 			{
   1753 				trueBlock->traverse(this);
   1754 				falseBlock->traverse(this);
   1755 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
   1756 			}
   1757 			else
   1758 			{
   1759 				emit(sw::Shader::OPCODE_IF, 0, condition);
   1760 
   1761 				if(trueBlock)
   1762 				{
   1763 					trueBlock->traverse(this);
   1764 					copy(node, trueBlock);
   1765 				}
   1766 
   1767 				if(falseBlock)
   1768 				{
   1769 					emit(sw::Shader::OPCODE_ELSE);
   1770 					falseBlock->traverse(this);
   1771 					copy(node, falseBlock);
   1772 				}
   1773 
   1774 				emit(sw::Shader::OPCODE_ENDIF);
   1775 			}
   1776 		}
   1777 		else  // if/else statement
   1778 		{
   1779 			if(constantCondition)
   1780 			{
   1781 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   1782 
   1783 				if(trueCondition)
   1784 				{
   1785 					if(trueBlock)
   1786 					{
   1787 						trueBlock->traverse(this);
   1788 					}
   1789 				}
   1790 				else
   1791 				{
   1792 					if(falseBlock)
   1793 					{
   1794 						falseBlock->traverse(this);
   1795 					}
   1796 				}
   1797 			}
   1798 			else
   1799 			{
   1800 				emit(sw::Shader::OPCODE_IF, 0, condition);
   1801 
   1802 				if(trueBlock)
   1803 				{
   1804 					trueBlock->traverse(this);
   1805 				}
   1806 
   1807 				if(falseBlock)
   1808 				{
   1809 					emit(sw::Shader::OPCODE_ELSE);
   1810 					falseBlock->traverse(this);
   1811 				}
   1812 
   1813 				emit(sw::Shader::OPCODE_ENDIF);
   1814 			}
   1815 		}
   1816 
   1817 		return false;
   1818 	}
   1819 
   1820 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
   1821 	{
   1822 		if(currentScope != emitScope)
   1823 		{
   1824 			return false;
   1825 		}
   1826 
   1827 		unsigned int iterations = loopCount(node);
   1828 
   1829 		if(iterations == 0)
   1830 		{
   1831 			return false;
   1832 		}
   1833 
   1834 		bool unroll = (iterations <= 4);
   1835 
   1836 		if(unroll)
   1837 		{
   1838 			LoopUnrollable loopUnrollable;
   1839 			unroll = loopUnrollable.traverse(node);
   1840 		}
   1841 
   1842 		TIntermNode *init = node->getInit();
   1843 		TIntermTyped *condition = node->getCondition();
   1844 		TIntermTyped *expression = node->getExpression();
   1845 		TIntermNode *body = node->getBody();
   1846 		Constant True(true);
   1847 
   1848 		if(node->getType() == ELoopDoWhile)
   1849 		{
   1850 			Temporary iterate(this);
   1851 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
   1852 
   1853 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
   1854 
   1855 			if(body)
   1856 			{
   1857 				body->traverse(this);
   1858 			}
   1859 
   1860 			emit(sw::Shader::OPCODE_TEST);
   1861 
   1862 			condition->traverse(this);
   1863 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
   1864 
   1865 			emit(sw::Shader::OPCODE_ENDWHILE);
   1866 		}
   1867 		else
   1868 		{
   1869 			if(init)
   1870 			{
   1871 				init->traverse(this);
   1872 			}
   1873 
   1874 			if(unroll)
   1875 			{
   1876 				for(unsigned int i = 0; i < iterations; i++)
   1877 				{
   1878 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
   1879 
   1880 					if(body)
   1881 					{
   1882 						body->traverse(this);
   1883 					}
   1884 
   1885 					if(expression)
   1886 					{
   1887 						expression->traverse(this);
   1888 					}
   1889 				}
   1890 			}
   1891 			else
   1892 			{
   1893 				if(condition)
   1894 				{
   1895 					condition->traverse(this);
   1896 				}
   1897 				else
   1898 				{
   1899 					condition = &True;
   1900 				}
   1901 
   1902 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
   1903 
   1904 				if(body)
   1905 				{
   1906 					body->traverse(this);
   1907 				}
   1908 
   1909 				emit(sw::Shader::OPCODE_TEST);
   1910 
   1911 				if(expression)
   1912 				{
   1913 					expression->traverse(this);
   1914 				}
   1915 
   1916 				if(condition)
   1917 				{
   1918 					condition->traverse(this);
   1919 				}
   1920 
   1921 				emit(sw::Shader::OPCODE_ENDWHILE);
   1922 			}
   1923 		}
   1924 
   1925 		return false;
   1926 	}
   1927 
   1928 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
   1929 	{
   1930 		if(currentScope != emitScope)
   1931 		{
   1932 			return false;
   1933 		}
   1934 
   1935 		switch(node->getFlowOp())
   1936 		{
   1937 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
   1938 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
   1939 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
   1940 		case EOpReturn:
   1941 			if(visit == PostVisit)
   1942 			{
   1943 				TIntermTyped *value = node->getExpression();
   1944 
   1945 				if(value)
   1946 				{
   1947 					copy(functionArray[currentFunction].ret, value);
   1948 				}
   1949 
   1950 				emit(sw::Shader::OPCODE_LEAVE);
   1951 			}
   1952 			break;
   1953 		default: UNREACHABLE(node->getFlowOp());
   1954 		}
   1955 
   1956 		return true;
   1957 	}
   1958 
   1959 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
   1960 	{
   1961 		if(currentScope != emitScope)
   1962 		{
   1963 			return false;
   1964 		}
   1965 
   1966 		TIntermTyped* switchValue = node->getInit();
   1967 		TIntermAggregate* opList = node->getStatementList();
   1968 
   1969 		if(!switchValue || !opList)
   1970 		{
   1971 			return false;
   1972 		}
   1973 
   1974 		switchValue->traverse(this);
   1975 
   1976 		emit(sw::Shader::OPCODE_SWITCH);
   1977 
   1978 		TIntermSequence& sequence = opList->getSequence();
   1979 		TIntermSequence::iterator it = sequence.begin();
   1980 		TIntermSequence::iterator defaultIt = sequence.end();
   1981 		int nbCases = 0;
   1982 		for(; it != sequence.end(); ++it)
   1983 		{
   1984 			TIntermCase* currentCase = (*it)->getAsCaseNode();
   1985 			if(currentCase)
   1986 			{
   1987 				TIntermSequence::iterator caseIt = it;
   1988 
   1989 				TIntermTyped* condition = currentCase->getCondition();
   1990 				if(condition) // non default case
   1991 				{
   1992 					if(nbCases != 0)
   1993 					{
   1994 						emit(sw::Shader::OPCODE_ELSE);
   1995 					}
   1996 
   1997 					condition->traverse(this);
   1998 					Temporary result(this);
   1999 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
   2000 					emit(sw::Shader::OPCODE_IF, 0, &result);
   2001 					nbCases++;
   2002 
   2003 					// Emit the code for this case and all subsequent cases until we hit a break statement.
   2004 					// TODO: This can repeat a lot of code for switches with many fall-through cases.
   2005 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
   2006 					{
   2007 						(*caseIt)->traverse(this);
   2008 
   2009 						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
   2010 						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
   2011 						// Note that this eliminates useless operations but shouldn't affect correctness.
   2012 						if((*caseIt)->getAsBranchNode())
   2013 						{
   2014 							break;
   2015 						}
   2016 					}
   2017 				}
   2018 				else
   2019 				{
   2020 					defaultIt = it; // The default case might not be the last case, keep it for last
   2021 				}
   2022 			}
   2023 		}
   2024 
   2025 		// If there's a default case, traverse it here
   2026 		if(defaultIt != sequence.end())
   2027 		{
   2028 			emit(sw::Shader::OPCODE_ELSE);
   2029 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
   2030 			{
   2031 				(*defaultIt)->traverse(this);
   2032 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
   2033 				{
   2034 					break;
   2035 				}
   2036 			}
   2037 		}
   2038 
   2039 		for(int i = 0; i < nbCases; ++i)
   2040 		{
   2041 			emit(sw::Shader::OPCODE_ENDIF);
   2042 		}
   2043 
   2044 		emit(sw::Shader::OPCODE_ENDSWITCH);
   2045 
   2046 		return false;
   2047 	}
   2048 
   2049 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
   2050 	{
   2051 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
   2052 	}
   2053 
   2054 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
   2055 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
   2056 	{
   2057 		Instruction *instruction = new Instruction(op);
   2058 
   2059 		if(dst)
   2060 		{
   2061 			destination(instruction->dst, dst, dstIndex);
   2062 		}
   2063 
   2064 		if(src0)
   2065 		{
   2066 			TIntermTyped* src = src0->getAsTyped();
   2067 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
   2068 		}
   2069 
   2070 		source(instruction->src[0], src0, index0);
   2071 		source(instruction->src[1], src1, index1);
   2072 		source(instruction->src[2], src2, index2);
   2073 		source(instruction->src[3], src3, index3);
   2074 		source(instruction->src[4], src4, index4);
   2075 
   2076 		shader->append(instruction);
   2077 
   2078 		return instruction;
   2079 	}
   2080 
   2081 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
   2082 	{
   2083 		return emitCast(dst, 0, src, 0);
   2084 	}
   2085 
   2086 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
   2087 	{
   2088 		switch(src->getBasicType())
   2089 		{
   2090 		case EbtBool:
   2091 			switch(dst->getBasicType())
   2092 			{
   2093 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   2094 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   2095 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
   2096 			default:       break;
   2097 			}
   2098 			break;
   2099 		case EbtInt:
   2100 			switch(dst->getBasicType())
   2101 			{
   2102 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   2103 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
   2104 			default:       break;
   2105 			}
   2106 			break;
   2107 		case EbtUInt:
   2108 			switch(dst->getBasicType())
   2109 			{
   2110 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   2111 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
   2112 			default:       break;
   2113 			}
   2114 			break;
   2115 		case EbtFloat:
   2116 			switch(dst->getBasicType())
   2117 			{
   2118 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
   2119 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
   2120 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
   2121 			default:      break;
   2122 			}
   2123 			break;
   2124 		default:
   2125 			break;
   2126 		}
   2127 
   2128 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
   2129 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
   2130 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
   2131 
   2132 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
   2133 	}
   2134 
   2135 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
   2136 	{
   2137 		for(int index = 0; index < dst->elementRegisterCount(); index++)
   2138 		{
   2139 			emit(op, dst, index, src0, index, src1, index, src2, index);
   2140 		}
   2141 	}
   2142 
   2143 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
   2144 	{
   2145 		emitBinary(op, result, src0, src1);
   2146 		assignLvalue(lhs, result);
   2147 	}
   2148 
   2149 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
   2150 	{
   2151 		sw::Shader::Opcode opcode;
   2152 		switch(left->getAsTyped()->getBasicType())
   2153 		{
   2154 		case EbtBool:
   2155 		case EbtInt:
   2156 			opcode = sw::Shader::OPCODE_ICMP;
   2157 			break;
   2158 		case EbtUInt:
   2159 			opcode = sw::Shader::OPCODE_UCMP;
   2160 			break;
   2161 		default:
   2162 			opcode = sw::Shader::OPCODE_CMP;
   2163 			break;
   2164 		}
   2165 
   2166 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
   2167 		cmp->control = cmpOp;
   2168 	}
   2169 
   2170 	int componentCount(const TType &type, int registers)
   2171 	{
   2172 		if(registers == 0)
   2173 		{
   2174 			return 0;
   2175 		}
   2176 
   2177 		if(type.isArray() && registers >= type.elementRegisterCount())
   2178 		{
   2179 			int index = registers / type.elementRegisterCount();
   2180 			registers -= index * type.elementRegisterCount();
   2181 			return index * type.getElementSize() + componentCount(type, registers);
   2182 		}
   2183 
   2184 		if(type.isStruct() || type.isInterfaceBlock())
   2185 		{
   2186 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   2187 			int elements = 0;
   2188 
   2189 			for(const auto &field : fields)
   2190 			{
   2191 				const TType &fieldType = *(field->type());
   2192 
   2193 				if(fieldType.totalRegisterCount() <= registers)
   2194 				{
   2195 					registers -= fieldType.totalRegisterCount();
   2196 					elements += fieldType.getObjectSize();
   2197 				}
   2198 				else   // Register within this field
   2199 				{
   2200 					return elements + componentCount(fieldType, registers);
   2201 				}
   2202 			}
   2203 		}
   2204 		else if(type.isMatrix())
   2205 		{
   2206 			return registers * type.registerSize();
   2207 		}
   2208 
   2209 		UNREACHABLE(0);
   2210 		return 0;
   2211 	}
   2212 
   2213 	int registerSize(const TType &type, int registers)
   2214 	{
   2215 		if(registers == 0)
   2216 		{
   2217 			if(type.isStruct())
   2218 			{
   2219 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
   2220 			}
   2221 			else if(type.isInterfaceBlock())
   2222 			{
   2223 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
   2224 			}
   2225 
   2226 			return type.registerSize();
   2227 		}
   2228 
   2229 		if(type.isArray() && registers >= type.elementRegisterCount())
   2230 		{
   2231 			int index = registers / type.elementRegisterCount();
   2232 			registers -= index * type.elementRegisterCount();
   2233 			return registerSize(type, registers);
   2234 		}
   2235 
   2236 		if(type.isStruct() || type.isInterfaceBlock())
   2237 		{
   2238 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   2239 			int elements = 0;
   2240 
   2241 			for(const auto &field : fields)
   2242 			{
   2243 				const TType &fieldType = *(field->type());
   2244 
   2245 				if(fieldType.totalRegisterCount() <= registers)
   2246 				{
   2247 					registers -= fieldType.totalRegisterCount();
   2248 					elements += fieldType.getObjectSize();
   2249 				}
   2250 				else   // Register within this field
   2251 				{
   2252 					return registerSize(fieldType, registers);
   2253 				}
   2254 			}
   2255 		}
   2256 		else if(type.isMatrix())
   2257 		{
   2258 			return registerSize(type, 0);
   2259 		}
   2260 
   2261 		UNREACHABLE(0);
   2262 		return 0;
   2263 	}
   2264 
   2265 	int OutputASM::getBlockId(TIntermTyped *arg)
   2266 	{
   2267 		if(arg)
   2268 		{
   2269 			const TType &type = arg->getType();
   2270 			TInterfaceBlock* block = type.getInterfaceBlock();
   2271 			if(block && (type.getQualifier() == EvqUniform))
   2272 			{
   2273 				// Make sure the uniform block is declared
   2274 				uniformRegister(arg);
   2275 
   2276 				const char* blockName = block->name().c_str();
   2277 
   2278 				// Fetch uniform block index from array of blocks
   2279 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
   2280 				{
   2281 					if(blockName == it->name)
   2282 					{
   2283 						return it->blockId;
   2284 					}
   2285 				}
   2286 
   2287 				ASSERT(false);
   2288 			}
   2289 		}
   2290 
   2291 		return -1;
   2292 	}
   2293 
   2294 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
   2295 	{
   2296 		const TType &type = arg->getType();
   2297 		int blockId = getBlockId(arg);
   2298 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
   2299 		if(blockId != -1)
   2300 		{
   2301 			argumentInfo.bufferIndex = 0;
   2302 			for(int i = 0; i < blockId; ++i)
   2303 			{
   2304 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
   2305 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
   2306 			}
   2307 
   2308 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
   2309 
   2310 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
   2311 			BlockDefinitionIndexMap::const_iterator it = itEnd;
   2312 
   2313 			argumentInfo.clampedIndex = index;
   2314 			if(type.isInterfaceBlock())
   2315 			{
   2316 				// Offset index to the beginning of the selected instance
   2317 				int blockRegisters = type.elementRegisterCount();
   2318 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
   2319 				argumentInfo.bufferIndex += bufferOffset;
   2320 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
   2321 			}
   2322 
   2323 			int regIndex = registerIndex(arg);
   2324 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
   2325 			{
   2326 				it = blockDefinition.find(i);
   2327 				if(it != itEnd)
   2328 				{
   2329 					argumentInfo.clampedIndex -= (i - regIndex);
   2330 					break;
   2331 				}
   2332 			}
   2333 			ASSERT(it != itEnd);
   2334 
   2335 			argumentInfo.typedMemberInfo = it->second;
   2336 
   2337 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
   2338 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
   2339 		}
   2340 		else
   2341 		{
   2342 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
   2343 		}
   2344 
   2345 		return argumentInfo;
   2346 	}
   2347 
   2348 	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
   2349 	{
   2350 		if(argument)
   2351 		{
   2352 			TIntermTyped *arg = argument->getAsTyped();
   2353 			Temporary unpackedUniform(this);
   2354 
   2355 			const TType& srcType = arg->getType();
   2356 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
   2357 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
   2358 			{
   2359 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2360 				const TType &memberType = argumentInfo.typedMemberInfo.type;
   2361 
   2362 				if(memberType.getBasicType() == EbtBool)
   2363 				{
   2364 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
   2365 
   2366 					// Convert the packed bool, which is currently an int, to a true bool
   2367 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
   2368 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2369 					instruction->dst.index = registerIndex(&unpackedUniform);
   2370 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2371 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2372 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
   2373 
   2374 					shader->append(instruction);
   2375 
   2376 					arg = &unpackedUniform;
   2377 					index = 0;
   2378 				}
   2379 				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
   2380 				{
   2381 					int numCols = memberType.getNominalSize();
   2382 					int numRows = memberType.getSecondarySize();
   2383 
   2384 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
   2385 
   2386 					unsigned int dstIndex = registerIndex(&unpackedUniform);
   2387 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
   2388 					int arrayIndex = argumentInfo.clampedIndex / numCols;
   2389 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
   2390 
   2391 					for(int j = 0; j < numRows; ++j)
   2392 					{
   2393 						// Transpose the row major matrix
   2394 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
   2395 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2396 						instruction->dst.index = dstIndex;
   2397 						instruction->dst.mask = 1 << j;
   2398 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2399 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2400 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
   2401 						instruction->src[0].swizzle = srcSwizzle;
   2402 
   2403 						shader->append(instruction);
   2404 					}
   2405 
   2406 					arg = &unpackedUniform;
   2407 					index = 0;
   2408 				}
   2409 			}
   2410 
   2411 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2412 			const TType &type = argumentInfo.typedMemberInfo.type;
   2413 
   2414 			int size = registerSize(type, argumentInfo.clampedIndex);
   2415 
   2416 			parameter.type = registerType(arg);
   2417 			parameter.bufferIndex = argumentInfo.bufferIndex;
   2418 
   2419 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
   2420 			{
   2421 				int component = componentCount(type, argumentInfo.clampedIndex);
   2422 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
   2423 
   2424 				for(int i = 0; i < 4; i++)
   2425 				{
   2426 					if(size == 1)   // Replicate
   2427 					{
   2428 						parameter.value[i] = constants[component + 0].getAsFloat();
   2429 					}
   2430 					else if(i < size)
   2431 					{
   2432 						parameter.value[i] = constants[component + i].getAsFloat();
   2433 					}
   2434 					else
   2435 					{
   2436 						parameter.value[i] = 0.0f;
   2437 					}
   2438 				}
   2439 			}
   2440 			else
   2441 			{
   2442 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
   2443 
   2444 				if(parameter.bufferIndex != -1)
   2445 				{
   2446 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
   2447 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
   2448 				}
   2449 			}
   2450 
   2451 			if(!IsSampler(arg->getBasicType()))
   2452 			{
   2453 				parameter.swizzle = readSwizzle(arg, size);
   2454 			}
   2455 		}
   2456 	}
   2457 
   2458 	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
   2459 	{
   2460 		parameter.type = registerType(arg);
   2461 		parameter.index = registerIndex(arg) + index;
   2462 		parameter.mask = writeMask(arg, index);
   2463 	}
   2464 
   2465 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
   2466 	{
   2467 		for(int index = 0; index < dst->totalRegisterCount(); index++)
   2468 		{
   2469 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
   2470 		}
   2471 	}
   2472 
   2473 	int swizzleElement(int swizzle, int index)
   2474 	{
   2475 		return (swizzle >> (index * 2)) & 0x03;
   2476 	}
   2477 
   2478 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
   2479 	{
   2480 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
   2481 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
   2482 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
   2483 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
   2484 	}
   2485 
   2486 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
   2487 	{
   2488 		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
   2489 		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
   2490 		{
   2491 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
   2492 		}
   2493 
   2494 		TIntermBinary *binary = dst->getAsBinaryNode();
   2495 
   2496 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
   2497 		{
   2498 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
   2499 
   2500 			lvalue(insert->dst, dst);
   2501 
   2502 			insert->src[0].type = insert->dst.type;
   2503 			insert->src[0].index = insert->dst.index;
   2504 			insert->src[0].rel = insert->dst.rel;
   2505 			source(insert->src[1], src);
   2506 			source(insert->src[2], binary->getRight());
   2507 
   2508 			shader->append(insert);
   2509 		}
   2510 		else
   2511 		{
   2512 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
   2513 
   2514 			int swizzle = lvalue(mov1->dst, dst);
   2515 
   2516 			source(mov1->src[0], src);
   2517 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
   2518 
   2519 			shader->append(mov1);
   2520 
   2521 			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
   2522 			{
   2523 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
   2524 
   2525 				mov->dst = mov1->dst;
   2526 				mov->dst.index += offset;
   2527 				mov->dst.mask = writeMask(dst, offset);
   2528 
   2529 				source(mov->src[0], src, offset);
   2530 
   2531 				shader->append(mov);
   2532 			}
   2533 		}
   2534 	}
   2535 
   2536 	void OutputASM::evaluateRvalue(TIntermTyped *node)
   2537 	{
   2538 		TIntermBinary *binary = node->getAsBinaryNode();
   2539 
   2540 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
   2541 		{
   2542 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
   2543 
   2544 			destination(insert->dst, node);
   2545 
   2546 			Temporary address(this);
   2547 			unsigned char mask;
   2548 			TIntermTyped *root = nullptr;
   2549 			unsigned int offset = 0;
   2550 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
   2551 
   2552 			source(insert->src[0], root, offset);
   2553 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
   2554 
   2555 			source(insert->src[1], binary->getRight());
   2556 
   2557 			shader->append(insert);
   2558 		}
   2559 		else
   2560 		{
   2561 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
   2562 
   2563 			destination(mov1->dst, node, 0);
   2564 
   2565 			Temporary address(this);
   2566 			unsigned char mask;
   2567 			TIntermTyped *root = nullptr;
   2568 			unsigned int offset = 0;
   2569 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
   2570 
   2571 			source(mov1->src[0], root, offset);
   2572 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
   2573 
   2574 			shader->append(mov1);
   2575 
   2576 			for(int i = 1; i < node->totalRegisterCount(); i++)
   2577 			{
   2578 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
   2579 				mov->src[0].rel = mov1->src[0].rel;
   2580 			}
   2581 		}
   2582 	}
   2583 
   2584 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
   2585 	{
   2586 		Temporary address(this);
   2587 		TIntermTyped *root = nullptr;
   2588 		unsigned int offset = 0;
   2589 		unsigned char mask = 0xF;
   2590 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
   2591 
   2592 		dst.type = registerType(root);
   2593 		dst.index = registerIndex(root) + offset;
   2594 		dst.mask = mask;
   2595 
   2596 		return swizzle;
   2597 	}
   2598 
   2599 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
   2600 	{
   2601 		TIntermTyped *result = node;
   2602 		TIntermBinary *binary = node->getAsBinaryNode();
   2603 		TIntermSymbol *symbol = node->getAsSymbolNode();
   2604 
   2605 		if(binary)
   2606 		{
   2607 			TIntermTyped *left = binary->getLeft();
   2608 			TIntermTyped *right = binary->getRight();
   2609 
   2610 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
   2611 
   2612 			switch(binary->getOp())
   2613 			{
   2614 			case EOpIndexDirect:
   2615 				{
   2616 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
   2617 
   2618 					if(left->isRegister())
   2619 					{
   2620 						int leftMask = mask;
   2621 
   2622 						mask = 1;
   2623 						while((leftMask & mask) == 0)
   2624 						{
   2625 							mask = mask << 1;
   2626 						}
   2627 
   2628 						int element = swizzleElement(leftSwizzle, rightIndex);
   2629 						mask = 1 << element;
   2630 
   2631 						return element;
   2632 					}
   2633 					else if(left->isArray() || left->isMatrix())
   2634 					{
   2635 						offset += rightIndex * result->totalRegisterCount();
   2636 						return 0xE4;
   2637 					}
   2638 					else UNREACHABLE(0);
   2639 				}
   2640 				break;
   2641 			case EOpIndexIndirect:
   2642 				{
   2643 					right->traverse(this);
   2644 
   2645 					if(left->isRegister())
   2646 					{
   2647 						// Requires INSERT instruction (handled by calling function)
   2648 					}
   2649 					else if(left->isArray() || left->isMatrix())
   2650 					{
   2651 						int scale = result->totalRegisterCount();
   2652 
   2653 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
   2654 						{
   2655 							if(left->totalRegisterCount() > 1)
   2656 							{
   2657 								sw::Shader::SourceParameter relativeRegister;
   2658 								source(relativeRegister, right);
   2659 
   2660 								rel.index = relativeRegister.index;
   2661 								rel.type = relativeRegister.type;
   2662 								rel.scale = scale;
   2663 								rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
   2664 							}
   2665 						}
   2666 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
   2667 						{
   2668 							if(scale == 1)
   2669 							{
   2670 								Constant oldScale((int)rel.scale);
   2671 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
   2672 								mad->src[0].index = rel.index;
   2673 								mad->src[0].type = rel.type;
   2674 							}
   2675 							else
   2676 							{
   2677 								Constant oldScale((int)rel.scale);
   2678 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
   2679 								mul->src[0].index = rel.index;
   2680 								mul->src[0].type = rel.type;
   2681 
   2682 								Constant newScale(scale);
   2683 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2684 							}
   2685 
   2686 							rel.type = sw::Shader::PARAMETER_TEMP;
   2687 							rel.index = registerIndex(&address);
   2688 							rel.scale = 1;
   2689 						}
   2690 						else   // Just add the new index to the address register
   2691 						{
   2692 							if(scale == 1)
   2693 							{
   2694 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
   2695 							}
   2696 							else
   2697 							{
   2698 								Constant newScale(scale);
   2699 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2700 							}
   2701 						}
   2702 					}
   2703 					else UNREACHABLE(0);
   2704 				}
   2705 				break;
   2706 			case EOpIndexDirectStruct:
   2707 			case EOpIndexDirectInterfaceBlock:
   2708 				{
   2709 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
   2710 					                           left->getType().getStruct()->fields() :
   2711 					                           left->getType().getInterfaceBlock()->fields();
   2712 					int index = right->getAsConstantUnion()->getIConst(0);
   2713 					int fieldOffset = 0;
   2714 
   2715 					for(int i = 0; i < index; i++)
   2716 					{
   2717 						fieldOffset += fields[i]->type()->totalRegisterCount();
   2718 					}
   2719 
   2720 					offset += fieldOffset;
   2721 					mask = writeMask(result);
   2722 
   2723 					return 0xE4;
   2724 				}
   2725 				break;
   2726 			case EOpVectorSwizzle:
   2727 				{
   2728 					ASSERT(left->isRegister());
   2729 
   2730 					int leftMask = mask;
   2731 
   2732 					int swizzle = 0;
   2733 					int rightMask = 0;
   2734 
   2735 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
   2736 
   2737 					for(unsigned int i = 0; i < sequence.size(); i++)
   2738 					{
   2739 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
   2740 
   2741 						int element = swizzleElement(leftSwizzle, index);
   2742 						rightMask = rightMask | (1 << element);
   2743 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
   2744 					}
   2745 
   2746 					mask = leftMask & rightMask;
   2747 
   2748 					return swizzle;
   2749 				}
   2750 				break;
   2751 			default:
   2752 				UNREACHABLE(binary->getOp());   // Not an l-value operator
   2753 				break;
   2754 			}
   2755 		}
   2756 		else if(symbol)
   2757 		{
   2758 			root = symbol;
   2759 			offset = 0;
   2760 			mask = writeMask(symbol);
   2761 
   2762 			return 0xE4;
   2763 		}
   2764 		else
   2765 		{
   2766 			node->traverse(this);
   2767 
   2768 			root = node;
   2769 			offset = 0;
   2770 			mask = writeMask(node);
   2771 
   2772 			return 0xE4;
   2773 		}
   2774 
   2775 		return 0xE4;
   2776 	}
   2777 
   2778 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
   2779 	{
   2780 		if(isSamplerRegister(operand))
   2781 		{
   2782 			return sw::Shader::PARAMETER_SAMPLER;
   2783 		}
   2784 
   2785 		const TQualifier qualifier = operand->getQualifier();
   2786 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
   2787 		{
   2788 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
   2789 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
   2790 			{
   2791 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
   2792 			}
   2793 			outputQualifier = qualifier;
   2794 		}
   2795 
   2796 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
   2797 		{
   2798 			// Constant arrays are in the constant register file.
   2799 			if(operand->isArray() && operand->getArraySize() > 1)
   2800 			{
   2801 				return sw::Shader::PARAMETER_CONST;
   2802 			}
   2803 			else
   2804 			{
   2805 				return sw::Shader::PARAMETER_TEMP;
   2806 			}
   2807 		}
   2808 
   2809 		switch(qualifier)
   2810 		{
   2811 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
   2812 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
   2813 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
   2814 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
   2815 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
   2816 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
   2817 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
   2818 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
   2819 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
   2820 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
   2821 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
   2822 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
   2823 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
   2824 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
   2825 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
   2826 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
   2827 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
   2828 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
   2829 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
   2830 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
   2831 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
   2832 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
   2833 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
   2834 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
   2835 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
   2836 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
   2837 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
   2838 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
   2839 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
   2840 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
   2841 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
   2842 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
   2843 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
   2844 		default: UNREACHABLE(qualifier);
   2845 		}
   2846 
   2847 		return sw::Shader::PARAMETER_VOID;
   2848 	}
   2849 
   2850 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
   2851 	{
   2852 		const TQualifier qualifier = operand->getQualifier();
   2853 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
   2854 	}
   2855 
   2856 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
   2857 	{
   2858 		if(isSamplerRegister(operand))
   2859 		{
   2860 			return samplerRegister(operand);
   2861 		}
   2862 
   2863 		switch(operand->getQualifier())
   2864 		{
   2865 		case EvqTemporary:           return temporaryRegister(operand);
   2866 		case EvqGlobal:              return temporaryRegister(operand);
   2867 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
   2868 		case EvqAttribute:           return attributeRegister(operand);
   2869 		case EvqVaryingIn:           return varyingRegister(operand);
   2870 		case EvqVaryingOut:          return varyingRegister(operand);
   2871 		case EvqVertexIn:            return attributeRegister(operand);
   2872 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
   2873 		case EvqVertexOut:           return varyingRegister(operand);
   2874 		case EvqFragmentIn:          return varyingRegister(operand);
   2875 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
   2876 		case EvqInvariantVaryingOut: return varyingRegister(operand);
   2877 		case EvqSmooth:              return varyingRegister(operand);
   2878 		case EvqFlat:                return varyingRegister(operand);
   2879 		case EvqCentroidOut:         return varyingRegister(operand);
   2880 		case EvqSmoothIn:            return varyingRegister(operand);
   2881 		case EvqFlatIn:              return varyingRegister(operand);
   2882 		case EvqCentroidIn:          return varyingRegister(operand);
   2883 		case EvqUniform:             return uniformRegister(operand);
   2884 		case EvqIn:                  return temporaryRegister(operand);
   2885 		case EvqOut:                 return temporaryRegister(operand);
   2886 		case EvqInOut:               return temporaryRegister(operand);
   2887 		case EvqConstReadOnly:       return temporaryRegister(operand);
   2888 		case EvqPosition:            return varyingRegister(operand);
   2889 		case EvqPointSize:           return varyingRegister(operand);
   2890 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
   2891 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
   2892 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
   2893 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
   2894 		case EvqPointCoord:          return varyingRegister(operand);
   2895 		case EvqFragColor:           return 0;
   2896 		case EvqFragData:            return fragmentOutputRegister(operand);
   2897 		case EvqFragDepth:           return 0;
   2898 		default: UNREACHABLE(operand->getQualifier());
   2899 		}
   2900 
   2901 		return 0;
   2902 	}
   2903 
   2904 	int OutputASM::writeMask(TIntermTyped *destination, int index)
   2905 	{
   2906 		if(destination->getQualifier() == EvqPointSize)
   2907 		{
   2908 			return 0x2;   // Point size stored in the y component
   2909 		}
   2910 
   2911 		return 0xF >> (4 - registerSize(destination->getType(), index));
   2912 	}
   2913 
   2914 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
   2915 	{
   2916 		if(argument->getQualifier() == EvqPointSize)
   2917 		{
   2918 			return 0x55;   // Point size stored in the y component
   2919 		}
   2920 
   2921 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
   2922 
   2923 		return swizzleSize[size];
   2924 	}
   2925 
   2926 	// Conservatively checks whether an expression is fast to compute and has no side effects
   2927 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
   2928 	{
   2929 		if(!expression->isRegister())
   2930 		{
   2931 			return false;
   2932 		}
   2933 
   2934 		return cost(expression, budget) >= 0;
   2935 	}
   2936 
   2937 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
   2938 	int OutputASM::cost(TIntermNode *expression, int budget)
   2939 	{
   2940 		if(budget < 0)
   2941 		{
   2942 			return budget;
   2943 		}
   2944 
   2945 		if(expression->getAsSymbolNode())
   2946 		{
   2947 			return budget;
   2948 		}
   2949 		else if(expression->getAsConstantUnion())
   2950 		{
   2951 			return budget;
   2952 		}
   2953 		else if(expression->getAsBinaryNode())
   2954 		{
   2955 			TIntermBinary *binary = expression->getAsBinaryNode();
   2956 
   2957 			switch(binary->getOp())
   2958 			{
   2959 			case EOpVectorSwizzle:
   2960 			case EOpIndexDirect:
   2961 			case EOpIndexDirectStruct:
   2962 			case EOpIndexDirectInterfaceBlock:
   2963 				return cost(binary->getLeft(), budget - 0);
   2964 			case EOpAdd:
   2965 			case EOpSub:
   2966 			case EOpMul:
   2967 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
   2968 			default:
   2969 				return -1;
   2970 			}
   2971 		}
   2972 		else if(expression->getAsUnaryNode())
   2973 		{
   2974 			TIntermUnary *unary = expression->getAsUnaryNode();
   2975 
   2976 			switch(unary->getOp())
   2977 			{
   2978 			case EOpAbs:
   2979 			case EOpNegative:
   2980 				return cost(unary->getOperand(), budget - 1);
   2981 			default:
   2982 				return -1;
   2983 			}
   2984 		}
   2985 		else if(expression->getAsSelectionNode())
   2986 		{
   2987 			TIntermSelection *selection = expression->getAsSelectionNode();
   2988 
   2989 			if(selection->usesTernaryOperator())
   2990 			{
   2991 				TIntermTyped *condition = selection->getCondition();
   2992 				TIntermNode *trueBlock = selection->getTrueBlock();
   2993 				TIntermNode *falseBlock = selection->getFalseBlock();
   2994 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
   2995 
   2996 				if(constantCondition)
   2997 				{
   2998 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   2999 
   3000 					if(trueCondition)
   3001 					{
   3002 						return cost(trueBlock, budget - 0);
   3003 					}
   3004 					else
   3005 					{
   3006 						return cost(falseBlock, budget - 0);
   3007 					}
   3008 				}
   3009 				else
   3010 				{
   3011 					return cost(trueBlock, cost(falseBlock, budget - 2));
   3012 				}
   3013 			}
   3014 		}
   3015 
   3016 		return -1;
   3017 	}
   3018 
   3019 	const Function *OutputASM::findFunction(const TString &name)
   3020 	{
   3021 		for(unsigned int f = 0; f < functionArray.size(); f++)
   3022 		{
   3023 			if(functionArray[f].name == name)
   3024 			{
   3025 				return &functionArray[f];
   3026 			}
   3027 		}
   3028 
   3029 		return 0;
   3030 	}
   3031 
   3032 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
   3033 	{
   3034 		return allocate(temporaries, temporary);
   3035 	}
   3036 
   3037 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
   3038 	{
   3039 		if(type.isStruct())
   3040 		{
   3041 			const TFieldList &fields = type.getStruct()->fields();
   3042 			int fieldVar = var;
   3043 			for(const auto &field : fields)
   3044 			{
   3045 				const TType& fieldType = *(field->type());
   3046 				setPixelShaderInputs(fieldType, fieldVar, flat);
   3047 				fieldVar += fieldType.totalRegisterCount();
   3048 			}
   3049 		}
   3050 		else
   3051 		{
   3052 			for(int i = 0; i < type.totalRegisterCount(); i++)
   3053 			{
   3054 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
   3055 			}
   3056 		}
   3057 	}
   3058 
   3059 	int OutputASM::varyingRegister(TIntermTyped *varying)
   3060 	{
   3061 		int var = lookup(varyings, varying);
   3062 
   3063 		if(var == -1)
   3064 		{
   3065 			var = allocate(varyings, varying);
   3066 			int registerCount = varying->totalRegisterCount();
   3067 
   3068 			if(pixelShader)
   3069 			{
   3070 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
   3071 				{
   3072 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
   3073 					return 0;
   3074 				}
   3075 
   3076 				if(varying->getQualifier() == EvqPointCoord)
   3077 				{
   3078 					ASSERT(varying->isRegister());
   3079 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
   3080 				}
   3081 				else
   3082 				{
   3083 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
   3084 				}
   3085 			}
   3086 			else if(vertexShader)
   3087 			{
   3088 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
   3089 				{
   3090 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
   3091 					return 0;
   3092 				}
   3093 
   3094 				if(varying->getQualifier() == EvqPosition)
   3095 				{
   3096 					ASSERT(varying->isRegister());
   3097 					vertexShader->setPositionRegister(var);
   3098 				}
   3099 				else if(varying->getQualifier() == EvqPointSize)
   3100 				{
   3101 					ASSERT(varying->isRegister());
   3102 					vertexShader->setPointSizeRegister(var);
   3103 				}
   3104 				else
   3105 				{
   3106 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
   3107 				}
   3108 			}
   3109 			else UNREACHABLE(0);
   3110 
   3111 			declareVarying(varying, var);
   3112 		}
   3113 
   3114 		return var;
   3115 	}
   3116 
   3117 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
   3118 	{
   3119 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
   3120 		{
   3121 			TIntermSymbol *symbol = varying->getAsSymbolNode();
   3122 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
   3123 		}
   3124 	}
   3125 
   3126 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
   3127 	{
   3128 		const char *name = varyingName.c_str();
   3129 		VaryingList &activeVaryings = shaderObject->varyings;
   3130 
   3131 		TStructure* structure = type.getStruct();
   3132 		if(structure)
   3133 		{
   3134 			int fieldRegisterIndex = registerIndex;
   3135 
   3136 			const TFieldList &fields = type.getStruct()->fields();
   3137 			for(const auto &field : fields)
   3138 			{
   3139 				const TType& fieldType = *(field->type());
   3140 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
   3141 				if(fieldRegisterIndex >= 0)
   3142 				{
   3143 					fieldRegisterIndex += fieldType.totalRegisterCount();
   3144 				}
   3145 			}
   3146 		}
   3147 		else
   3148 		{
   3149 			// Check if this varying has been declared before without having a register assigned
   3150 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
   3151 			{
   3152 				if(v->name == name)
   3153 				{
   3154 					if(registerIndex >= 0)
   3155 					{
   3156 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
   3157 						v->registerIndex = registerIndex;
   3158 					}
   3159 
   3160 					return;
   3161 				}
   3162 			}
   3163 
   3164 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
   3165 		}
   3166 	}
   3167 
   3168 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
   3169 	{
   3170 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
   3171 		int registerCount = fragmentOutput->totalRegisterCount();
   3172 		if(requestedLocation < 0)
   3173 		{
   3174 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
   3175 			return; // No requested location
   3176 		}
   3177 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
   3178 		{
   3179 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
   3180 		}
   3181 		else
   3182 		{
   3183 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
   3184 			if(requestedLocation != currentIndex)
   3185 			{
   3186 				if(currentIndex != -1)
   3187 				{
   3188 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
   3189 				}
   3190 				else
   3191 				{
   3192 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
   3193 					{
   3194 						while(fragmentOutputs.size() < (size_t)requestedLocation)
   3195 						{
   3196 							fragmentOutputs.push_back(nullptr);
   3197 						}
   3198 						for(int i = 0; i < registerCount; i++)
   3199 						{
   3200 							fragmentOutputs.push_back(fragmentOutput);
   3201 						}
   3202 					}
   3203 					else
   3204 					{
   3205 						for(int i = 0; i < registerCount; i++)
   3206 						{
   3207 							if(!fragmentOutputs[requestedLocation + i])
   3208 							{
   3209 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
   3210 							}
   3211 							else
   3212 							{
   3213 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
   3214 								return;
   3215 							}
   3216 						}
   3217 					}
   3218 				}
   3219 			}
   3220 		}
   3221 	}
   3222 
   3223 	int OutputASM::uniformRegister(TIntermTyped *uniform)
   3224 	{
   3225 		const TType &type = uniform->getType();
   3226 		ASSERT(!IsSampler(type.getBasicType()));
   3227 		TInterfaceBlock *block = type.getAsInterfaceBlock();
   3228 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
   3229 		ASSERT(symbol || block);
   3230 
   3231 		if(symbol || block)
   3232 		{
   3233 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
   3234 			bool isBlockMember = (!block && parentBlock);
   3235 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
   3236 
   3237 			if(index == -1 || isBlockMember)
   3238 			{
   3239 				if(index == -1)
   3240 				{
   3241 					index = allocate(uniforms, uniform);
   3242 				}
   3243 
   3244 				// Verify if the current uniform is a member of an already declared block
   3245 				const TString &name = symbol ? symbol->getSymbol() : block->name();
   3246 				int blockMemberIndex = blockMemberLookup(type, name, index);
   3247 				if(blockMemberIndex == -1)
   3248 				{
   3249 					declareUniform(type, name, index, false);
   3250 				}
   3251 				else
   3252 				{
   3253 					index = blockMemberIndex;
   3254 				}
   3255 			}
   3256 
   3257 			return index;
   3258 		}
   3259 
   3260 		return 0;
   3261 	}
   3262 
   3263 	int OutputASM::attributeRegister(TIntermTyped *attribute)
   3264 	{
   3265 		ASSERT(!attribute->isArray());
   3266 
   3267 		int index = lookup(attributes, attribute);
   3268 
   3269 		if(index == -1)
   3270 		{
   3271 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
   3272 			ASSERT(symbol);
   3273 
   3274 			if(symbol)
   3275 			{
   3276 				index = allocate(attributes, attribute);
   3277 				const TType &type = attribute->getType();
   3278 				int registerCount = attribute->totalRegisterCount();
   3279 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
   3280 				switch(type.getBasicType())
   3281 				{
   3282 				case EbtInt:
   3283 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
   3284 					break;
   3285 				case EbtUInt:
   3286 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
   3287 					break;
   3288 				case EbtFloat:
   3289 				default:
   3290 					break;
   3291 				}
   3292 
   3293 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
   3294 				{
   3295 					for(int i = 0; i < registerCount; i++)
   3296 					{
   3297 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
   3298 					}
   3299 				}
   3300 
   3301 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
   3302 
   3303 				const char *name = symbol->getSymbol().c_str();
   3304 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
   3305 			}
   3306 		}
   3307 
   3308 		return index;
   3309 	}
   3310 
   3311 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
   3312 	{
   3313 		return allocate(fragmentOutputs, fragmentOutput);
   3314 	}
   3315 
   3316 	int OutputASM::samplerRegister(TIntermTyped *sampler)
   3317 	{
   3318 		const TType &type = sampler->getType();
   3319 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   3320 
   3321 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
   3322 		TIntermBinary *binary = sampler->getAsBinaryNode();
   3323 
   3324 		if(symbol)
   3325 		{
   3326 			switch(type.getQualifier())
   3327 			{
   3328 			case EvqUniform:
   3329 				return samplerRegister(symbol);
   3330 			case EvqIn:
   3331 			case EvqConstReadOnly:
   3332 				// Function arguments are not (uniform) sampler registers
   3333 				return -1;
   3334 			default:
   3335 				UNREACHABLE(type.getQualifier());
   3336 			}
   3337 		}
   3338 		else if(binary)
   3339 		{
   3340 			TIntermTyped *left = binary->getLeft();
   3341 			TIntermTyped *right = binary->getRight();
   3342 			const TType &leftType = left->getType();
   3343 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
   3344 			int offset = 0;
   3345 
   3346 			switch(binary->getOp())
   3347 			{
   3348 			case EOpIndexDirect:
   3349 				ASSERT(left->isArray());
   3350 				offset = index * leftType.samplerRegisterCount();
   3351 				break;
   3352 			case EOpIndexDirectStruct:
   3353 				ASSERT(leftType.isStruct());
   3354 				{
   3355 					const TFieldList &fields = leftType.getStruct()->fields();
   3356 
   3357 					for(int i = 0; i < index; i++)
   3358 					{
   3359 						offset += fields[i]->type()->totalSamplerRegisterCount();
   3360 					}
   3361 				}
   3362 				break;
   3363 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
   3364 				return -1;
   3365 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
   3366 			default:
   3367 				UNREACHABLE(binary->getOp());
   3368 				return -1;
   3369 			}
   3370 
   3371 			int base = samplerRegister(left);
   3372 
   3373 			if(base < 0)
   3374 			{
   3375 				return -1;
   3376 			}
   3377 
   3378 			return base + offset;
   3379 		}
   3380 
   3381 		UNREACHABLE(0);
   3382 		return -1;   // Not a (uniform) sampler register
   3383 	}
   3384 
   3385 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
   3386 	{
   3387 		const TType &type = sampler->getType();
   3388 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   3389 
   3390 		int index = lookup(samplers, sampler);
   3391 
   3392 		if(index == -1)
   3393 		{
   3394 			index = allocate(samplers, sampler, true);
   3395 
   3396 			if(sampler->getQualifier() == EvqUniform)
   3397 			{
   3398 				const char *name = sampler->getSymbol().c_str();
   3399 				declareUniform(type, name, index, true);
   3400 			}
   3401 		}
   3402 
   3403 		return index;
   3404 	}
   3405 
   3406 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
   3407 	{
   3408 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
   3409 	}
   3410 
   3411 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
   3412 	{
   3413 		for(unsigned int i = 0; i < list.size(); i++)
   3414 		{
   3415 			if(list[i] == variable)
   3416 			{
   3417 				return i;   // Pointer match
   3418 			}
   3419 		}
   3420 
   3421 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
   3422 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
   3423 
   3424 		if(varBlock)
   3425 		{
   3426 			for(unsigned int i = 0; i < list.size(); i++)
   3427 			{
   3428 				if(list[i])
   3429 				{
   3430 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
   3431 
   3432 					if(listBlock)
   3433 					{
   3434 						if(listBlock->name() == varBlock->name())
   3435 						{
   3436 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
   3437 							ASSERT(listBlock->fields() == varBlock->fields());
   3438 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
   3439 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
   3440 
   3441 							return i;
   3442 						}
   3443 					}
   3444 				}
   3445 			}
   3446 		}
   3447 		else if(varSymbol)
   3448 		{
   3449 			for(unsigned int i = 0; i < list.size(); i++)
   3450 			{
   3451 				if(list[i])
   3452 				{
   3453 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
   3454 
   3455 					if(listSymbol)
   3456 					{
   3457 						if(listSymbol->getId() == varSymbol->getId())
   3458 						{
   3459 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
   3460 							ASSERT(listSymbol->getType() == varSymbol->getType());
   3461 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
   3462 
   3463 							return i;
   3464 						}
   3465 					}
   3466 				}
   3467 			}
   3468 		}
   3469 
   3470 		return -1;
   3471 	}
   3472 
   3473 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
   3474 	{
   3475 		for(unsigned int i = 0; i < list.size(); i++)
   3476 		{
   3477 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
   3478 			{
   3479 				return i;   // Pointer match
   3480 			}
   3481 		}
   3482 		return -1;
   3483 	}
   3484 
   3485 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
   3486 	{
   3487 		int index = lookup(list, variable);
   3488 
   3489 		if(index == -1)
   3490 		{
   3491 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
   3492 
   3493 			for(unsigned int i = 0; i < list.size(); i++)
   3494 			{
   3495 				if(list[i] == 0)
   3496 				{
   3497 					unsigned int j = 1;
   3498 					for( ; j < registerCount && (i + j) < list.size(); j++)
   3499 					{
   3500 						if(list[i + j] != 0)
   3501 						{
   3502 							break;
   3503 						}
   3504 					}
   3505 
   3506 					if(j == registerCount)   // Found free slots
   3507 					{
   3508 						for(unsigned int j = 0; j < registerCount; j++)
   3509 						{
   3510 							list[i + j] = variable;
   3511 						}
   3512 
   3513 						return i;
   3514 					}
   3515 				}
   3516 			}
   3517 
   3518 			index = list.size();
   3519 
   3520 			for(unsigned int i = 0; i < registerCount; i++)
   3521 			{
   3522 				list.push_back(variable);
   3523 			}
   3524 		}
   3525 
   3526 		return index;
   3527 	}
   3528 
   3529 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
   3530 	{
   3531 		int index = lookup(list, variable);
   3532 
   3533 		if(index >= 0)
   3534 		{
   3535 			list[index] = 0;
   3536 		}
   3537 	}
   3538 
   3539 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
   3540 	{
   3541 		const TInterfaceBlock *block = type.getInterfaceBlock();
   3542 
   3543 		if(block)
   3544 		{
   3545 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3546 			const TFieldList& fields = block->fields();
   3547 			const TString &blockName = block->name();
   3548 			int fieldRegisterIndex = registerIndex;
   3549 
   3550 			if(!type.isInterfaceBlock())
   3551 			{
   3552 				// This is a uniform that's part of a block, let's see if the block is already defined
   3553 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
   3554 				{
   3555 					if(activeUniformBlocks[i].name == blockName.c_str())
   3556 					{
   3557 						// The block is already defined, find the register for the current uniform and return it
   3558 						for(size_t j = 0; j < fields.size(); j++)
   3559 						{
   3560 							const TString &fieldName = fields[j]->name();
   3561 							if(fieldName == name)
   3562 							{
   3563 								return fieldRegisterIndex;
   3564 							}
   3565 
   3566 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
   3567 						}
   3568 
   3569 						ASSERT(false);
   3570 						return fieldRegisterIndex;
   3571 					}
   3572 				}
   3573 			}
   3574 		}
   3575 
   3576 		return -1;
   3577 	}
   3578 
   3579 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
   3580 	{
   3581 		const TStructure *structure = type.getStruct();
   3582 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
   3583 
   3584 		if(!structure && !block)
   3585 		{
   3586 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
   3587 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
   3588 			if(blockId >= 0)
   3589 			{
   3590 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
   3591 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
   3592 			}
   3593 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
   3594 			bool isSampler = IsSampler(type.getBasicType());
   3595 			if(isSampler && samplersOnly)
   3596 			{
   3597 				for(int i = 0; i < type.totalRegisterCount(); i++)
   3598 				{
   3599 					shader->declareSampler(fieldRegisterIndex + i);
   3600 				}
   3601 			}
   3602 			if(isSampler == samplersOnly)
   3603 			{
   3604 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
   3605 			}
   3606 		}
   3607 		else if(block)
   3608 		{
   3609 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3610 			const TFieldList& fields = block->fields();
   3611 			const TString &blockName = block->name();
   3612 			int fieldRegisterIndex = registerIndex;
   3613 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
   3614 
   3615 			blockId = activeUniformBlocks.size();
   3616 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
   3617 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
   3618 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
   3619 			blockDefinitions.push_back(BlockDefinitionIndexMap());
   3620 
   3621 			Std140BlockEncoder currentBlockEncoder;
   3622 			currentBlockEncoder.enterAggregateType();
   3623 			for(const auto &field : fields)
   3624 			{
   3625 				const TType &fieldType = *(field->type());
   3626 				const TString &fieldName = field->name();
   3627 				if(isUniformBlockMember && (fieldName == name))
   3628 				{
   3629 					registerIndex = fieldRegisterIndex;
   3630 				}
   3631 
   3632 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
   3633 
   3634 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
   3635 				fieldRegisterIndex += fieldType.totalRegisterCount();
   3636 			}
   3637 			currentBlockEncoder.exitAggregateType();
   3638 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
   3639 		}
   3640 		else
   3641 		{
   3642 			// Store struct for program link time validation
   3643 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
   3644 
   3645 			int fieldRegisterIndex = registerIndex;
   3646 
   3647 			const TFieldList& fields = structure->fields();
   3648 			if(type.isArray() && (structure || type.isInterfaceBlock()))
   3649 			{
   3650 				for(int i = 0; i < type.getArraySize(); i++)
   3651 				{
   3652 					if(encoder)
   3653 					{
   3654 						encoder->enterAggregateType();
   3655 					}
   3656 					for(const auto &field : fields)
   3657 					{
   3658 						const TType &fieldType = *(field->type());
   3659 						const TString &fieldName = field->name();
   3660 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
   3661 
   3662 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
   3663 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
   3664 					}
   3665 					if(encoder)
   3666 					{
   3667 						encoder->exitAggregateType();
   3668 					}
   3669 				}
   3670 			}
   3671 			else
   3672 			{
   3673 				if(encoder)
   3674 				{
   3675 					encoder->enterAggregateType();
   3676 				}
   3677 				for(const auto &field : fields)
   3678 				{
   3679 					const TType &fieldType = *(field->type());
   3680 					const TString &fieldName = field->name();
   3681 					const TString uniformName = name + "." + fieldName;
   3682 
   3683 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
   3684 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
   3685 				}
   3686 				if(encoder)
   3687 				{
   3688 					encoder->exitAggregateType();
   3689 				}
   3690 			}
   3691 		}
   3692 	}
   3693 
   3694 	int OutputASM::dim(TIntermNode *v)
   3695 	{
   3696 		TIntermTyped *vector = v->getAsTyped();
   3697 		ASSERT(vector && vector->isRegister());
   3698 		return vector->getNominalSize();
   3699 	}
   3700 
   3701 	int OutputASM::dim2(TIntermNode *m)
   3702 	{
   3703 		TIntermTyped *matrix = m->getAsTyped();
   3704 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
   3705 		return matrix->getSecondarySize();
   3706 	}
   3707 
   3708 	// Returns ~0u if no loop count could be determined
   3709 	unsigned int OutputASM::loopCount(TIntermLoop *node)
   3710 	{
   3711 		// Parse loops of the form:
   3712 		// for(int index = initial; index [comparator] limit; index += increment)
   3713 		TIntermSymbol *index = 0;
   3714 		TOperator comparator = EOpNull;
   3715 		int initial = 0;
   3716 		int limit = 0;
   3717 		int increment = 0;
   3718 
   3719 		// Parse index name and intial value
   3720 		if(node->getInit())
   3721 		{
   3722 			TIntermAggregate *init = node->getInit()->getAsAggregate();
   3723 
   3724 			if(init)
   3725 			{
   3726 				TIntermSequence &sequence = init->getSequence();
   3727 				TIntermTyped *variable = sequence[0]->getAsTyped();
   3728 
   3729 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
   3730 				{
   3731 					TIntermBinary *assign = variable->getAsBinaryNode();
   3732 
   3733 					if(assign && assign->getOp() == EOpInitialize)
   3734 					{
   3735 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
   3736 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
   3737 
   3738 						if(symbol && constant)
   3739 						{
   3740 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3741 							{
   3742 								index = symbol;
   3743 								initial = constant->getUnionArrayPointer()[0].getIConst();
   3744 							}
   3745 						}
   3746 					}
   3747 				}
   3748 			}
   3749 		}
   3750 
   3751 		// Parse comparator and limit value
   3752 		if(index && node->getCondition())
   3753 		{
   3754 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
   3755 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
   3756 
   3757 			if(left && (left->getId() == index->getId()))
   3758 			{
   3759 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
   3760 
   3761 				if(constant)
   3762 				{
   3763 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3764 					{
   3765 						comparator = test->getOp();
   3766 						limit = constant->getUnionArrayPointer()[0].getIConst();
   3767 					}
   3768 				}
   3769 			}
   3770 		}
   3771 
   3772 		// Parse increment
   3773 		if(index && comparator != EOpNull && node->getExpression())
   3774 		{
   3775 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
   3776 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
   3777 
   3778 			if(binaryTerminal)
   3779 			{
   3780 				TOperator op = binaryTerminal->getOp();
   3781 				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
   3782 
   3783 				if(constant)
   3784 				{
   3785 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3786 					{
   3787 						int value = constant->getUnionArrayPointer()[0].getIConst();
   3788 
   3789 						switch(op)
   3790 						{
   3791 						case EOpAddAssign: increment = value;  break;
   3792 						case EOpSubAssign: increment = -value; break;
   3793 						default: UNIMPLEMENTED();
   3794 						}
   3795 					}
   3796 				}
   3797 			}
   3798 			else if(unaryTerminal)
   3799 			{
   3800 				TOperator op = unaryTerminal->getOp();
   3801 
   3802 				switch(op)
   3803 				{
   3804 				case EOpPostIncrement: increment = 1;  break;
   3805 				case EOpPostDecrement: increment = -1; break;
   3806 				case EOpPreIncrement:  increment = 1;  break;
   3807 				case EOpPreDecrement:  increment = -1; break;
   3808 				default: UNIMPLEMENTED();
   3809 				}
   3810 			}
   3811 		}
   3812 
   3813 		if(index && comparator != EOpNull && increment != 0)
   3814 		{
   3815 			if(comparator == EOpLessThanEqual)
   3816 			{
   3817 				comparator = EOpLessThan;
   3818 				limit += 1;
   3819 			}
   3820 			else if(comparator == EOpGreaterThanEqual)
   3821 			{
   3822 				comparator = EOpLessThan;
   3823 				limit -= 1;
   3824 				std::swap(initial, limit);
   3825 				increment = -increment;
   3826 			}
   3827 			else if(comparator == EOpGreaterThan)
   3828 			{
   3829 				comparator = EOpLessThan;
   3830 				std::swap(initial, limit);
   3831 				increment = -increment;
   3832 			}
   3833 
   3834 			if(comparator == EOpLessThan)
   3835 			{
   3836 				if(!(initial < limit))   // Never loops
   3837 				{
   3838 					return 0;
   3839 				}
   3840 
   3841 				int iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
   3842 
   3843 				if(iterations < 0)
   3844 				{
   3845 					return ~0u;
   3846 				}
   3847 
   3848 				return iterations;
   3849 			}
   3850 			else UNIMPLEMENTED();   // Falls through
   3851 		}
   3852 
   3853 		return ~0u;
   3854 	}
   3855 
   3856 	bool LoopUnrollable::traverse(TIntermNode *node)
   3857 	{
   3858 		loopDepth = 0;
   3859 		loopUnrollable = true;
   3860 
   3861 		node->traverse(this);
   3862 
   3863 		return loopUnrollable;
   3864 	}
   3865 
   3866 	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
   3867 	{
   3868 		if(visit == PreVisit)
   3869 		{
   3870 			loopDepth++;
   3871 		}
   3872 		else if(visit == PostVisit)
   3873 		{
   3874 			loopDepth++;
   3875 		}
   3876 
   3877 		return true;
   3878 	}
   3879 
   3880 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
   3881 	{
   3882 		if(!loopUnrollable)
   3883 		{
   3884 			return false;
   3885 		}
   3886 
   3887 		if(!loopDepth)
   3888 		{
   3889 			return true;
   3890 		}
   3891 
   3892 		switch(node->getFlowOp())
   3893 		{
   3894 		case EOpKill:
   3895 		case EOpReturn:
   3896 			break;
   3897 		case EOpBreak:
   3898 		case EOpContinue:
   3899 			loopUnrollable = false;
   3900 			break;
   3901 		default: UNREACHABLE(node->getFlowOp());
   3902 		}
   3903 
   3904 		return loopUnrollable;
   3905 	}
   3906 
   3907 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
   3908 	{
   3909 		return loopUnrollable;
   3910 	}
   3911 }
   3912