Home | History | Annotate | Download | only in compiler
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "OutputASM.h"
     16 #include "Common/Math.hpp"
     17 
     18 #include "common/debug.h"
     19 #include "InfoSink.h"
     20 
     21 #include "libGLESv2/Shader.h"
     22 
     23 #include <GLES2/gl2.h>
     24 #include <GLES2/gl2ext.h>
     25 #include <GLES3/gl3.h>
     26 #include <GL/glcorearb.h>
     27 #include <GL/glext.h>
     28 
     29 #include <stdlib.h>
     30 
     31 namespace
     32 {
     33 	GLenum glVariableType(const TType &type)
     34 	{
     35 		switch(type.getBasicType())
     36 		{
     37 		case EbtFloat:
     38 			if(type.isScalar())
     39 			{
     40 				return GL_FLOAT;
     41 			}
     42 			else if(type.isVector())
     43 			{
     44 				switch(type.getNominalSize())
     45 				{
     46 				case 2: return GL_FLOAT_VEC2;
     47 				case 3: return GL_FLOAT_VEC3;
     48 				case 4: return GL_FLOAT_VEC4;
     49 				default: UNREACHABLE(type.getNominalSize());
     50 				}
     51 			}
     52 			else if(type.isMatrix())
     53 			{
     54 				switch(type.getNominalSize())
     55 				{
     56 				case 2:
     57 					switch(type.getSecondarySize())
     58 					{
     59 					case 2: return GL_FLOAT_MAT2;
     60 					case 3: return GL_FLOAT_MAT2x3;
     61 					case 4: return GL_FLOAT_MAT2x4;
     62 					default: UNREACHABLE(type.getSecondarySize());
     63 					}
     64 				case 3:
     65 					switch(type.getSecondarySize())
     66 					{
     67 					case 2: return GL_FLOAT_MAT3x2;
     68 					case 3: return GL_FLOAT_MAT3;
     69 					case 4: return GL_FLOAT_MAT3x4;
     70 					default: UNREACHABLE(type.getSecondarySize());
     71 					}
     72 				case 4:
     73 					switch(type.getSecondarySize())
     74 					{
     75 					case 2: return GL_FLOAT_MAT4x2;
     76 					case 3: return GL_FLOAT_MAT4x3;
     77 					case 4: return GL_FLOAT_MAT4;
     78 					default: UNREACHABLE(type.getSecondarySize());
     79 					}
     80 				default: UNREACHABLE(type.getNominalSize());
     81 				}
     82 			}
     83 			else UNREACHABLE(0);
     84 			break;
     85 		case EbtInt:
     86 			if(type.isScalar())
     87 			{
     88 				return GL_INT;
     89 			}
     90 			else if(type.isVector())
     91 			{
     92 				switch(type.getNominalSize())
     93 				{
     94 				case 2: return GL_INT_VEC2;
     95 				case 3: return GL_INT_VEC3;
     96 				case 4: return GL_INT_VEC4;
     97 				default: UNREACHABLE(type.getNominalSize());
     98 				}
     99 			}
    100 			else UNREACHABLE(0);
    101 			break;
    102 		case EbtUInt:
    103 			if(type.isScalar())
    104 			{
    105 				return GL_UNSIGNED_INT;
    106 			}
    107 			else if(type.isVector())
    108 			{
    109 				switch(type.getNominalSize())
    110 				{
    111 				case 2: return GL_UNSIGNED_INT_VEC2;
    112 				case 3: return GL_UNSIGNED_INT_VEC3;
    113 				case 4: return GL_UNSIGNED_INT_VEC4;
    114 				default: UNREACHABLE(type.getNominalSize());
    115 				}
    116 			}
    117 			else UNREACHABLE(0);
    118 			break;
    119 		case EbtBool:
    120 			if(type.isScalar())
    121 			{
    122 				return GL_BOOL;
    123 			}
    124 			else if(type.isVector())
    125 			{
    126 				switch(type.getNominalSize())
    127 				{
    128 				case 2: return GL_BOOL_VEC2;
    129 				case 3: return GL_BOOL_VEC3;
    130 				case 4: return GL_BOOL_VEC4;
    131 				default: UNREACHABLE(type.getNominalSize());
    132 				}
    133 			}
    134 			else UNREACHABLE(0);
    135 			break;
    136 		case EbtSampler2D:
    137 			return GL_SAMPLER_2D;
    138 		case EbtISampler2D:
    139 			return GL_INT_SAMPLER_2D;
    140 		case EbtUSampler2D:
    141 			return GL_UNSIGNED_INT_SAMPLER_2D;
    142 		case EbtSamplerCube:
    143 			return GL_SAMPLER_CUBE;
    144 		case EbtSampler2DRect:
    145 			return GL_SAMPLER_2D_RECT_ARB;
    146 		case EbtISamplerCube:
    147 			return GL_INT_SAMPLER_CUBE;
    148 		case EbtUSamplerCube:
    149 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
    150 		case EbtSamplerExternalOES:
    151 			return GL_SAMPLER_EXTERNAL_OES;
    152 		case EbtSampler3D:
    153 			return GL_SAMPLER_3D_OES;
    154 		case EbtISampler3D:
    155 			return GL_INT_SAMPLER_3D;
    156 		case EbtUSampler3D:
    157 			return GL_UNSIGNED_INT_SAMPLER_3D;
    158 		case EbtSampler2DArray:
    159 			return GL_SAMPLER_2D_ARRAY;
    160 		case EbtISampler2DArray:
    161 			return GL_INT_SAMPLER_2D_ARRAY;
    162 		case EbtUSampler2DArray:
    163 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
    164 		case EbtSampler2DShadow:
    165 			return GL_SAMPLER_2D_SHADOW;
    166 		case EbtSamplerCubeShadow:
    167 			return GL_SAMPLER_CUBE_SHADOW;
    168 		case EbtSampler2DArrayShadow:
    169 			return GL_SAMPLER_2D_ARRAY_SHADOW;
    170 		default:
    171 			UNREACHABLE(type.getBasicType());
    172 			break;
    173 		}
    174 
    175 		return GL_NONE;
    176 	}
    177 
    178 	GLenum glVariablePrecision(const TType &type)
    179 	{
    180 		if(type.getBasicType() == EbtFloat)
    181 		{
    182 			switch(type.getPrecision())
    183 			{
    184 			case EbpHigh:   return GL_HIGH_FLOAT;
    185 			case EbpMedium: return GL_MEDIUM_FLOAT;
    186 			case EbpLow:    return GL_LOW_FLOAT;
    187 			case EbpUndefined:
    188 				// Should be defined as the default precision by the parser
    189 			default: UNREACHABLE(type.getPrecision());
    190 			}
    191 		}
    192 		else if(type.getBasicType() == EbtInt)
    193 		{
    194 			switch(type.getPrecision())
    195 			{
    196 			case EbpHigh:   return GL_HIGH_INT;
    197 			case EbpMedium: return GL_MEDIUM_INT;
    198 			case EbpLow:    return GL_LOW_INT;
    199 			case EbpUndefined:
    200 				// Should be defined as the default precision by the parser
    201 			default: UNREACHABLE(type.getPrecision());
    202 			}
    203 		}
    204 
    205 		// Other types (boolean, sampler) don't have a precision
    206 		return GL_NONE;
    207 	}
    208 }
    209 
    210 namespace glsl
    211 {
    212 	// Integer to TString conversion
    213 	TString str(int i)
    214 	{
    215 		char buffer[20];
    216 		sprintf(buffer, "%d", i);
    217 		return buffer;
    218 	}
    219 
    220 	class Temporary : public TIntermSymbol
    221 	{
    222 	public:
    223 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
    224 		{
    225 		}
    226 
    227 		~Temporary()
    228 		{
    229 			assembler->freeTemporary(this);
    230 		}
    231 
    232 	private:
    233 		OutputASM *const assembler;
    234 	};
    235 
    236 	class Constant : public TIntermConstantUnion
    237 	{
    238 	public:
    239 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
    240 		{
    241 			constants[0].setFConst(x);
    242 			constants[1].setFConst(y);
    243 			constants[2].setFConst(z);
    244 			constants[3].setFConst(w);
    245 		}
    246 
    247 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
    248 		{
    249 			constants[0].setBConst(b);
    250 		}
    251 
    252 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
    253 		{
    254 			constants[0].setIConst(i);
    255 		}
    256 
    257 		~Constant()
    258 		{
    259 		}
    260 
    261 	private:
    262 		ConstantUnion constants[4];
    263 	};
    264 
    265 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
    266 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
    267 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
    268 	{
    269 		if(type.isStruct())
    270 		{
    271 			for(const auto& field : type.getStruct()->fields())
    272 			{
    273 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
    274 			}
    275 		}
    276 	}
    277 
    278 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
    279 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
    280 	{
    281 	}
    282 
    283 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
    284 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
    285 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
    286 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
    287 	{
    288 	}
    289 
    290 	BlockLayoutEncoder::BlockLayoutEncoder()
    291 		: mCurrentOffset(0)
    292 	{
    293 	}
    294 
    295 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
    296 	{
    297 		int arrayStride;
    298 		int matrixStride;
    299 
    300 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
    301 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
    302 
    303 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
    304 		                                 static_cast<int>(arrayStride * BytesPerComponent),
    305 		                                 static_cast<int>(matrixStride * BytesPerComponent),
    306 		                                 (matrixStride > 0) && isRowMajor);
    307 
    308 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
    309 
    310 		return memberInfo;
    311 	}
    312 
    313 	// static
    314 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
    315 	{
    316 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
    317 	}
    318 
    319 	// static
    320 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
    321 	{
    322 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
    323 	}
    324 
    325 	void BlockLayoutEncoder::nextRegister()
    326 	{
    327 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
    328 	}
    329 
    330 	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
    331 	{
    332 	}
    333 
    334 	void Std140BlockEncoder::enterAggregateType()
    335 	{
    336 		nextRegister();
    337 	}
    338 
    339 	void Std140BlockEncoder::exitAggregateType()
    340 	{
    341 		nextRegister();
    342 	}
    343 
    344 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
    345 	{
    346 		size_t baseAlignment = 0;
    347 		int matrixStride = 0;
    348 		int arrayStride = 0;
    349 
    350 		if(type.isMatrix())
    351 		{
    352 			baseAlignment = ComponentsPerRegister;
    353 			matrixStride = ComponentsPerRegister;
    354 
    355 			if(arraySize > 0)
    356 			{
    357 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    358 				arrayStride = ComponentsPerRegister * numRegisters;
    359 			}
    360 		}
    361 		else if(arraySize > 0)
    362 		{
    363 			baseAlignment = ComponentsPerRegister;
    364 			arrayStride = ComponentsPerRegister;
    365 		}
    366 		else
    367 		{
    368 			const size_t numComponents = type.getElementSize();
    369 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
    370 		}
    371 
    372 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
    373 
    374 		*matrixStrideOut = matrixStride;
    375 		*arrayStrideOut = arrayStride;
    376 	}
    377 
    378 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
    379 	{
    380 		if(arraySize > 0)
    381 		{
    382 			mCurrentOffset += arrayStride * arraySize;
    383 		}
    384 		else if(type.isMatrix())
    385 		{
    386 			ASSERT(matrixStride == ComponentsPerRegister);
    387 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
    388 			mCurrentOffset += ComponentsPerRegister * numRegisters;
    389 		}
    390 		else
    391 		{
    392 			mCurrentOffset += type.getElementSize();
    393 		}
    394 	}
    395 
    396 	Attribute::Attribute()
    397 	{
    398 		type = GL_NONE;
    399 		arraySize = 0;
    400 		registerIndex = 0;
    401 	}
    402 
    403 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex)
    404 	{
    405 		this->type = type;
    406 		this->name = name;
    407 		this->arraySize = arraySize;
    408 		this->layoutLocation = layoutLocation;
    409 		this->registerIndex = registerIndex;
    410 	}
    411 
    412 	sw::PixelShader *Shader::getPixelShader() const
    413 	{
    414 		return nullptr;
    415 	}
    416 
    417 	sw::VertexShader *Shader::getVertexShader() const
    418 	{
    419 		return nullptr;
    420 	}
    421 
    422 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
    423 	{
    424 		TString name = TFunction::unmangleName(nodeName);
    425 
    426 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
    427 		{
    428 			method = IMPLICIT;
    429 		}
    430 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
    431 		{
    432 			method = IMPLICIT;
    433 			proj = true;
    434 		}
    435 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
    436 		{
    437 			method = LOD;
    438 		}
    439 		else if(name == "texture2DProjLod" || name == "textureProjLod")
    440 		{
    441 			method = LOD;
    442 			proj = true;
    443 		}
    444 		else if(name == "textureSize")
    445 		{
    446 			method = SIZE;
    447 		}
    448 		else if(name == "textureOffset")
    449 		{
    450 			method = IMPLICIT;
    451 			offset = true;
    452 		}
    453 		else if(name == "textureProjOffset")
    454 		{
    455 			method = IMPLICIT;
    456 			offset = true;
    457 			proj = true;
    458 		}
    459 		else if(name == "textureLodOffset")
    460 		{
    461 			method = LOD;
    462 			offset = true;
    463 		}
    464 		else if(name == "textureProjLodOffset")
    465 		{
    466 			method = LOD;
    467 			proj = true;
    468 			offset = true;
    469 		}
    470 		else if(name == "texelFetch")
    471 		{
    472 			method = FETCH;
    473 		}
    474 		else if(name == "texelFetchOffset")
    475 		{
    476 			method = FETCH;
    477 			offset = true;
    478 		}
    479 		else if(name == "textureGrad")
    480 		{
    481 			method = GRAD;
    482 		}
    483 		else if(name == "textureGradOffset")
    484 		{
    485 			method = GRAD;
    486 			offset = true;
    487 		}
    488 		else if(name == "textureProjGrad")
    489 		{
    490 			method = GRAD;
    491 			proj = true;
    492 		}
    493 		else if(name == "textureProjGradOffset")
    494 		{
    495 			method = GRAD;
    496 			proj = true;
    497 			offset = true;
    498 		}
    499 		else UNREACHABLE(0);
    500 	}
    501 
    502 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
    503 	{
    504 		shader = nullptr;
    505 		pixelShader = nullptr;
    506 		vertexShader = nullptr;
    507 
    508 		if(shaderObject)
    509 		{
    510 			shader = shaderObject->getShader();
    511 			pixelShader = shaderObject->getPixelShader();
    512 			vertexShader = shaderObject->getVertexShader();
    513 		}
    514 
    515 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
    516 		currentFunction = 0;
    517 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
    518 	}
    519 
    520 	OutputASM::~OutputASM()
    521 	{
    522 	}
    523 
    524 	void OutputASM::output()
    525 	{
    526 		if(shader)
    527 		{
    528 			emitShader(GLOBAL);
    529 
    530 			if(functionArray.size() > 1)   // Only call main() when there are other functions
    531 			{
    532 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
    533 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
    534 				callMain->dst.index = 0;   // main()
    535 
    536 				emit(sw::Shader::OPCODE_RET);
    537 			}
    538 
    539 			emitShader(FUNCTION);
    540 		}
    541 	}
    542 
    543 	void OutputASM::emitShader(Scope scope)
    544 	{
    545 		emitScope = scope;
    546 		currentScope = GLOBAL;
    547 		mContext.getTreeRoot()->traverse(this);
    548 	}
    549 
    550 	void OutputASM::freeTemporary(Temporary *temporary)
    551 	{
    552 		free(temporaries, temporary);
    553 	}
    554 
    555 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
    556 	{
    557 		TBasicType baseType = in->getType().getBasicType();
    558 
    559 		switch(op)
    560 		{
    561 		case sw::Shader::OPCODE_NEG:
    562 			switch(baseType)
    563 			{
    564 			case EbtInt:
    565 			case EbtUInt:
    566 				return sw::Shader::OPCODE_INEG;
    567 			case EbtFloat:
    568 			default:
    569 				return op;
    570 			}
    571 		case sw::Shader::OPCODE_ABS:
    572 			switch(baseType)
    573 			{
    574 			case EbtInt:
    575 				return sw::Shader::OPCODE_IABS;
    576 			case EbtFloat:
    577 			default:
    578 				return op;
    579 			}
    580 		case sw::Shader::OPCODE_SGN:
    581 			switch(baseType)
    582 			{
    583 			case EbtInt:
    584 				return sw::Shader::OPCODE_ISGN;
    585 			case EbtFloat:
    586 			default:
    587 				return op;
    588 			}
    589 		case sw::Shader::OPCODE_ADD:
    590 			switch(baseType)
    591 			{
    592 			case EbtInt:
    593 			case EbtUInt:
    594 				return sw::Shader::OPCODE_IADD;
    595 			case EbtFloat:
    596 			default:
    597 				return op;
    598 			}
    599 		case sw::Shader::OPCODE_SUB:
    600 			switch(baseType)
    601 			{
    602 			case EbtInt:
    603 			case EbtUInt:
    604 				return sw::Shader::OPCODE_ISUB;
    605 			case EbtFloat:
    606 			default:
    607 				return op;
    608 			}
    609 		case sw::Shader::OPCODE_MUL:
    610 			switch(baseType)
    611 			{
    612 			case EbtInt:
    613 			case EbtUInt:
    614 				return sw::Shader::OPCODE_IMUL;
    615 			case EbtFloat:
    616 			default:
    617 				return op;
    618 			}
    619 		case sw::Shader::OPCODE_DIV:
    620 			switch(baseType)
    621 			{
    622 			case EbtInt:
    623 				return sw::Shader::OPCODE_IDIV;
    624 			case EbtUInt:
    625 				return sw::Shader::OPCODE_UDIV;
    626 			case EbtFloat:
    627 			default:
    628 				return op;
    629 			}
    630 		case sw::Shader::OPCODE_IMOD:
    631 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
    632 		case sw::Shader::OPCODE_ISHR:
    633 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
    634 		case sw::Shader::OPCODE_MIN:
    635 			switch(baseType)
    636 			{
    637 			case EbtInt:
    638 				return sw::Shader::OPCODE_IMIN;
    639 			case EbtUInt:
    640 				return sw::Shader::OPCODE_UMIN;
    641 			case EbtFloat:
    642 			default:
    643 				return op;
    644 			}
    645 		case sw::Shader::OPCODE_MAX:
    646 			switch(baseType)
    647 			{
    648 			case EbtInt:
    649 				return sw::Shader::OPCODE_IMAX;
    650 			case EbtUInt:
    651 				return sw::Shader::OPCODE_UMAX;
    652 			case EbtFloat:
    653 			default:
    654 				return op;
    655 			}
    656 		default:
    657 			return op;
    658 		}
    659 	}
    660 
    661 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
    662 	{
    663 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
    664 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
    665 		switch(symbol->getQualifier())
    666 		{
    667 		case EvqVaryingIn:
    668 		case EvqVaryingOut:
    669 		case EvqInvariantVaryingIn:
    670 		case EvqInvariantVaryingOut:
    671 		case EvqVertexOut:
    672 		case EvqFragmentIn:
    673 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
    674 			{
    675 				declareVarying(symbol, -1);
    676 			}
    677 			break;
    678 		case EvqFragmentOut:
    679 			declareFragmentOutput(symbol);
    680 			break;
    681 		default:
    682 			break;
    683 		}
    684 
    685 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
    686 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
    687 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
    688 		// are considered active, even if they are not referenced in any shader in the program.
    689 		// The uniform block itself is also considered active, even if no member of the block is referenced."
    690 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
    691 		{
    692 			uniformRegister(symbol);
    693 		}
    694 	}
    695 
    696 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
    697 	{
    698 		if(currentScope != emitScope)
    699 		{
    700 			return false;
    701 		}
    702 
    703 		TIntermTyped *result = node;
    704 		TIntermTyped *left = node->getLeft();
    705 		TIntermTyped *right = node->getRight();
    706 		const TType &leftType = left->getType();
    707 		const TType &rightType = right->getType();
    708 
    709 		if(isSamplerRegister(result))
    710 		{
    711 			return false;   // Don't traverse, the register index is determined statically
    712 		}
    713 
    714 		switch(node->getOp())
    715 		{
    716 		case EOpAssign:
    717 			assert(visit == PreVisit);
    718 			right->traverse(this);
    719 			assignLvalue(left, right);
    720 			copy(result, right);
    721 			return false;
    722 		case EOpInitialize:
    723 			assert(visit == PreVisit);
    724 			// Constant arrays go into the constant register file.
    725 			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
    726 			{
    727 				for(int i = 0; i < left->totalRegisterCount(); i++)
    728 				{
    729 					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
    730 				}
    731 			}
    732 			else
    733 			{
    734 				right->traverse(this);
    735 				copy(left, right);
    736 			}
    737 			return false;
    738 		case EOpMatrixTimesScalarAssign:
    739 			assert(visit == PreVisit);
    740 			right->traverse(this);
    741 			for(int i = 0; i < leftType.getNominalSize(); i++)
    742 			{
    743 				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
    744 			}
    745 
    746 			assignLvalue(left, result);
    747 			return false;
    748 		case EOpVectorTimesMatrixAssign:
    749 			assert(visit == PreVisit);
    750 			{
    751 				// The left operand may contain a swizzle serving double-duty as
    752 				// swizzle and writemask, so it's important that we traverse it
    753 				// first. Otherwise we may end up never setting up our left
    754 				// operand correctly.
    755 				left->traverse(this);
    756 				right->traverse(this);
    757 				int size = leftType.getNominalSize();
    758 
    759 				for(int i = 0; i < size; i++)
    760 				{
    761 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
    762 					dot->dst.mask = 1 << i;
    763 				}
    764 
    765 				assignLvalue(left, result);
    766 			}
    767 			return false;
    768 		case EOpMatrixTimesMatrixAssign:
    769 			assert(visit == PreVisit);
    770 			{
    771 				right->traverse(this);
    772 				int dim = leftType.getNominalSize();
    773 
    774 				for(int i = 0; i < dim; i++)
    775 				{
    776 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    777 					mul->src[1].swizzle = 0x00;
    778 
    779 					for(int j = 1; j < dim; j++)
    780 					{
    781 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
    782 						mad->src[1].swizzle = j * 0x55;
    783 					}
    784 				}
    785 
    786 				assignLvalue(left, result);
    787 			}
    788 			return false;
    789 		case EOpIndexDirect:
    790 		case EOpIndexIndirect:
    791 		case EOpIndexDirectStruct:
    792 		case EOpIndexDirectInterfaceBlock:
    793 			assert(visit == PreVisit);
    794 			evaluateRvalue(node);
    795 			return false;
    796 		case EOpVectorSwizzle:
    797 			if(visit == PostVisit)
    798 			{
    799 				int swizzle = 0;
    800 				TIntermAggregate *components = right->getAsAggregate();
    801 
    802 				if(components)
    803 				{
    804 					TIntermSequence &sequence = components->getSequence();
    805 					int component = 0;
    806 
    807 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
    808 					{
    809 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
    810 
    811 						if(element)
    812 						{
    813 							int i = element->getUnionArrayPointer()[0].getIConst();
    814 							swizzle |= i << (component * 2);
    815 							component++;
    816 						}
    817 						else UNREACHABLE(0);
    818 					}
    819 				}
    820 				else UNREACHABLE(0);
    821 
    822 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
    823 				mov->src[0].swizzle = swizzle;
    824 			}
    825 			break;
    826 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
    827 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
    828 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
    829 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
    830 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
    831 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
    832 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
    833 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
    834 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
    835 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
    836 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
    837 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
    838 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
    839 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
    840 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
    841 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
    842 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
    843 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
    844 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
    845 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
    846 		case EOpEqual:
    847 			if(visit == PostVisit)
    848 			{
    849 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
    850 
    851 				for(int index = 1; index < left->totalRegisterCount(); index++)
    852 				{
    853 					Temporary equal(this);
    854 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
    855 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
    856 				}
    857 			}
    858 			break;
    859 		case EOpNotEqual:
    860 			if(visit == PostVisit)
    861 			{
    862 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
    863 
    864 				for(int index = 1; index < left->totalRegisterCount(); index++)
    865 				{
    866 					Temporary notEqual(this);
    867 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
    868 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
    869 				}
    870 			}
    871 			break;
    872 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
    873 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
    874 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
    875 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
    876 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
    877 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
    878 		case EOpMatrixTimesScalar:
    879 			if(visit == PostVisit)
    880 			{
    881 				if(left->isMatrix())
    882 				{
    883 					for(int i = 0; i < leftType.getNominalSize(); i++)
    884 					{
    885 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
    886 					}
    887 				}
    888 				else if(right->isMatrix())
    889 				{
    890 					for(int i = 0; i < rightType.getNominalSize(); i++)
    891 					{
    892 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    893 					}
    894 				}
    895 				else UNREACHABLE(0);
    896 			}
    897 			break;
    898 		case EOpVectorTimesMatrix:
    899 			if(visit == PostVisit)
    900 			{
    901 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
    902 
    903 				int size = rightType.getNominalSize();
    904 				for(int i = 0; i < size; i++)
    905 				{
    906 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
    907 					dot->dst.mask = 1 << i;
    908 				}
    909 			}
    910 			break;
    911 		case EOpMatrixTimesVector:
    912 			if(visit == PostVisit)
    913 			{
    914 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
    915 				mul->src[1].swizzle = 0x00;
    916 
    917 				int size = rightType.getNominalSize();
    918 				for(int i = 1; i < size; i++)
    919 				{
    920 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
    921 					mad->src[1].swizzle = i * 0x55;
    922 				}
    923 			}
    924 			break;
    925 		case EOpMatrixTimesMatrix:
    926 			if(visit == PostVisit)
    927 			{
    928 				int dim = leftType.getNominalSize();
    929 
    930 				int size = rightType.getNominalSize();
    931 				for(int i = 0; i < size; i++)
    932 				{
    933 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
    934 					mul->src[1].swizzle = 0x00;
    935 
    936 					for(int j = 1; j < dim; j++)
    937 					{
    938 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
    939 						mad->src[1].swizzle = j * 0x55;
    940 					}
    941 				}
    942 			}
    943 			break;
    944 		case EOpLogicalOr:
    945 			if(trivial(right, 6))
    946 			{
    947 				if(visit == PostVisit)
    948 				{
    949 					emit(sw::Shader::OPCODE_OR, result, left, right);
    950 				}
    951 			}
    952 			else   // Short-circuit evaluation
    953 			{
    954 				if(visit == InVisit)
    955 				{
    956 					emit(sw::Shader::OPCODE_MOV, result, left);
    957 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
    958 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
    959 				}
    960 				else if(visit == PostVisit)
    961 				{
    962 					emit(sw::Shader::OPCODE_MOV, result, right);
    963 					emit(sw::Shader::OPCODE_ENDIF);
    964 				}
    965 			}
    966 			break;
    967 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
    968 		case EOpLogicalAnd:
    969 			if(trivial(right, 6))
    970 			{
    971 				if(visit == PostVisit)
    972 				{
    973 					emit(sw::Shader::OPCODE_AND, result, left, right);
    974 				}
    975 			}
    976 			else   // Short-circuit evaluation
    977 			{
    978 				if(visit == InVisit)
    979 				{
    980 					emit(sw::Shader::OPCODE_MOV, result, left);
    981 					emit(sw::Shader::OPCODE_IF, 0, result);
    982 				}
    983 				else if(visit == PostVisit)
    984 				{
    985 					emit(sw::Shader::OPCODE_MOV, result, right);
    986 					emit(sw::Shader::OPCODE_ENDIF);
    987 				}
    988 			}
    989 			break;
    990 		default: UNREACHABLE(node->getOp());
    991 		}
    992 
    993 		return true;
    994 	}
    995 
    996 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
    997 	{
    998 		switch(size)
    999 		{
   1000 		case 1: // Used for cofactor computation only
   1001 			{
   1002 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
   1003 				bool isMov = (row == col);
   1004 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
   1005 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
   1006 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
   1007 				mov->dst.mask = 1 << outRow;
   1008 			}
   1009 			break;
   1010 		case 2:
   1011 			{
   1012 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
   1013 
   1014 				bool isCofactor = (col >= 0) && (row >= 0);
   1015 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
   1016 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
   1017 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
   1018 
   1019 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
   1020 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
   1021 				det->dst.mask = 1 << outRow;
   1022 			}
   1023 			break;
   1024 		case 3:
   1025 			{
   1026 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
   1027 
   1028 				bool isCofactor = (col >= 0) && (row >= 0);
   1029 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
   1030 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
   1031 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
   1032 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
   1033 
   1034 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
   1035 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
   1036 				det->dst.mask = 1 << outRow;
   1037 			}
   1038 			break;
   1039 		case 4:
   1040 			{
   1041 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
   1042 				det->dst.mask = 1 << outRow;
   1043 			}
   1044 			break;
   1045 		default:
   1046 			UNREACHABLE(size);
   1047 			break;
   1048 		}
   1049 	}
   1050 
   1051 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
   1052 	{
   1053 		if(currentScope != emitScope)
   1054 		{
   1055 			return false;
   1056 		}
   1057 
   1058 		TIntermTyped *result = node;
   1059 		TIntermTyped *arg = node->getOperand();
   1060 		TBasicType basicType = arg->getType().getBasicType();
   1061 
   1062 		union
   1063 		{
   1064 			float f;
   1065 			int i;
   1066 		} one_value;
   1067 
   1068 		if(basicType == EbtInt || basicType == EbtUInt)
   1069 		{
   1070 			one_value.i = 1;
   1071 		}
   1072 		else
   1073 		{
   1074 			one_value.f = 1.0f;
   1075 		}
   1076 
   1077 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
   1078 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
   1079 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
   1080 
   1081 		switch(node->getOp())
   1082 		{
   1083 		case EOpNegative:
   1084 			if(visit == PostVisit)
   1085 			{
   1086 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
   1087 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1088 				{
   1089 					emit(negOpcode, result, index, arg, index);
   1090 				}
   1091 			}
   1092 			break;
   1093 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1094 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1095 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
   1096 		case EOpPostIncrement:
   1097 			if(visit == PostVisit)
   1098 			{
   1099 				copy(result, arg);
   1100 
   1101 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
   1102 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1103 				{
   1104 					emit(addOpcode, arg, index, arg, index, &one);
   1105 				}
   1106 
   1107 				assignLvalue(arg, arg);
   1108 			}
   1109 			break;
   1110 		case EOpPostDecrement:
   1111 			if(visit == PostVisit)
   1112 			{
   1113 				copy(result, arg);
   1114 
   1115 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
   1116 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1117 				{
   1118 					emit(subOpcode, arg, index, arg, index, &one);
   1119 				}
   1120 
   1121 				assignLvalue(arg, arg);
   1122 			}
   1123 			break;
   1124 		case EOpPreIncrement:
   1125 			if(visit == PostVisit)
   1126 			{
   1127 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
   1128 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1129 				{
   1130 					emit(addOpcode, result, index, arg, index, &one);
   1131 				}
   1132 
   1133 				assignLvalue(arg, result);
   1134 			}
   1135 			break;
   1136 		case EOpPreDecrement:
   1137 			if(visit == PostVisit)
   1138 			{
   1139 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
   1140 				for(int index = 0; index < arg->totalRegisterCount(); index++)
   1141 				{
   1142 					emit(subOpcode, result, index, arg, index, &one);
   1143 				}
   1144 
   1145 				assignLvalue(arg, result);
   1146 			}
   1147 			break;
   1148 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
   1149 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
   1150 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
   1151 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
   1152 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
   1153 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
   1154 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
   1155 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
   1156 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
   1157 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
   1158 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
   1159 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
   1160 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
   1161 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
   1162 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
   1163 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
   1164 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
   1165 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
   1166 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
   1167 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
   1168 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
   1169 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
   1170 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
   1171 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
   1172 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
   1173 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
   1174 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
   1175 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
   1176 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
   1177 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
   1178 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
   1179 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
   1180 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
   1181 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
   1182 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
   1183 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
   1184 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
   1185 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
   1186 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
   1187 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
   1188 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
   1189 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
   1190 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
   1191 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
   1192 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
   1193 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
   1194 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
   1195 		case EOpTranspose:
   1196 			if(visit == PostVisit)
   1197 			{
   1198 				int numCols = arg->getNominalSize();
   1199 				int numRows = arg->getSecondarySize();
   1200 				for(int i = 0; i < numCols; ++i)
   1201 				{
   1202 					for(int j = 0; j < numRows; ++j)
   1203 					{
   1204 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
   1205 						mov->src[0].swizzle = 0x55 * j;
   1206 						mov->dst.mask = 1 << i;
   1207 					}
   1208 				}
   1209 			}
   1210 			break;
   1211 		case EOpDeterminant:
   1212 			if(visit == PostVisit)
   1213 			{
   1214 				int size = arg->getNominalSize();
   1215 				ASSERT(size == arg->getSecondarySize());
   1216 
   1217 				emitDeterminant(result, arg, size);
   1218 			}
   1219 			break;
   1220 		case EOpInverse:
   1221 			if(visit == PostVisit)
   1222 			{
   1223 				int size = arg->getNominalSize();
   1224 				ASSERT(size == arg->getSecondarySize());
   1225 
   1226 				// Compute transposed matrix of cofactors
   1227 				for(int i = 0; i < size; ++i)
   1228 				{
   1229 					for(int j = 0; j < size; ++j)
   1230 					{
   1231 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
   1232 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
   1233 						emitDeterminant(result, arg, size - 1, j, i, i, j);
   1234 					}
   1235 				}
   1236 
   1237 				// Compute 1 / determinant
   1238 				Temporary invDet(this);
   1239 				emitDeterminant(&invDet, arg, size);
   1240 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
   1241 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
   1242 				div->src[1].swizzle = 0x00; // xxxx
   1243 
   1244 				// Divide transposed matrix of cofactors by determinant
   1245 				for(int i = 0; i < size; ++i)
   1246 				{
   1247 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
   1248 				}
   1249 			}
   1250 			break;
   1251 		default: UNREACHABLE(node->getOp());
   1252 		}
   1253 
   1254 		return true;
   1255 	}
   1256 
   1257 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
   1258 	{
   1259 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
   1260 		{
   1261 			return false;
   1262 		}
   1263 
   1264 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
   1265 
   1266 		TIntermTyped *result = node;
   1267 		const TType &resultType = node->getType();
   1268 		TIntermSequence &arg = node->getSequence();
   1269 		size_t argumentCount = arg.size();
   1270 
   1271 		switch(node->getOp())
   1272 		{
   1273 		case EOpSequence:             break;
   1274 		case EOpDeclaration:          break;
   1275 		case EOpInvariantDeclaration: break;
   1276 		case EOpPrototype:            break;
   1277 		case EOpComma:
   1278 			if(visit == PostVisit)
   1279 			{
   1280 				copy(result, arg[1]);
   1281 			}
   1282 			break;
   1283 		case EOpFunction:
   1284 			if(visit == PreVisit)
   1285 			{
   1286 				const TString &name = node->getName();
   1287 
   1288 				if(emitScope == FUNCTION)
   1289 				{
   1290 					if(functionArray.size() > 1)   // No need for a label when there's only main()
   1291 					{
   1292 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
   1293 						label->dst.type = sw::Shader::PARAMETER_LABEL;
   1294 
   1295 						const Function *function = findFunction(name);
   1296 						ASSERT(function);   // Should have been added during global pass
   1297 						label->dst.index = function->label;
   1298 						currentFunction = function->label;
   1299 					}
   1300 				}
   1301 				else if(emitScope == GLOBAL)
   1302 				{
   1303 					if(name != "main(")
   1304 					{
   1305 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
   1306 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
   1307 					}
   1308 				}
   1309 				else UNREACHABLE(emitScope);
   1310 
   1311 				currentScope = FUNCTION;
   1312 			}
   1313 			else if(visit == PostVisit)
   1314 			{
   1315 				if(emitScope == FUNCTION)
   1316 				{
   1317 					if(functionArray.size() > 1)   // No need to return when there's only main()
   1318 					{
   1319 						emit(sw::Shader::OPCODE_RET);
   1320 					}
   1321 				}
   1322 
   1323 				currentScope = GLOBAL;
   1324 			}
   1325 			break;
   1326 		case EOpFunctionCall:
   1327 			if(visit == PostVisit)
   1328 			{
   1329 				if(node->isUserDefined())
   1330 				{
   1331 					const TString &name = node->getName();
   1332 					const Function *function = findFunction(name);
   1333 
   1334 					if(!function)
   1335 					{
   1336 						mContext.error(node->getLine(), "function definition not found", name.c_str());
   1337 						return false;
   1338 					}
   1339 
   1340 					TIntermSequence &arguments = *function->arg;
   1341 
   1342 					for(size_t i = 0; i < argumentCount; i++)
   1343 					{
   1344 						TIntermTyped *in = arguments[i]->getAsTyped();
   1345 
   1346 						if(in->getQualifier() == EvqIn ||
   1347 						   in->getQualifier() == EvqInOut ||
   1348 						   in->getQualifier() == EvqConstReadOnly)
   1349 						{
   1350 							copy(in, arg[i]);
   1351 						}
   1352 					}
   1353 
   1354 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
   1355 					call->dst.type = sw::Shader::PARAMETER_LABEL;
   1356 					call->dst.index = function->label;
   1357 
   1358 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
   1359 					{
   1360 						copy(result, function->ret);
   1361 					}
   1362 
   1363 					for(size_t i = 0; i < argumentCount; i++)
   1364 					{
   1365 						TIntermTyped *argument = arguments[i]->getAsTyped();
   1366 						TIntermTyped *out = arg[i]->getAsTyped();
   1367 
   1368 						if(argument->getQualifier() == EvqOut ||
   1369 						   argument->getQualifier() == EvqInOut)
   1370 						{
   1371 							assignLvalue(out, argument);
   1372 						}
   1373 					}
   1374 				}
   1375 				else
   1376 				{
   1377 					const TextureFunction textureFunction(node->getName());
   1378 					TIntermTyped *s = arg[0]->getAsTyped();
   1379 					TIntermTyped *t = arg[1]->getAsTyped();
   1380 
   1381 					Temporary coord(this);
   1382 
   1383 					if(textureFunction.proj)
   1384 					{
   1385 						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
   1386 						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
   1387 						rcp->dst.mask = 0x7;
   1388 
   1389 						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
   1390 						mul->dst.mask = 0x7;
   1391 
   1392 						if(IsShadowSampler(s->getBasicType()))
   1393 						{
   1394 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
   1395 							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
   1396 							mov->src[0].swizzle = 0xA4;
   1397 						}
   1398 					}
   1399 					else
   1400 					{
   1401 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
   1402 
   1403 						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
   1404 						{
   1405 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
   1406 							mov->src[0].swizzle = 0xA4;
   1407 						}
   1408 					}
   1409 
   1410 					switch(textureFunction.method)
   1411 					{
   1412 					case TextureFunction::IMPLICIT:
   1413 						if(!textureFunction.offset)
   1414 						{
   1415 							if(argumentCount == 2)
   1416 							{
   1417 								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
   1418 							}
   1419 							else if(argumentCount == 3)   // Bias
   1420 							{
   1421 								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
   1422 							}
   1423 							else UNREACHABLE(argumentCount);
   1424 						}
   1425 						else   // Offset
   1426 						{
   1427 							if(argumentCount == 3)
   1428 							{
   1429 								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
   1430 							}
   1431 							else if(argumentCount == 4)   // Bias
   1432 							{
   1433 								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
   1434 							}
   1435 							else UNREACHABLE(argumentCount);
   1436 						}
   1437 						break;
   1438 					case TextureFunction::LOD:
   1439 						if(!textureFunction.offset && argumentCount == 3)
   1440 						{
   1441 							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
   1442 						}
   1443 						else if(argumentCount == 4)   // Offset
   1444 						{
   1445 							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
   1446 						}
   1447 						else UNREACHABLE(argumentCount);
   1448 						break;
   1449 					case TextureFunction::FETCH:
   1450 						if(!textureFunction.offset && argumentCount == 3)
   1451 						{
   1452 							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
   1453 						}
   1454 						else if(argumentCount == 4)   // Offset
   1455 						{
   1456 							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
   1457 						}
   1458 						else UNREACHABLE(argumentCount);
   1459 						break;
   1460 					case TextureFunction::GRAD:
   1461 						if(!textureFunction.offset && argumentCount == 4)
   1462 						{
   1463 							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
   1464 						}
   1465 						else if(argumentCount == 5)   // Offset
   1466 						{
   1467 							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
   1468 						}
   1469 						else UNREACHABLE(argumentCount);
   1470 						break;
   1471 					case TextureFunction::SIZE:
   1472 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
   1473 						break;
   1474 					default:
   1475 						UNREACHABLE(textureFunction.method);
   1476 					}
   1477 				}
   1478 			}
   1479 			break;
   1480 		case EOpParameters:
   1481 			break;
   1482 		case EOpConstructFloat:
   1483 		case EOpConstructVec2:
   1484 		case EOpConstructVec3:
   1485 		case EOpConstructVec4:
   1486 		case EOpConstructBool:
   1487 		case EOpConstructBVec2:
   1488 		case EOpConstructBVec3:
   1489 		case EOpConstructBVec4:
   1490 		case EOpConstructInt:
   1491 		case EOpConstructIVec2:
   1492 		case EOpConstructIVec3:
   1493 		case EOpConstructIVec4:
   1494 		case EOpConstructUInt:
   1495 		case EOpConstructUVec2:
   1496 		case EOpConstructUVec3:
   1497 		case EOpConstructUVec4:
   1498 			if(visit == PostVisit)
   1499 			{
   1500 				int component = 0;
   1501 				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
   1502 				int arrayComponents = result->getType().getElementSize();
   1503 				for(size_t i = 0; i < argumentCount; i++)
   1504 				{
   1505 					TIntermTyped *argi = arg[i]->getAsTyped();
   1506 					int size = argi->getNominalSize();
   1507 					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
   1508 					int swizzle = component - (arrayIndex * arrayComponents);
   1509 
   1510 					if(!argi->isMatrix())
   1511 					{
   1512 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
   1513 						mov->dst.mask = (0xF << swizzle) & 0xF;
   1514 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1515 
   1516 						component += size;
   1517 					}
   1518 					else if(!result->isMatrix()) // Construct a non matrix from a matrix
   1519 					{
   1520 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
   1521 						mov->dst.mask = (0xF << swizzle) & 0xF;
   1522 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1523 
   1524 						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
   1525 						if(result->getNominalSize() > size)
   1526 						{
   1527 							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
   1528 							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
   1529 							// mat2: xxxy (0x40), mat3: xxxx (0x00)
   1530 							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
   1531 						}
   1532 
   1533 						component += size;
   1534 					}
   1535 					else   // Matrix
   1536 					{
   1537 						int column = 0;
   1538 
   1539 						while(component < resultType.getNominalSize())
   1540 						{
   1541 							Instruction *mov = emitCast(result, arrayIndex, argi, column);
   1542 							mov->dst.mask = (0xF << swizzle) & 0xF;
   1543 							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
   1544 
   1545 							column++;
   1546 							component += size;
   1547 						}
   1548 					}
   1549 				}
   1550 			}
   1551 			break;
   1552 		case EOpConstructMat2:
   1553 		case EOpConstructMat2x3:
   1554 		case EOpConstructMat2x4:
   1555 		case EOpConstructMat3x2:
   1556 		case EOpConstructMat3:
   1557 		case EOpConstructMat3x4:
   1558 		case EOpConstructMat4x2:
   1559 		case EOpConstructMat4x3:
   1560 		case EOpConstructMat4:
   1561 			if(visit == PostVisit)
   1562 			{
   1563 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1564 				const int outCols = result->getNominalSize();
   1565 				const int outRows = result->getSecondarySize();
   1566 
   1567 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
   1568 				{
   1569 					for(int i = 0; i < outCols; i++)
   1570 					{
   1571 						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
   1572 						if (i < outRows)
   1573 						{
   1574 							// Insert the scalar value on the main diagonal.
   1575 							// For non-square matrices, Avoid emitting in
   1576 							// a column which doesn't /have/ a main diagonal
   1577 							// element, even though it would be fairly benign --
   1578 							// it's not necessarily trivial for downstream
   1579 							// passes to see that this is redundant and strip it
   1580 							// out.
   1581 							Instruction *mov = emitCast(result, i, arg0, 0);
   1582 							mov->dst.mask = 1 << i;
   1583 							ASSERT(mov->src[0].swizzle == 0x00);
   1584 						}
   1585 					}
   1586 				}
   1587 				else if(arg0->isMatrix())
   1588 				{
   1589 					int arraySize = result->isArray() ? result->getArraySize() : 1;
   1590 
   1591 					for(int n = 0; n < arraySize; n++)
   1592 					{
   1593 						TIntermTyped *argi = arg[n]->getAsTyped();
   1594 						const int inCols = argi->getNominalSize();
   1595 						const int inRows = argi->getSecondarySize();
   1596 
   1597 						for(int i = 0; i < outCols; i++)
   1598 						{
   1599 							if(i >= inCols || outRows > inRows)
   1600 							{
   1601 								// Initialize to identity matrix
   1602 								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
   1603 								emitCast(result, i + n * outCols, &col, 0);
   1604 							}
   1605 
   1606 							if(i < inCols)
   1607 							{
   1608 								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
   1609 								mov->dst.mask = 0xF >> (4 - inRows);
   1610 							}
   1611 						}
   1612 					}
   1613 				}
   1614 				else
   1615 				{
   1616 					int column = 0;
   1617 					int row = 0;
   1618 
   1619 					for(size_t i = 0; i < argumentCount; i++)
   1620 					{
   1621 						TIntermTyped *argi = arg[i]->getAsTyped();
   1622 						int size = argi->getNominalSize();
   1623 						int element = 0;
   1624 
   1625 						while(element < size)
   1626 						{
   1627 							Instruction *mov = emitCast(result, column, argi, 0);
   1628 							mov->dst.mask = (0xF << row) & 0xF;
   1629 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
   1630 
   1631 							int end = row + size - element;
   1632 							column = end >= outRows ? column + 1 : column;
   1633 							element = element + outRows - row;
   1634 							row = end >= outRows ? 0 : end;
   1635 						}
   1636 					}
   1637 				}
   1638 			}
   1639 			break;
   1640 		case EOpConstructStruct:
   1641 			if(visit == PostVisit)
   1642 			{
   1643 				int offset = 0;
   1644 				for(size_t i = 0; i < argumentCount; i++)
   1645 				{
   1646 					TIntermTyped *argi = arg[i]->getAsTyped();
   1647 					int size = argi->totalRegisterCount();
   1648 
   1649 					for(int index = 0; index < size; index++)
   1650 					{
   1651 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
   1652 						mov->dst.mask = writeMask(result, offset + index);
   1653 					}
   1654 
   1655 					offset += size;
   1656 				}
   1657 			}
   1658 			break;
   1659 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
   1660 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
   1661 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
   1662 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
   1663 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
   1664 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
   1665 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
   1666 		case EOpModf:
   1667 			if(visit == PostVisit)
   1668 			{
   1669 				TIntermTyped* arg1 = arg[1]->getAsTyped();
   1670 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
   1671 				assignLvalue(arg1, arg1);
   1672 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
   1673 			}
   1674 			break;
   1675 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
   1676 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
   1677 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
   1678 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
   1679 		case EOpClamp:
   1680 			if(visit == PostVisit)
   1681 			{
   1682 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
   1683 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
   1684 			}
   1685 			break;
   1686 		case EOpMix:
   1687 			if(visit == PostVisit)
   1688 			{
   1689 				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
   1690 				{
   1691 					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
   1692 				}
   1693 				else
   1694 				{
   1695 					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
   1696 				}
   1697 			}
   1698 			break;
   1699 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
   1700 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
   1701 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
   1702 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
   1703 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
   1704 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1705 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
   1706 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
   1707 		case EOpMul:
   1708 			if(visit == PostVisit)
   1709 			{
   1710 				TIntermTyped *arg0 = arg[0]->getAsTyped();
   1711 				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
   1712 				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
   1713 
   1714 				int size = arg0->getNominalSize();
   1715 				for(int i = 0; i < size; i++)
   1716 				{
   1717 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
   1718 				}
   1719 			}
   1720 			break;
   1721 		case EOpOuterProduct:
   1722 			if(visit == PostVisit)
   1723 			{
   1724 				for(int i = 0; i < dim(arg[1]); i++)
   1725 				{
   1726 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
   1727 					mul->src[1].swizzle = 0x55 * i;
   1728 				}
   1729 			}
   1730 			break;
   1731 		default: UNREACHABLE(node->getOp());
   1732 		}
   1733 
   1734 		return true;
   1735 	}
   1736 
   1737 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
   1738 	{
   1739 		if(currentScope != emitScope)
   1740 		{
   1741 			return false;
   1742 		}
   1743 
   1744 		TIntermTyped *condition = node->getCondition();
   1745 		TIntermNode *trueBlock = node->getTrueBlock();
   1746 		TIntermNode *falseBlock = node->getFalseBlock();
   1747 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
   1748 
   1749 		condition->traverse(this);
   1750 
   1751 		if(node->usesTernaryOperator())
   1752 		{
   1753 			if(constantCondition)
   1754 			{
   1755 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   1756 
   1757 				if(trueCondition)
   1758 				{
   1759 					trueBlock->traverse(this);
   1760 					copy(node, trueBlock);
   1761 				}
   1762 				else
   1763 				{
   1764 					falseBlock->traverse(this);
   1765 					copy(node, falseBlock);
   1766 				}
   1767 			}
   1768 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
   1769 			{
   1770 				trueBlock->traverse(this);
   1771 				falseBlock->traverse(this);
   1772 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
   1773 			}
   1774 			else
   1775 			{
   1776 				emit(sw::Shader::OPCODE_IF, 0, condition);
   1777 
   1778 				if(trueBlock)
   1779 				{
   1780 					trueBlock->traverse(this);
   1781 					copy(node, trueBlock);
   1782 				}
   1783 
   1784 				if(falseBlock)
   1785 				{
   1786 					emit(sw::Shader::OPCODE_ELSE);
   1787 					falseBlock->traverse(this);
   1788 					copy(node, falseBlock);
   1789 				}
   1790 
   1791 				emit(sw::Shader::OPCODE_ENDIF);
   1792 			}
   1793 		}
   1794 		else  // if/else statement
   1795 		{
   1796 			if(constantCondition)
   1797 			{
   1798 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   1799 
   1800 				if(trueCondition)
   1801 				{
   1802 					if(trueBlock)
   1803 					{
   1804 						trueBlock->traverse(this);
   1805 					}
   1806 				}
   1807 				else
   1808 				{
   1809 					if(falseBlock)
   1810 					{
   1811 						falseBlock->traverse(this);
   1812 					}
   1813 				}
   1814 			}
   1815 			else
   1816 			{
   1817 				emit(sw::Shader::OPCODE_IF, 0, condition);
   1818 
   1819 				if(trueBlock)
   1820 				{
   1821 					trueBlock->traverse(this);
   1822 				}
   1823 
   1824 				if(falseBlock)
   1825 				{
   1826 					emit(sw::Shader::OPCODE_ELSE);
   1827 					falseBlock->traverse(this);
   1828 				}
   1829 
   1830 				emit(sw::Shader::OPCODE_ENDIF);
   1831 			}
   1832 		}
   1833 
   1834 		return false;
   1835 	}
   1836 
   1837 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
   1838 	{
   1839 		if(currentScope != emitScope)
   1840 		{
   1841 			return false;
   1842 		}
   1843 
   1844 		LoopInfo loop(node);
   1845 
   1846 		if(loop.iterations == 0)
   1847 		{
   1848 			return false;
   1849 		}
   1850 
   1851 		bool unroll = (loop.iterations <= 4);
   1852 
   1853 		TIntermNode *init = node->getInit();
   1854 		TIntermTyped *condition = node->getCondition();
   1855 		TIntermTyped *expression = node->getExpression();
   1856 		TIntermNode *body = node->getBody();
   1857 		Constant True(true);
   1858 
   1859 		if(loop.isDeterministic())
   1860 		{
   1861 			 deterministicVariables.insert(loop.index->getId());
   1862 
   1863 			 if(!unroll)
   1864 			 {
   1865 				 emit(sw::Shader::OPCODE_SCALAR);   // Unrolled loops don't have an ENDWHILE to disable scalar mode.
   1866 			 }
   1867 		}
   1868 
   1869 		if(node->getType() == ELoopDoWhile)
   1870 		{
   1871 			Temporary iterate(this);
   1872 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
   1873 
   1874 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
   1875 
   1876 			if(body)
   1877 			{
   1878 				body->traverse(this);
   1879 			}
   1880 
   1881 			emit(sw::Shader::OPCODE_TEST);
   1882 
   1883 			condition->traverse(this);
   1884 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
   1885 
   1886 			emit(sw::Shader::OPCODE_ENDWHILE);
   1887 		}
   1888 		else
   1889 		{
   1890 			if(init)
   1891 			{
   1892 				init->traverse(this);
   1893 			}
   1894 
   1895 			if(unroll)
   1896 			{
   1897 				mContext.info(node->getLine(), "loop unrolled", "for");
   1898 
   1899 				for(unsigned int i = 0; i < loop.iterations; i++)
   1900 				{
   1901 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
   1902 
   1903 					if(body)
   1904 					{
   1905 						body->traverse(this);
   1906 					}
   1907 
   1908 					if(expression)
   1909 					{
   1910 						expression->traverse(this);
   1911 					}
   1912 				}
   1913 			}
   1914 			else
   1915 			{
   1916 				if(condition)
   1917 				{
   1918 					condition->traverse(this);
   1919 				}
   1920 				else
   1921 				{
   1922 					condition = &True;
   1923 				}
   1924 
   1925 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
   1926 
   1927 				if(body)
   1928 				{
   1929 					body->traverse(this);
   1930 				}
   1931 
   1932 				emit(sw::Shader::OPCODE_TEST);
   1933 
   1934 				if(loop.isDeterministic())
   1935 				{
   1936 					emit(sw::Shader::OPCODE_SCALAR);
   1937 				}
   1938 
   1939 				if(expression)
   1940 				{
   1941 					expression->traverse(this);
   1942 				}
   1943 
   1944 				if(condition)
   1945 				{
   1946 					condition->traverse(this);
   1947 				}
   1948 
   1949 				emit(sw::Shader::OPCODE_ENDWHILE);
   1950 			}
   1951 		}
   1952 
   1953 		if(loop.isDeterministic())
   1954 		{
   1955 			 deterministicVariables.erase(loop.index->getId());
   1956 		}
   1957 
   1958 		return false;
   1959 	}
   1960 
   1961 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
   1962 	{
   1963 		if(currentScope != emitScope)
   1964 		{
   1965 			return false;
   1966 		}
   1967 
   1968 		switch(node->getFlowOp())
   1969 		{
   1970 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
   1971 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
   1972 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
   1973 		case EOpReturn:
   1974 			if(visit == PostVisit)
   1975 			{
   1976 				TIntermTyped *value = node->getExpression();
   1977 
   1978 				if(value)
   1979 				{
   1980 					copy(functionArray[currentFunction].ret, value);
   1981 				}
   1982 
   1983 				emit(sw::Shader::OPCODE_LEAVE);
   1984 			}
   1985 			break;
   1986 		default: UNREACHABLE(node->getFlowOp());
   1987 		}
   1988 
   1989 		return true;
   1990 	}
   1991 
   1992 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
   1993 	{
   1994 		if(currentScope != emitScope)
   1995 		{
   1996 			return false;
   1997 		}
   1998 
   1999 		TIntermTyped* switchValue = node->getInit();
   2000 		TIntermAggregate* opList = node->getStatementList();
   2001 
   2002 		if(!switchValue || !opList)
   2003 		{
   2004 			return false;
   2005 		}
   2006 
   2007 		switchValue->traverse(this);
   2008 
   2009 		emit(sw::Shader::OPCODE_SWITCH);
   2010 
   2011 		TIntermSequence& sequence = opList->getSequence();
   2012 		TIntermSequence::iterator it = sequence.begin();
   2013 		TIntermSequence::iterator defaultIt = sequence.end();
   2014 		int nbCases = 0;
   2015 		for(; it != sequence.end(); ++it)
   2016 		{
   2017 			TIntermCase* currentCase = (*it)->getAsCaseNode();
   2018 			if(currentCase)
   2019 			{
   2020 				TIntermSequence::iterator caseIt = it;
   2021 
   2022 				TIntermTyped* condition = currentCase->getCondition();
   2023 				if(condition) // non default case
   2024 				{
   2025 					if(nbCases != 0)
   2026 					{
   2027 						emit(sw::Shader::OPCODE_ELSE);
   2028 					}
   2029 
   2030 					condition->traverse(this);
   2031 					Temporary result(this);
   2032 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
   2033 					emit(sw::Shader::OPCODE_IF, 0, &result);
   2034 					nbCases++;
   2035 
   2036 					// Emit the code for this case and all subsequent cases until we hit a break statement.
   2037 					// TODO: This can repeat a lot of code for switches with many fall-through cases.
   2038 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
   2039 					{
   2040 						(*caseIt)->traverse(this);
   2041 
   2042 						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
   2043 						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
   2044 						// Note that this eliminates useless operations but shouldn't affect correctness.
   2045 						if((*caseIt)->getAsBranchNode())
   2046 						{
   2047 							break;
   2048 						}
   2049 					}
   2050 				}
   2051 				else
   2052 				{
   2053 					defaultIt = it; // The default case might not be the last case, keep it for last
   2054 				}
   2055 			}
   2056 		}
   2057 
   2058 		// If there's a default case, traverse it here
   2059 		if(defaultIt != sequence.end())
   2060 		{
   2061 			emit(sw::Shader::OPCODE_ELSE);
   2062 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
   2063 			{
   2064 				(*defaultIt)->traverse(this);
   2065 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
   2066 				{
   2067 					break;
   2068 				}
   2069 			}
   2070 		}
   2071 
   2072 		for(int i = 0; i < nbCases; ++i)
   2073 		{
   2074 			emit(sw::Shader::OPCODE_ENDIF);
   2075 		}
   2076 
   2077 		emit(sw::Shader::OPCODE_ENDSWITCH);
   2078 
   2079 		return false;
   2080 	}
   2081 
   2082 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
   2083 	{
   2084 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
   2085 	}
   2086 
   2087 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
   2088 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
   2089 	{
   2090 		Instruction *instruction = new Instruction(op);
   2091 
   2092 		if(dst)
   2093 		{
   2094 			destination(instruction->dst, dst, dstIndex);
   2095 		}
   2096 
   2097 		if(src0)
   2098 		{
   2099 			TIntermTyped* src = src0->getAsTyped();
   2100 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
   2101 		}
   2102 
   2103 		source(instruction->src[0], src0, index0);
   2104 		source(instruction->src[1], src1, index1);
   2105 		source(instruction->src[2], src2, index2);
   2106 		source(instruction->src[3], src3, index3);
   2107 		source(instruction->src[4], src4, index4);
   2108 
   2109 		shader->append(instruction);
   2110 
   2111 		return instruction;
   2112 	}
   2113 
   2114 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
   2115 	{
   2116 		return emitCast(dst, 0, src, 0);
   2117 	}
   2118 
   2119 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
   2120 	{
   2121 		switch(src->getBasicType())
   2122 		{
   2123 		case EbtBool:
   2124 			switch(dst->getBasicType())
   2125 			{
   2126 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   2127 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
   2128 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
   2129 			default:       break;
   2130 			}
   2131 			break;
   2132 		case EbtInt:
   2133 			switch(dst->getBasicType())
   2134 			{
   2135 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   2136 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
   2137 			default:       break;
   2138 			}
   2139 			break;
   2140 		case EbtUInt:
   2141 			switch(dst->getBasicType())
   2142 			{
   2143 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
   2144 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
   2145 			default:       break;
   2146 			}
   2147 			break;
   2148 		case EbtFloat:
   2149 			switch(dst->getBasicType())
   2150 			{
   2151 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
   2152 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
   2153 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
   2154 			default:      break;
   2155 			}
   2156 			break;
   2157 		default:
   2158 			break;
   2159 		}
   2160 
   2161 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
   2162 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
   2163 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
   2164 
   2165 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
   2166 	}
   2167 
   2168 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
   2169 	{
   2170 		for(int index = 0; index < dst->elementRegisterCount(); index++)
   2171 		{
   2172 			emit(op, dst, index, src0, index, src1, index, src2, index);
   2173 		}
   2174 	}
   2175 
   2176 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
   2177 	{
   2178 		emitBinary(op, result, src0, src1);
   2179 		assignLvalue(lhs, result);
   2180 	}
   2181 
   2182 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
   2183 	{
   2184 		sw::Shader::Opcode opcode;
   2185 		switch(left->getAsTyped()->getBasicType())
   2186 		{
   2187 		case EbtBool:
   2188 		case EbtInt:
   2189 			opcode = sw::Shader::OPCODE_ICMP;
   2190 			break;
   2191 		case EbtUInt:
   2192 			opcode = sw::Shader::OPCODE_UCMP;
   2193 			break;
   2194 		default:
   2195 			opcode = sw::Shader::OPCODE_CMP;
   2196 			break;
   2197 		}
   2198 
   2199 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
   2200 		cmp->control = cmpOp;
   2201 	}
   2202 
   2203 	int componentCount(const TType &type, int registers)
   2204 	{
   2205 		if(registers == 0)
   2206 		{
   2207 			return 0;
   2208 		}
   2209 
   2210 		if(type.isArray() && registers >= type.elementRegisterCount())
   2211 		{
   2212 			int index = registers / type.elementRegisterCount();
   2213 			registers -= index * type.elementRegisterCount();
   2214 			return index * type.getElementSize() + componentCount(type, registers);
   2215 		}
   2216 
   2217 		if(type.isStruct() || type.isInterfaceBlock())
   2218 		{
   2219 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   2220 			int elements = 0;
   2221 
   2222 			for(const auto &field : fields)
   2223 			{
   2224 				const TType &fieldType = *(field->type());
   2225 
   2226 				if(fieldType.totalRegisterCount() <= registers)
   2227 				{
   2228 					registers -= fieldType.totalRegisterCount();
   2229 					elements += fieldType.getObjectSize();
   2230 				}
   2231 				else   // Register within this field
   2232 				{
   2233 					return elements + componentCount(fieldType, registers);
   2234 				}
   2235 			}
   2236 		}
   2237 		else if(type.isMatrix())
   2238 		{
   2239 			return registers * type.registerSize();
   2240 		}
   2241 
   2242 		UNREACHABLE(0);
   2243 		return 0;
   2244 	}
   2245 
   2246 	int registerSize(const TType &type, int registers)
   2247 	{
   2248 		if(registers == 0)
   2249 		{
   2250 			if(type.isStruct())
   2251 			{
   2252 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
   2253 			}
   2254 			else if(type.isInterfaceBlock())
   2255 			{
   2256 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
   2257 			}
   2258 
   2259 			return type.registerSize();
   2260 		}
   2261 
   2262 		if(type.isArray() && registers >= type.elementRegisterCount())
   2263 		{
   2264 			int index = registers / type.elementRegisterCount();
   2265 			registers -= index * type.elementRegisterCount();
   2266 			return registerSize(type, registers);
   2267 		}
   2268 
   2269 		if(type.isStruct() || type.isInterfaceBlock())
   2270 		{
   2271 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
   2272 			int elements = 0;
   2273 
   2274 			for(const auto &field : fields)
   2275 			{
   2276 				const TType &fieldType = *(field->type());
   2277 
   2278 				if(fieldType.totalRegisterCount() <= registers)
   2279 				{
   2280 					registers -= fieldType.totalRegisterCount();
   2281 					elements += fieldType.getObjectSize();
   2282 				}
   2283 				else   // Register within this field
   2284 				{
   2285 					return registerSize(fieldType, registers);
   2286 				}
   2287 			}
   2288 		}
   2289 		else if(type.isMatrix())
   2290 		{
   2291 			return registerSize(type, 0);
   2292 		}
   2293 
   2294 		UNREACHABLE(0);
   2295 		return 0;
   2296 	}
   2297 
   2298 	int OutputASM::getBlockId(TIntermTyped *arg)
   2299 	{
   2300 		if(arg)
   2301 		{
   2302 			const TType &type = arg->getType();
   2303 			TInterfaceBlock* block = type.getInterfaceBlock();
   2304 			if(block && (type.getQualifier() == EvqUniform))
   2305 			{
   2306 				// Make sure the uniform block is declared
   2307 				uniformRegister(arg);
   2308 
   2309 				const char* blockName = block->name().c_str();
   2310 
   2311 				// Fetch uniform block index from array of blocks
   2312 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
   2313 				{
   2314 					if(blockName == it->name)
   2315 					{
   2316 						return it->blockId;
   2317 					}
   2318 				}
   2319 
   2320 				ASSERT(false);
   2321 			}
   2322 		}
   2323 
   2324 		return -1;
   2325 	}
   2326 
   2327 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
   2328 	{
   2329 		const TType &type = arg->getType();
   2330 		int blockId = getBlockId(arg);
   2331 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
   2332 		if(blockId != -1)
   2333 		{
   2334 			argumentInfo.bufferIndex = 0;
   2335 			for(int i = 0; i < blockId; ++i)
   2336 			{
   2337 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
   2338 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
   2339 			}
   2340 
   2341 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
   2342 
   2343 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
   2344 			BlockDefinitionIndexMap::const_iterator it = itEnd;
   2345 
   2346 			argumentInfo.clampedIndex = index;
   2347 			if(type.isInterfaceBlock())
   2348 			{
   2349 				// Offset index to the beginning of the selected instance
   2350 				int blockRegisters = type.elementRegisterCount();
   2351 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
   2352 				argumentInfo.bufferIndex += bufferOffset;
   2353 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
   2354 			}
   2355 
   2356 			int regIndex = registerIndex(arg);
   2357 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
   2358 			{
   2359 				it = blockDefinition.find(i);
   2360 				if(it != itEnd)
   2361 				{
   2362 					argumentInfo.clampedIndex -= (i - regIndex);
   2363 					break;
   2364 				}
   2365 			}
   2366 			ASSERT(it != itEnd);
   2367 
   2368 			argumentInfo.typedMemberInfo = it->second;
   2369 
   2370 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
   2371 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
   2372 		}
   2373 		else
   2374 		{
   2375 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
   2376 		}
   2377 
   2378 		return argumentInfo;
   2379 	}
   2380 
   2381 	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
   2382 	{
   2383 		if(argument)
   2384 		{
   2385 			TIntermTyped *arg = argument->getAsTyped();
   2386 			Temporary unpackedUniform(this);
   2387 
   2388 			const TType& srcType = arg->getType();
   2389 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
   2390 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
   2391 			{
   2392 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2393 				const TType &memberType = argumentInfo.typedMemberInfo.type;
   2394 
   2395 				if(memberType.getBasicType() == EbtBool)
   2396 				{
   2397 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
   2398 
   2399 					// Convert the packed bool, which is currently an int, to a true bool
   2400 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
   2401 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2402 					instruction->dst.index = registerIndex(&unpackedUniform);
   2403 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2404 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2405 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
   2406 
   2407 					shader->append(instruction);
   2408 
   2409 					arg = &unpackedUniform;
   2410 					index = 0;
   2411 				}
   2412 				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
   2413 				{
   2414 					int numCols = memberType.getNominalSize();
   2415 					int numRows = memberType.getSecondarySize();
   2416 
   2417 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
   2418 
   2419 					unsigned int dstIndex = registerIndex(&unpackedUniform);
   2420 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
   2421 					int arrayIndex = argumentInfo.clampedIndex / numCols;
   2422 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
   2423 
   2424 					for(int j = 0; j < numRows; ++j)
   2425 					{
   2426 						// Transpose the row major matrix
   2427 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
   2428 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
   2429 						instruction->dst.index = dstIndex;
   2430 						instruction->dst.mask = 1 << j;
   2431 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
   2432 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
   2433 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
   2434 						instruction->src[0].swizzle = srcSwizzle;
   2435 
   2436 						shader->append(instruction);
   2437 					}
   2438 
   2439 					arg = &unpackedUniform;
   2440 					index = 0;
   2441 				}
   2442 			}
   2443 
   2444 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
   2445 			const TType &type = argumentInfo.typedMemberInfo.type;
   2446 
   2447 			int size = registerSize(type, argumentInfo.clampedIndex);
   2448 
   2449 			parameter.type = registerType(arg);
   2450 			parameter.bufferIndex = argumentInfo.bufferIndex;
   2451 
   2452 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
   2453 			{
   2454 				int component = componentCount(type, argumentInfo.clampedIndex);
   2455 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
   2456 
   2457 				for(int i = 0; i < 4; i++)
   2458 				{
   2459 					if(size == 1)   // Replicate
   2460 					{
   2461 						parameter.value[i] = constants[component + 0].getAsFloat();
   2462 					}
   2463 					else if(i < size)
   2464 					{
   2465 						parameter.value[i] = constants[component + i].getAsFloat();
   2466 					}
   2467 					else
   2468 					{
   2469 						parameter.value[i] = 0.0f;
   2470 					}
   2471 				}
   2472 			}
   2473 			else
   2474 			{
   2475 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
   2476 
   2477 				if(parameter.bufferIndex != -1)
   2478 				{
   2479 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
   2480 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
   2481 				}
   2482 			}
   2483 
   2484 			if(!IsSampler(arg->getBasicType()))
   2485 			{
   2486 				parameter.swizzle = readSwizzle(arg, size);
   2487 			}
   2488 		}
   2489 	}
   2490 
   2491 	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
   2492 	{
   2493 		parameter.type = registerType(arg);
   2494 		parameter.index = registerIndex(arg) + index;
   2495 		parameter.mask = writeMask(arg, index);
   2496 	}
   2497 
   2498 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
   2499 	{
   2500 		for(int index = 0; index < dst->totalRegisterCount(); index++)
   2501 		{
   2502 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
   2503 		}
   2504 	}
   2505 
   2506 	int swizzleElement(int swizzle, int index)
   2507 	{
   2508 		return (swizzle >> (index * 2)) & 0x03;
   2509 	}
   2510 
   2511 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
   2512 	{
   2513 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
   2514 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
   2515 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
   2516 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
   2517 	}
   2518 
   2519 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
   2520 	{
   2521 		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
   2522 		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
   2523 		{
   2524 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
   2525 		}
   2526 
   2527 		TIntermBinary *binary = dst->getAsBinaryNode();
   2528 
   2529 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
   2530 		{
   2531 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
   2532 
   2533 			lvalue(insert->dst, dst);
   2534 
   2535 			insert->src[0].type = insert->dst.type;
   2536 			insert->src[0].index = insert->dst.index;
   2537 			insert->src[0].rel = insert->dst.rel;
   2538 			source(insert->src[1], src);
   2539 			source(insert->src[2], binary->getRight());
   2540 
   2541 			shader->append(insert);
   2542 		}
   2543 		else
   2544 		{
   2545 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
   2546 
   2547 			int swizzle = lvalue(mov1->dst, dst);
   2548 
   2549 			source(mov1->src[0], src);
   2550 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
   2551 
   2552 			shader->append(mov1);
   2553 
   2554 			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
   2555 			{
   2556 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
   2557 
   2558 				mov->dst = mov1->dst;
   2559 				mov->dst.index += offset;
   2560 				mov->dst.mask = writeMask(dst, offset);
   2561 
   2562 				source(mov->src[0], src, offset);
   2563 
   2564 				shader->append(mov);
   2565 			}
   2566 		}
   2567 	}
   2568 
   2569 	void OutputASM::evaluateRvalue(TIntermTyped *node)
   2570 	{
   2571 		TIntermBinary *binary = node->getAsBinaryNode();
   2572 
   2573 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
   2574 		{
   2575 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
   2576 
   2577 			destination(insert->dst, node);
   2578 
   2579 			Temporary address(this);
   2580 			unsigned char mask;
   2581 			TIntermTyped *root = nullptr;
   2582 			unsigned int offset = 0;
   2583 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
   2584 
   2585 			source(insert->src[0], root, offset);
   2586 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
   2587 
   2588 			source(insert->src[1], binary->getRight());
   2589 
   2590 			shader->append(insert);
   2591 		}
   2592 		else
   2593 		{
   2594 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
   2595 
   2596 			destination(mov1->dst, node, 0);
   2597 
   2598 			Temporary address(this);
   2599 			unsigned char mask;
   2600 			TIntermTyped *root = nullptr;
   2601 			unsigned int offset = 0;
   2602 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
   2603 
   2604 			source(mov1->src[0], root, offset);
   2605 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
   2606 
   2607 			shader->append(mov1);
   2608 
   2609 			for(int i = 1; i < node->totalRegisterCount(); i++)
   2610 			{
   2611 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
   2612 				mov->src[0].rel = mov1->src[0].rel;
   2613 			}
   2614 		}
   2615 	}
   2616 
   2617 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
   2618 	{
   2619 		Temporary address(this);
   2620 		TIntermTyped *root = nullptr;
   2621 		unsigned int offset = 0;
   2622 		unsigned char mask = 0xF;
   2623 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
   2624 
   2625 		dst.type = registerType(root);
   2626 		dst.index = registerIndex(root) + offset;
   2627 		dst.mask = mask;
   2628 
   2629 		return swizzle;
   2630 	}
   2631 
   2632 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
   2633 	{
   2634 		TIntermTyped *result = node;
   2635 		TIntermBinary *binary = node->getAsBinaryNode();
   2636 		TIntermSymbol *symbol = node->getAsSymbolNode();
   2637 
   2638 		if(binary)
   2639 		{
   2640 			TIntermTyped *left = binary->getLeft();
   2641 			TIntermTyped *right = binary->getRight();
   2642 
   2643 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
   2644 
   2645 			switch(binary->getOp())
   2646 			{
   2647 			case EOpIndexDirect:
   2648 				{
   2649 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
   2650 
   2651 					if(left->isRegister())
   2652 					{
   2653 						int leftMask = mask;
   2654 
   2655 						mask = 1;
   2656 						while((leftMask & mask) == 0)
   2657 						{
   2658 							mask = mask << 1;
   2659 						}
   2660 
   2661 						int element = swizzleElement(leftSwizzle, rightIndex);
   2662 						mask = 1 << element;
   2663 
   2664 						return element;
   2665 					}
   2666 					else if(left->isArray() || left->isMatrix())
   2667 					{
   2668 						offset += rightIndex * result->totalRegisterCount();
   2669 						return 0xE4;
   2670 					}
   2671 					else UNREACHABLE(0);
   2672 				}
   2673 				break;
   2674 			case EOpIndexIndirect:
   2675 				{
   2676 					right->traverse(this);
   2677 
   2678 					if(left->isRegister())
   2679 					{
   2680 						// Requires INSERT instruction (handled by calling function)
   2681 					}
   2682 					else if(left->isArray() || left->isMatrix())
   2683 					{
   2684 						int scale = result->totalRegisterCount();
   2685 
   2686 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
   2687 						{
   2688 							if(left->totalRegisterCount() > 1)
   2689 							{
   2690 								sw::Shader::SourceParameter relativeRegister;
   2691 								source(relativeRegister, right);
   2692 
   2693 								int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
   2694 
   2695 								rel.index = relativeRegister.index;
   2696 								rel.type = relativeRegister.type;
   2697 								rel.scale = scale;
   2698 								rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
   2699 							}
   2700 						}
   2701 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
   2702 						{
   2703 							if(scale == 1)
   2704 							{
   2705 								Constant oldScale((int)rel.scale);
   2706 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
   2707 								mad->src[0].index = rel.index;
   2708 								mad->src[0].type = rel.type;
   2709 							}
   2710 							else
   2711 							{
   2712 								Constant oldScale((int)rel.scale);
   2713 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
   2714 								mul->src[0].index = rel.index;
   2715 								mul->src[0].type = rel.type;
   2716 
   2717 								Constant newScale(scale);
   2718 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2719 							}
   2720 
   2721 							rel.type = sw::Shader::PARAMETER_TEMP;
   2722 							rel.index = registerIndex(&address);
   2723 							rel.scale = 1;
   2724 						}
   2725 						else   // Just add the new index to the address register
   2726 						{
   2727 							if(scale == 1)
   2728 							{
   2729 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
   2730 							}
   2731 							else
   2732 							{
   2733 								Constant newScale(scale);
   2734 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
   2735 							}
   2736 						}
   2737 					}
   2738 					else UNREACHABLE(0);
   2739 				}
   2740 				break;
   2741 			case EOpIndexDirectStruct:
   2742 			case EOpIndexDirectInterfaceBlock:
   2743 				{
   2744 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
   2745 					                           left->getType().getStruct()->fields() :
   2746 					                           left->getType().getInterfaceBlock()->fields();
   2747 					int index = right->getAsConstantUnion()->getIConst(0);
   2748 					int fieldOffset = 0;
   2749 
   2750 					for(int i = 0; i < index; i++)
   2751 					{
   2752 						fieldOffset += fields[i]->type()->totalRegisterCount();
   2753 					}
   2754 
   2755 					offset += fieldOffset;
   2756 					mask = writeMask(result);
   2757 
   2758 					return 0xE4;
   2759 				}
   2760 				break;
   2761 			case EOpVectorSwizzle:
   2762 				{
   2763 					ASSERT(left->isRegister());
   2764 
   2765 					int leftMask = mask;
   2766 
   2767 					int swizzle = 0;
   2768 					int rightMask = 0;
   2769 
   2770 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
   2771 
   2772 					for(unsigned int i = 0; i < sequence.size(); i++)
   2773 					{
   2774 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
   2775 
   2776 						int element = swizzleElement(leftSwizzle, index);
   2777 						rightMask = rightMask | (1 << element);
   2778 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
   2779 					}
   2780 
   2781 					mask = leftMask & rightMask;
   2782 
   2783 					return swizzle;
   2784 				}
   2785 				break;
   2786 			default:
   2787 				UNREACHABLE(binary->getOp());   // Not an l-value operator
   2788 				break;
   2789 			}
   2790 		}
   2791 		else if(symbol)
   2792 		{
   2793 			root = symbol;
   2794 			offset = 0;
   2795 			mask = writeMask(symbol);
   2796 
   2797 			return 0xE4;
   2798 		}
   2799 		else
   2800 		{
   2801 			node->traverse(this);
   2802 
   2803 			root = node;
   2804 			offset = 0;
   2805 			mask = writeMask(node);
   2806 
   2807 			return 0xE4;
   2808 		}
   2809 
   2810 		return 0xE4;
   2811 	}
   2812 
   2813 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
   2814 	{
   2815 		if(isSamplerRegister(operand))
   2816 		{
   2817 			return sw::Shader::PARAMETER_SAMPLER;
   2818 		}
   2819 
   2820 		const TQualifier qualifier = operand->getQualifier();
   2821 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
   2822 		{
   2823 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
   2824 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
   2825 			{
   2826 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
   2827 			}
   2828 			outputQualifier = qualifier;
   2829 		}
   2830 
   2831 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
   2832 		{
   2833 			// Constant arrays are in the constant register file.
   2834 			if(operand->isArray() && operand->getArraySize() > 1)
   2835 			{
   2836 				return sw::Shader::PARAMETER_CONST;
   2837 			}
   2838 			else
   2839 			{
   2840 				return sw::Shader::PARAMETER_TEMP;
   2841 			}
   2842 		}
   2843 
   2844 		switch(qualifier)
   2845 		{
   2846 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
   2847 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
   2848 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
   2849 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
   2850 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
   2851 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
   2852 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
   2853 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
   2854 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
   2855 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
   2856 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
   2857 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
   2858 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
   2859 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
   2860 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
   2861 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
   2862 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
   2863 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
   2864 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
   2865 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
   2866 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
   2867 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
   2868 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
   2869 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
   2870 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
   2871 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
   2872 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
   2873 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
   2874 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
   2875 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
   2876 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
   2877 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
   2878 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
   2879 		default: UNREACHABLE(qualifier);
   2880 		}
   2881 
   2882 		return sw::Shader::PARAMETER_VOID;
   2883 	}
   2884 
   2885 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
   2886 	{
   2887 		const TQualifier qualifier = operand->getQualifier();
   2888 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
   2889 	}
   2890 
   2891 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
   2892 	{
   2893 		if(isSamplerRegister(operand))
   2894 		{
   2895 			return samplerRegister(operand);
   2896 		}
   2897 		else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler
   2898 		{
   2899 			samplerRegister(operand); // Make sure the sampler is declared
   2900 		}
   2901 
   2902 		switch(operand->getQualifier())
   2903 		{
   2904 		case EvqTemporary:           return temporaryRegister(operand);
   2905 		case EvqGlobal:              return temporaryRegister(operand);
   2906 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
   2907 		case EvqAttribute:           return attributeRegister(operand);
   2908 		case EvqVaryingIn:           return varyingRegister(operand);
   2909 		case EvqVaryingOut:          return varyingRegister(operand);
   2910 		case EvqVertexIn:            return attributeRegister(operand);
   2911 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
   2912 		case EvqVertexOut:           return varyingRegister(operand);
   2913 		case EvqFragmentIn:          return varyingRegister(operand);
   2914 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
   2915 		case EvqInvariantVaryingOut: return varyingRegister(operand);
   2916 		case EvqSmooth:              return varyingRegister(operand);
   2917 		case EvqFlat:                return varyingRegister(operand);
   2918 		case EvqCentroidOut:         return varyingRegister(operand);
   2919 		case EvqSmoothIn:            return varyingRegister(operand);
   2920 		case EvqFlatIn:              return varyingRegister(operand);
   2921 		case EvqCentroidIn:          return varyingRegister(operand);
   2922 		case EvqUniform:             return uniformRegister(operand);
   2923 		case EvqIn:                  return temporaryRegister(operand);
   2924 		case EvqOut:                 return temporaryRegister(operand);
   2925 		case EvqInOut:               return temporaryRegister(operand);
   2926 		case EvqConstReadOnly:       return temporaryRegister(operand);
   2927 		case EvqPosition:            return varyingRegister(operand);
   2928 		case EvqPointSize:           return varyingRegister(operand);
   2929 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
   2930 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
   2931 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
   2932 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
   2933 		case EvqPointCoord:          return varyingRegister(operand);
   2934 		case EvqFragColor:           return 0;
   2935 		case EvqFragData:            return fragmentOutputRegister(operand);
   2936 		case EvqFragDepth:           return 0;
   2937 		default: UNREACHABLE(operand->getQualifier());
   2938 		}
   2939 
   2940 		return 0;
   2941 	}
   2942 
   2943 	int OutputASM::writeMask(TIntermTyped *destination, int index)
   2944 	{
   2945 		if(destination->getQualifier() == EvqPointSize)
   2946 		{
   2947 			return 0x2;   // Point size stored in the y component
   2948 		}
   2949 
   2950 		return 0xF >> (4 - registerSize(destination->getType(), index));
   2951 	}
   2952 
   2953 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
   2954 	{
   2955 		if(argument->getQualifier() == EvqPointSize)
   2956 		{
   2957 			return 0x55;   // Point size stored in the y component
   2958 		}
   2959 
   2960 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
   2961 
   2962 		return swizzleSize[size];
   2963 	}
   2964 
   2965 	// Conservatively checks whether an expression is fast to compute and has no side effects
   2966 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
   2967 	{
   2968 		if(!expression->isRegister())
   2969 		{
   2970 			return false;
   2971 		}
   2972 
   2973 		return cost(expression, budget) >= 0;
   2974 	}
   2975 
   2976 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
   2977 	int OutputASM::cost(TIntermNode *expression, int budget)
   2978 	{
   2979 		if(budget < 0)
   2980 		{
   2981 			return budget;
   2982 		}
   2983 
   2984 		if(expression->getAsSymbolNode())
   2985 		{
   2986 			return budget;
   2987 		}
   2988 		else if(expression->getAsConstantUnion())
   2989 		{
   2990 			return budget;
   2991 		}
   2992 		else if(expression->getAsBinaryNode())
   2993 		{
   2994 			TIntermBinary *binary = expression->getAsBinaryNode();
   2995 
   2996 			switch(binary->getOp())
   2997 			{
   2998 			case EOpVectorSwizzle:
   2999 			case EOpIndexDirect:
   3000 			case EOpIndexDirectStruct:
   3001 			case EOpIndexDirectInterfaceBlock:
   3002 				return cost(binary->getLeft(), budget - 0);
   3003 			case EOpAdd:
   3004 			case EOpSub:
   3005 			case EOpMul:
   3006 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
   3007 			default:
   3008 				return -1;
   3009 			}
   3010 		}
   3011 		else if(expression->getAsUnaryNode())
   3012 		{
   3013 			TIntermUnary *unary = expression->getAsUnaryNode();
   3014 
   3015 			switch(unary->getOp())
   3016 			{
   3017 			case EOpAbs:
   3018 			case EOpNegative:
   3019 				return cost(unary->getOperand(), budget - 1);
   3020 			default:
   3021 				return -1;
   3022 			}
   3023 		}
   3024 		else if(expression->getAsSelectionNode())
   3025 		{
   3026 			TIntermSelection *selection = expression->getAsSelectionNode();
   3027 
   3028 			if(selection->usesTernaryOperator())
   3029 			{
   3030 				TIntermTyped *condition = selection->getCondition();
   3031 				TIntermNode *trueBlock = selection->getTrueBlock();
   3032 				TIntermNode *falseBlock = selection->getFalseBlock();
   3033 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
   3034 
   3035 				if(constantCondition)
   3036 				{
   3037 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
   3038 
   3039 					if(trueCondition)
   3040 					{
   3041 						return cost(trueBlock, budget - 0);
   3042 					}
   3043 					else
   3044 					{
   3045 						return cost(falseBlock, budget - 0);
   3046 					}
   3047 				}
   3048 				else
   3049 				{
   3050 					return cost(trueBlock, cost(falseBlock, budget - 2));
   3051 				}
   3052 			}
   3053 		}
   3054 
   3055 		return -1;
   3056 	}
   3057 
   3058 	const Function *OutputASM::findFunction(const TString &name)
   3059 	{
   3060 		for(unsigned int f = 0; f < functionArray.size(); f++)
   3061 		{
   3062 			if(functionArray[f].name == name)
   3063 			{
   3064 				return &functionArray[f];
   3065 			}
   3066 		}
   3067 
   3068 		return 0;
   3069 	}
   3070 
   3071 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
   3072 	{
   3073 		int index = allocate(temporaries, temporary);
   3074 		if(index >= sw::NUM_TEMPORARY_REGISTERS)
   3075 		{
   3076 			mContext.error(temporary->getLine(),
   3077 				"Too many temporary registers required to compile shader",
   3078 				pixelShader ? "pixel shader" : "vertex shader");
   3079 		}
   3080 		return index;
   3081 	}
   3082 
   3083 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
   3084 	{
   3085 		if(type.isStruct())
   3086 		{
   3087 			const TFieldList &fields = type.getStruct()->fields();
   3088 			int fieldVar = var;
   3089 			for(const auto &field : fields)
   3090 			{
   3091 				const TType& fieldType = *(field->type());
   3092 				setPixelShaderInputs(fieldType, fieldVar, flat);
   3093 				fieldVar += fieldType.totalRegisterCount();
   3094 			}
   3095 		}
   3096 		else
   3097 		{
   3098 			for(int i = 0; i < type.totalRegisterCount(); i++)
   3099 			{
   3100 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
   3101 			}
   3102 		}
   3103 	}
   3104 
   3105 	int OutputASM::varyingRegister(TIntermTyped *varying)
   3106 	{
   3107 		int var = lookup(varyings, varying);
   3108 
   3109 		if(var == -1)
   3110 		{
   3111 			var = allocate(varyings, varying);
   3112 			int registerCount = varying->totalRegisterCount();
   3113 
   3114 			if(pixelShader)
   3115 			{
   3116 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
   3117 				{
   3118 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
   3119 					return 0;
   3120 				}
   3121 
   3122 				if(varying->getQualifier() == EvqPointCoord)
   3123 				{
   3124 					ASSERT(varying->isRegister());
   3125 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
   3126 				}
   3127 				else
   3128 				{
   3129 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
   3130 				}
   3131 			}
   3132 			else if(vertexShader)
   3133 			{
   3134 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
   3135 				{
   3136 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
   3137 					return 0;
   3138 				}
   3139 
   3140 				if(varying->getQualifier() == EvqPosition)
   3141 				{
   3142 					ASSERT(varying->isRegister());
   3143 					vertexShader->setPositionRegister(var);
   3144 				}
   3145 				else if(varying->getQualifier() == EvqPointSize)
   3146 				{
   3147 					ASSERT(varying->isRegister());
   3148 					vertexShader->setPointSizeRegister(var);
   3149 				}
   3150 				else
   3151 				{
   3152 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
   3153 				}
   3154 			}
   3155 			else UNREACHABLE(0);
   3156 
   3157 			declareVarying(varying, var);
   3158 		}
   3159 
   3160 		return var;
   3161 	}
   3162 
   3163 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
   3164 	{
   3165 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
   3166 		{
   3167 			TIntermSymbol *symbol = varying->getAsSymbolNode();
   3168 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
   3169 		}
   3170 	}
   3171 
   3172 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
   3173 	{
   3174 		const char *name = varyingName.c_str();
   3175 		VaryingList &activeVaryings = shaderObject->varyings;
   3176 
   3177 		TStructure* structure = type.getStruct();
   3178 		if(structure)
   3179 		{
   3180 			int fieldRegisterIndex = registerIndex;
   3181 
   3182 			const TFieldList &fields = type.getStruct()->fields();
   3183 			for(const auto &field : fields)
   3184 			{
   3185 				const TType& fieldType = *(field->type());
   3186 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
   3187 				if(fieldRegisterIndex >= 0)
   3188 				{
   3189 					fieldRegisterIndex += fieldType.totalRegisterCount();
   3190 				}
   3191 			}
   3192 		}
   3193 		else
   3194 		{
   3195 			// Check if this varying has been declared before without having a register assigned
   3196 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
   3197 			{
   3198 				if(v->name == name)
   3199 				{
   3200 					if(registerIndex >= 0)
   3201 					{
   3202 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
   3203 						v->registerIndex = registerIndex;
   3204 					}
   3205 
   3206 					return;
   3207 				}
   3208 			}
   3209 
   3210 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
   3211 		}
   3212 	}
   3213 
   3214 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
   3215 	{
   3216 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
   3217 		int registerCount = fragmentOutput->totalRegisterCount();
   3218 		if(requestedLocation < 0)
   3219 		{
   3220 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
   3221 			return; // No requested location
   3222 		}
   3223 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
   3224 		{
   3225 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
   3226 		}
   3227 		else
   3228 		{
   3229 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
   3230 			if(requestedLocation != currentIndex)
   3231 			{
   3232 				if(currentIndex != -1)
   3233 				{
   3234 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
   3235 				}
   3236 				else
   3237 				{
   3238 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
   3239 					{
   3240 						while(fragmentOutputs.size() < (size_t)requestedLocation)
   3241 						{
   3242 							fragmentOutputs.push_back(nullptr);
   3243 						}
   3244 						for(int i = 0; i < registerCount; i++)
   3245 						{
   3246 							fragmentOutputs.push_back(fragmentOutput);
   3247 						}
   3248 					}
   3249 					else
   3250 					{
   3251 						for(int i = 0; i < registerCount; i++)
   3252 						{
   3253 							if(!fragmentOutputs[requestedLocation + i])
   3254 							{
   3255 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
   3256 							}
   3257 							else
   3258 							{
   3259 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
   3260 								return;
   3261 							}
   3262 						}
   3263 					}
   3264 				}
   3265 			}
   3266 		}
   3267 	}
   3268 
   3269 	int OutputASM::uniformRegister(TIntermTyped *uniform)
   3270 	{
   3271 		const TType &type = uniform->getType();
   3272 		ASSERT(!IsSampler(type.getBasicType()));
   3273 		TInterfaceBlock *block = type.getAsInterfaceBlock();
   3274 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
   3275 		ASSERT(symbol || block);
   3276 
   3277 		if(symbol || block)
   3278 		{
   3279 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
   3280 			bool isBlockMember = (!block && parentBlock);
   3281 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
   3282 
   3283 			if(index == -1 || isBlockMember)
   3284 			{
   3285 				if(index == -1)
   3286 				{
   3287 					index = allocate(uniforms, uniform);
   3288 				}
   3289 
   3290 				// Verify if the current uniform is a member of an already declared block
   3291 				const TString &name = symbol ? symbol->getSymbol() : block->name();
   3292 				int blockMemberIndex = blockMemberLookup(type, name, index);
   3293 				if(blockMemberIndex == -1)
   3294 				{
   3295 					declareUniform(type, name, index, false);
   3296 				}
   3297 				else
   3298 				{
   3299 					index = blockMemberIndex;
   3300 				}
   3301 			}
   3302 
   3303 			return index;
   3304 		}
   3305 
   3306 		return 0;
   3307 	}
   3308 
   3309 	int OutputASM::attributeRegister(TIntermTyped *attribute)
   3310 	{
   3311 		ASSERT(!attribute->isArray());
   3312 
   3313 		int index = lookup(attributes, attribute);
   3314 
   3315 		if(index == -1)
   3316 		{
   3317 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
   3318 			ASSERT(symbol);
   3319 
   3320 			if(symbol)
   3321 			{
   3322 				index = allocate(attributes, attribute);
   3323 				const TType &type = attribute->getType();
   3324 				int registerCount = attribute->totalRegisterCount();
   3325 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
   3326 				switch(type.getBasicType())
   3327 				{
   3328 				case EbtInt:
   3329 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
   3330 					break;
   3331 				case EbtUInt:
   3332 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
   3333 					break;
   3334 				case EbtFloat:
   3335 				default:
   3336 					break;
   3337 				}
   3338 
   3339 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
   3340 				{
   3341 					for(int i = 0; i < registerCount; i++)
   3342 					{
   3343 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
   3344 					}
   3345 				}
   3346 
   3347 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
   3348 
   3349 				const char *name = symbol->getSymbol().c_str();
   3350 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
   3351 			}
   3352 		}
   3353 
   3354 		return index;
   3355 	}
   3356 
   3357 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
   3358 	{
   3359 		return allocate(fragmentOutputs, fragmentOutput);
   3360 	}
   3361 
   3362 	int OutputASM::samplerRegister(TIntermTyped *sampler)
   3363 	{
   3364 		const TType &type = sampler->getType();
   3365 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   3366 
   3367 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
   3368 		TIntermBinary *binary = sampler->getAsBinaryNode();
   3369 
   3370 		if(symbol)
   3371 		{
   3372 			switch(type.getQualifier())
   3373 			{
   3374 			case EvqUniform:
   3375 				return samplerRegister(symbol);
   3376 			case EvqIn:
   3377 			case EvqConstReadOnly:
   3378 				// Function arguments are not (uniform) sampler registers
   3379 				return -1;
   3380 			default:
   3381 				UNREACHABLE(type.getQualifier());
   3382 			}
   3383 		}
   3384 		else if(binary)
   3385 		{
   3386 			TIntermTyped *left = binary->getLeft();
   3387 			TIntermTyped *right = binary->getRight();
   3388 			const TType &leftType = left->getType();
   3389 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
   3390 			int offset = 0;
   3391 
   3392 			switch(binary->getOp())
   3393 			{
   3394 			case EOpIndexDirect:
   3395 				ASSERT(left->isArray());
   3396 				offset = index * leftType.samplerRegisterCount();
   3397 				break;
   3398 			case EOpIndexDirectStruct:
   3399 				ASSERT(leftType.isStruct());
   3400 				{
   3401 					const TFieldList &fields = leftType.getStruct()->fields();
   3402 
   3403 					for(int i = 0; i < index; i++)
   3404 					{
   3405 						offset += fields[i]->type()->totalSamplerRegisterCount();
   3406 					}
   3407 				}
   3408 				break;
   3409 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
   3410 				return -1;
   3411 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
   3412 			default:
   3413 				UNREACHABLE(binary->getOp());
   3414 				return -1;
   3415 			}
   3416 
   3417 			int base = samplerRegister(left);
   3418 
   3419 			if(base < 0)
   3420 			{
   3421 				return -1;
   3422 			}
   3423 
   3424 			return base + offset;
   3425 		}
   3426 
   3427 		UNREACHABLE(0);
   3428 		return -1;   // Not a (uniform) sampler register
   3429 	}
   3430 
   3431 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
   3432 	{
   3433 		const TType &type = sampler->getType();
   3434 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
   3435 
   3436 		int index = lookup(samplers, sampler);
   3437 
   3438 		if(index == -1)
   3439 		{
   3440 			index = allocate(samplers, sampler, true);
   3441 
   3442 			if(sampler->getQualifier() == EvqUniform)
   3443 			{
   3444 				const char *name = sampler->getSymbol().c_str();
   3445 				declareUniform(type, name, index, true);
   3446 			}
   3447 		}
   3448 
   3449 		return index;
   3450 	}
   3451 
   3452 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
   3453 	{
   3454 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
   3455 	}
   3456 
   3457 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
   3458 	{
   3459 		for(unsigned int i = 0; i < list.size(); i++)
   3460 		{
   3461 			if(list[i] == variable)
   3462 			{
   3463 				return i;   // Pointer match
   3464 			}
   3465 		}
   3466 
   3467 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
   3468 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
   3469 
   3470 		if(varBlock)
   3471 		{
   3472 			for(unsigned int i = 0; i < list.size(); i++)
   3473 			{
   3474 				if(list[i])
   3475 				{
   3476 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
   3477 
   3478 					if(listBlock)
   3479 					{
   3480 						if(listBlock->name() == varBlock->name())
   3481 						{
   3482 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
   3483 							ASSERT(listBlock->fields() == varBlock->fields());
   3484 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
   3485 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
   3486 
   3487 							return i;
   3488 						}
   3489 					}
   3490 				}
   3491 			}
   3492 		}
   3493 		else if(varSymbol)
   3494 		{
   3495 			for(unsigned int i = 0; i < list.size(); i++)
   3496 			{
   3497 				if(list[i])
   3498 				{
   3499 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
   3500 
   3501 					if(listSymbol)
   3502 					{
   3503 						if(listSymbol->getId() == varSymbol->getId())
   3504 						{
   3505 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
   3506 							ASSERT(listSymbol->getType() == varSymbol->getType());
   3507 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
   3508 
   3509 							return i;
   3510 						}
   3511 					}
   3512 				}
   3513 			}
   3514 		}
   3515 
   3516 		return -1;
   3517 	}
   3518 
   3519 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
   3520 	{
   3521 		for(unsigned int i = 0; i < list.size(); i++)
   3522 		{
   3523 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
   3524 			{
   3525 				return i;   // Pointer match
   3526 			}
   3527 		}
   3528 		return -1;
   3529 	}
   3530 
   3531 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
   3532 	{
   3533 		int index = lookup(list, variable);
   3534 
   3535 		if(index == -1)
   3536 		{
   3537 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
   3538 
   3539 			for(unsigned int i = 0; i < list.size(); i++)
   3540 			{
   3541 				if(list[i] == 0)
   3542 				{
   3543 					unsigned int j = 1;
   3544 					for( ; j < registerCount && (i + j) < list.size(); j++)
   3545 					{
   3546 						if(list[i + j] != 0)
   3547 						{
   3548 							break;
   3549 						}
   3550 					}
   3551 
   3552 					if(j == registerCount)   // Found free slots
   3553 					{
   3554 						for(unsigned int j = 0; j < registerCount; j++)
   3555 						{
   3556 							list[i + j] = variable;
   3557 						}
   3558 
   3559 						return i;
   3560 					}
   3561 				}
   3562 			}
   3563 
   3564 			index = list.size();
   3565 
   3566 			for(unsigned int i = 0; i < registerCount; i++)
   3567 			{
   3568 				list.push_back(variable);
   3569 			}
   3570 		}
   3571 
   3572 		return index;
   3573 	}
   3574 
   3575 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
   3576 	{
   3577 		int index = lookup(list, variable);
   3578 
   3579 		if(index >= 0)
   3580 		{
   3581 			list[index] = 0;
   3582 		}
   3583 	}
   3584 
   3585 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
   3586 	{
   3587 		const TInterfaceBlock *block = type.getInterfaceBlock();
   3588 
   3589 		if(block)
   3590 		{
   3591 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3592 			const TFieldList& fields = block->fields();
   3593 			const TString &blockName = block->name();
   3594 			int fieldRegisterIndex = registerIndex;
   3595 
   3596 			if(!type.isInterfaceBlock())
   3597 			{
   3598 				// This is a uniform that's part of a block, let's see if the block is already defined
   3599 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
   3600 				{
   3601 					if(activeUniformBlocks[i].name == blockName.c_str())
   3602 					{
   3603 						// The block is already defined, find the register for the current uniform and return it
   3604 						for(size_t j = 0; j < fields.size(); j++)
   3605 						{
   3606 							const TString &fieldName = fields[j]->name();
   3607 							if(fieldName == name)
   3608 							{
   3609 								return fieldRegisterIndex;
   3610 							}
   3611 
   3612 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
   3613 						}
   3614 
   3615 						ASSERT(false);
   3616 						return fieldRegisterIndex;
   3617 					}
   3618 				}
   3619 			}
   3620 		}
   3621 
   3622 		return -1;
   3623 	}
   3624 
   3625 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
   3626 	{
   3627 		const TStructure *structure = type.getStruct();
   3628 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
   3629 
   3630 		if(!structure && !block)
   3631 		{
   3632 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
   3633 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
   3634 			if(blockId >= 0)
   3635 			{
   3636 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
   3637 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
   3638 			}
   3639 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
   3640 			bool isSampler = IsSampler(type.getBasicType());
   3641 			if(isSampler && samplersOnly)
   3642 			{
   3643 				for(int i = 0; i < type.totalRegisterCount(); i++)
   3644 				{
   3645 					shader->declareSampler(fieldRegisterIndex + i);
   3646 				}
   3647 			}
   3648 			if(isSampler == samplersOnly)
   3649 			{
   3650 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
   3651 			}
   3652 		}
   3653 		else if(block)
   3654 		{
   3655 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
   3656 			const TFieldList& fields = block->fields();
   3657 			const TString &blockName = block->name();
   3658 			int fieldRegisterIndex = registerIndex;
   3659 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
   3660 
   3661 			blockId = activeUniformBlocks.size();
   3662 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
   3663 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
   3664 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
   3665 			blockDefinitions.push_back(BlockDefinitionIndexMap());
   3666 
   3667 			Std140BlockEncoder currentBlockEncoder;
   3668 			currentBlockEncoder.enterAggregateType();
   3669 			for(const auto &field : fields)
   3670 			{
   3671 				const TType &fieldType = *(field->type());
   3672 				const TString &fieldName = field->name();
   3673 				if(isUniformBlockMember && (fieldName == name))
   3674 				{
   3675 					registerIndex = fieldRegisterIndex;
   3676 				}
   3677 
   3678 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
   3679 
   3680 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
   3681 				fieldRegisterIndex += fieldType.totalRegisterCount();
   3682 			}
   3683 			currentBlockEncoder.exitAggregateType();
   3684 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
   3685 		}
   3686 		else
   3687 		{
   3688 			// Store struct for program link time validation
   3689 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
   3690 
   3691 			int fieldRegisterIndex = registerIndex;
   3692 
   3693 			const TFieldList& fields = structure->fields();
   3694 			if(type.isArray() && (structure || type.isInterfaceBlock()))
   3695 			{
   3696 				for(int i = 0; i < type.getArraySize(); i++)
   3697 				{
   3698 					if(encoder)
   3699 					{
   3700 						encoder->enterAggregateType();
   3701 					}
   3702 					for(const auto &field : fields)
   3703 					{
   3704 						const TType &fieldType = *(field->type());
   3705 						const TString &fieldName = field->name();
   3706 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
   3707 
   3708 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
   3709 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
   3710 					}
   3711 					if(encoder)
   3712 					{
   3713 						encoder->exitAggregateType();
   3714 					}
   3715 				}
   3716 			}
   3717 			else
   3718 			{
   3719 				if(encoder)
   3720 				{
   3721 					encoder->enterAggregateType();
   3722 				}
   3723 				for(const auto &field : fields)
   3724 				{
   3725 					const TType &fieldType = *(field->type());
   3726 					const TString &fieldName = field->name();
   3727 					const TString uniformName = name + "." + fieldName;
   3728 
   3729 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
   3730 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
   3731 				}
   3732 				if(encoder)
   3733 				{
   3734 					encoder->exitAggregateType();
   3735 				}
   3736 			}
   3737 		}
   3738 	}
   3739 
   3740 	int OutputASM::dim(TIntermNode *v)
   3741 	{
   3742 		TIntermTyped *vector = v->getAsTyped();
   3743 		ASSERT(vector && vector->isRegister());
   3744 		return vector->getNominalSize();
   3745 	}
   3746 
   3747 	int OutputASM::dim2(TIntermNode *m)
   3748 	{
   3749 		TIntermTyped *matrix = m->getAsTyped();
   3750 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
   3751 		return matrix->getSecondarySize();
   3752 	}
   3753 
   3754 	// Sets iterations to ~0u if no loop count could be statically determined.
   3755 	OutputASM::LoopInfo::LoopInfo(TIntermLoop *node)
   3756 	{
   3757 		// Parse loops of the form:
   3758 		// for(int index = initial; index [comparator] limit; index [op] increment)
   3759 
   3760 		// Parse index name and intial value
   3761 		if(node->getInit())
   3762 		{
   3763 			TIntermAggregate *init = node->getInit()->getAsAggregate();
   3764 
   3765 			if(init)
   3766 			{
   3767 				TIntermSequence &sequence = init->getSequence();
   3768 				TIntermTyped *variable = sequence[0]->getAsTyped();
   3769 
   3770 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
   3771 				{
   3772 					TIntermBinary *assign = variable->getAsBinaryNode();
   3773 
   3774 					if(assign && assign->getOp() == EOpInitialize)
   3775 					{
   3776 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
   3777 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
   3778 
   3779 						if(symbol && constant)
   3780 						{
   3781 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3782 							{
   3783 								index = symbol;
   3784 								initial = constant->getUnionArrayPointer()[0].getIConst();
   3785 							}
   3786 						}
   3787 					}
   3788 				}
   3789 			}
   3790 		}
   3791 
   3792 		// Parse comparator and limit value
   3793 		if(index && node->getCondition())
   3794 		{
   3795 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
   3796 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
   3797 
   3798 			if(left && (left->getId() == index->getId()))
   3799 			{
   3800 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
   3801 
   3802 				if(constant)
   3803 				{
   3804 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3805 					{
   3806 						comparator = test->getOp();
   3807 						limit = constant->getUnionArrayPointer()[0].getIConst();
   3808 					}
   3809 				}
   3810 			}
   3811 		}
   3812 
   3813 		// Parse increment
   3814 		if(index && comparator != EOpNull && node->getExpression())
   3815 		{
   3816 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
   3817 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
   3818 
   3819 			if(binaryTerminal)
   3820 			{
   3821 				TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode();
   3822 
   3823 				if(operand && operand->getId() == index->getId())
   3824 				{
   3825 					TOperator op = binaryTerminal->getOp();
   3826 					TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
   3827 
   3828 					if(constant)
   3829 					{
   3830 						if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
   3831 						{
   3832 							int value = constant->getUnionArrayPointer()[0].getIConst();
   3833 
   3834 							switch(op)
   3835 							{
   3836 							case EOpAddAssign: increment = value;  break;
   3837 							case EOpSubAssign: increment = -value; break;
   3838 							default:           increment = 0;      break;   // Rare cases left unhandled. Treated as non-deterministic.
   3839 							}
   3840 						}
   3841 					}
   3842 				}
   3843 			}
   3844 			else if(unaryTerminal)
   3845 			{
   3846 				TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode();
   3847 
   3848 				if(operand && operand->getId() == index->getId())
   3849 				{
   3850 					TOperator op = unaryTerminal->getOp();
   3851 
   3852 					switch(op)
   3853 					{
   3854 					case EOpPostIncrement: increment = 1;  break;
   3855 					case EOpPostDecrement: increment = -1; break;
   3856 					case EOpPreIncrement:  increment = 1;  break;
   3857 					case EOpPreDecrement:  increment = -1; break;
   3858 					default:               increment = 0;  break;   // Rare cases left unhandled. Treated as non-deterministic.
   3859 					}
   3860 				}
   3861 			}
   3862 		}
   3863 
   3864 		if(index && comparator != EOpNull && increment != 0)
   3865 		{
   3866 			// Check the loop body for return statements or changes to the index variable that make it non-deterministic.
   3867 			LoopUnrollable loopUnrollable;
   3868 			bool unrollable = loopUnrollable.traverse(node, index->getId());
   3869 
   3870 			if(!unrollable)
   3871 			{
   3872 				iterations = ~0u;
   3873 				return;
   3874 			}
   3875 
   3876 			if(comparator == EOpLessThanEqual)
   3877 			{
   3878 				comparator = EOpLessThan;
   3879 				limit += 1;
   3880 			}
   3881 			else if(comparator == EOpGreaterThanEqual)
   3882 			{
   3883 				comparator = EOpLessThan;
   3884 				limit -= 1;
   3885 				std::swap(initial, limit);
   3886 				increment = -increment;
   3887 			}
   3888 			else if(comparator == EOpGreaterThan)
   3889 			{
   3890 				comparator = EOpLessThan;
   3891 				std::swap(initial, limit);
   3892 				increment = -increment;
   3893 			}
   3894 
   3895 			if(comparator == EOpLessThan)
   3896 			{
   3897 				if(!(initial < limit))   // Never loops
   3898 				{
   3899 					iterations = 0;
   3900 				}
   3901 				else if(increment < 0)
   3902 				{
   3903 					iterations = ~0u;
   3904 				}
   3905 				else
   3906 				{
   3907 					iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
   3908 				}
   3909 			}
   3910 			else
   3911 			{
   3912 				// Rare cases left unhandled. Treated as non-deterministic.
   3913 				iterations = ~0u;
   3914 			}
   3915 		}
   3916 	}
   3917 
   3918 	bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId)
   3919 	{
   3920 		loopUnrollable = true;
   3921 
   3922 		loopIndexId = indexId;
   3923 		TIntermNode *body = loop->getBody();
   3924 
   3925 		if(body)
   3926 		{
   3927 			body->traverse(this);
   3928 		}
   3929 
   3930 		return loopUnrollable;
   3931 	}
   3932 
   3933 	void LoopUnrollable::visitSymbol(TIntermSymbol *node)
   3934 	{
   3935 		// Check that the loop index is not used as the argument to a function out or inout parameter.
   3936 		if(node->getId() == loopIndexId)
   3937 		{
   3938 			if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut)
   3939 			{
   3940 				loopUnrollable = false;
   3941 			}
   3942 		}
   3943 	}
   3944 
   3945 	bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node)
   3946 	{
   3947 		if(!loopUnrollable)
   3948 		{
   3949 			return false;
   3950 		}
   3951 
   3952 		// Check that the loop index is not statically assigned to.
   3953 		TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode();
   3954 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
   3955 
   3956 		return loopUnrollable;
   3957 	}
   3958 
   3959 	bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node)
   3960 	{
   3961 		if(!loopUnrollable)
   3962 		{
   3963 			return false;
   3964 		}
   3965 
   3966 		// Check that the loop index is not statically assigned to.
   3967 		TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode();
   3968 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
   3969 
   3970 		return loopUnrollable;
   3971 	}
   3972 
   3973 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
   3974 	{
   3975 		if(!loopUnrollable)
   3976 		{
   3977 			return false;
   3978 		}
   3979 
   3980 		switch(node->getFlowOp())
   3981 		{
   3982 		case EOpKill:
   3983 		case EOpReturn:
   3984 		case EOpBreak:
   3985 		case EOpContinue:
   3986 			loopUnrollable = false;
   3987 			break;
   3988 		default: UNREACHABLE(node->getFlowOp());
   3989 		}
   3990 
   3991 		return loopUnrollable;
   3992 	}
   3993 
   3994 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
   3995 	{
   3996 		return loopUnrollable;
   3997 	}
   3998 }
   3999