Home | History | Annotate | Download | only in spirv_assembly
      1 /*-------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2015 Google Inc.
      6  * Copyright (c) 2016 The Khronos Group Inc.
      7  *
      8  * Licensed under the Apache License, Version 2.0 (the "License");
      9  * you may not use this file except in compliance with the License.
     10  * You may obtain a copy of the License at
     11  *
     12  *      http://www.apache.org/licenses/LICENSE-2.0
     13  *
     14  * Unless required by applicable law or agreed to in writing, software
     15  * distributed under the License is distributed on an "AS IS" BASIS,
     16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     17  * See the License for the specific language governing permissions and
     18  * limitations under the License.
     19  *
     20  *//*!
     21  * \file
     22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
     23  *//*--------------------------------------------------------------------*/
     24 
     25 #include "vktSpvAsmInstructionTests.hpp"
     26 
     27 #include "tcuCommandLine.hpp"
     28 #include "tcuFormatUtil.hpp"
     29 #include "tcuFloat.hpp"
     30 #include "tcuRGBA.hpp"
     31 #include "tcuStringTemplate.hpp"
     32 #include "tcuTestLog.hpp"
     33 #include "tcuVectorUtil.hpp"
     34 #include "tcuInterval.hpp"
     35 
     36 #include "vkDefs.hpp"
     37 #include "vkDeviceUtil.hpp"
     38 #include "vkMemUtil.hpp"
     39 #include "vkPlatform.hpp"
     40 #include "vkPrograms.hpp"
     41 #include "vkQueryUtil.hpp"
     42 #include "vkRef.hpp"
     43 #include "vkRefUtil.hpp"
     44 #include "vkStrUtil.hpp"
     45 #include "vkTypeUtil.hpp"
     46 
     47 #include "deStringUtil.hpp"
     48 #include "deUniquePtr.hpp"
     49 #include "deMath.h"
     50 #include "tcuStringTemplate.hpp"
     51 
     52 #include "vktSpvAsm16bitStorageTests.hpp"
     53 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
     54 #include "vktSpvAsmConditionalBranchTests.hpp"
     55 #include "vktSpvAsmIndexingTests.hpp"
     56 #include "vktSpvAsmComputeShaderCase.hpp"
     57 #include "vktSpvAsmComputeShaderTestUtil.hpp"
     58 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
     59 #include "vktSpvAsmVariablePointersTests.hpp"
     60 #include "vktSpvAsmSpirvVersionTests.hpp"
     61 #include "vktTestCaseUtil.hpp"
     62 #include "vktSpvAsmLoopDepLenTests.hpp"
     63 #include "vktSpvAsmLoopDepInfTests.hpp"
     64 
     65 #include <cmath>
     66 #include <limits>
     67 #include <map>
     68 #include <string>
     69 #include <sstream>
     70 #include <utility>
     71 
     72 namespace vkt
     73 {
     74 namespace SpirVAssembly
     75 {
     76 
     77 namespace
     78 {
     79 
     80 using namespace vk;
     81 using std::map;
     82 using std::string;
     83 using std::vector;
     84 using tcu::IVec3;
     85 using tcu::IVec4;
     86 using tcu::RGBA;
     87 using tcu::TestLog;
     88 using tcu::TestStatus;
     89 using tcu::Vec4;
     90 using de::UniquePtr;
     91 using tcu::StringTemplate;
     92 using tcu::Vec4;
     93 
     94 template<typename T>
     95 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
     96 {
     97 	T* const typedPtr = (T*)dst;
     98 	for (int ndx = 0; ndx < numValues; ndx++)
     99 		typedPtr[offset + ndx] = randomScalar<T>(rnd, minValue, maxValue);
    100 }
    101 
    102 // Filter is a function that returns true if a value should pass, false otherwise.
    103 template<typename T, typename FilterT>
    104 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
    105 {
    106 	T* const typedPtr = (T*)dst;
    107 	T value;
    108 	for (int ndx = 0; ndx < numValues; ndx++)
    109 	{
    110 		do
    111 			value = randomScalar<T>(rnd, minValue, maxValue);
    112 		while (!filter(value));
    113 
    114 		typedPtr[offset + ndx] = value;
    115 	}
    116 }
    117 
    118 // Gets a 64-bit integer with a more logarithmic distribution
    119 deInt64 randomInt64LogDistributed (de::Random& rnd)
    120 {
    121 	deInt64 val = rnd.getUint64();
    122 	val &= (1ull << rnd.getInt(1, 63)) - 1;
    123 	if (rnd.getBool())
    124 		val = -val;
    125 	return val;
    126 }
    127 
    128 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
    129 {
    130 	for (int ndx = 0; ndx < numValues; ndx++)
    131 		dst[ndx] = randomInt64LogDistributed(rnd);
    132 }
    133 
    134 template<typename FilterT>
    135 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
    136 {
    137 	for (int ndx = 0; ndx < numValues; ndx++)
    138 	{
    139 		deInt64 value;
    140 		do {
    141 			value = randomInt64LogDistributed(rnd);
    142 		} while (!filter(value));
    143 		dst[ndx] = value;
    144 	}
    145 }
    146 
    147 inline bool filterNonNegative (const deInt64 value)
    148 {
    149 	return value >= 0;
    150 }
    151 
    152 inline bool filterPositive (const deInt64 value)
    153 {
    154 	return value > 0;
    155 }
    156 
    157 inline bool filterNotZero (const deInt64 value)
    158 {
    159 	return value != 0;
    160 }
    161 
    162 static void floorAll (vector<float>& values)
    163 {
    164 	for (size_t i = 0; i < values.size(); i++)
    165 		values[i] = deFloatFloor(values[i]);
    166 }
    167 
    168 static void floorAll (vector<Vec4>& values)
    169 {
    170 	for (size_t i = 0; i < values.size(); i++)
    171 		values[i] = floor(values[i]);
    172 }
    173 
    174 struct CaseParameter
    175 {
    176 	const char*		name;
    177 	string			param;
    178 
    179 	CaseParameter	(const char* case_, const string& param_) : name(case_), param(param_) {}
    180 };
    181 
    182 // Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
    183 //
    184 // #version 430
    185 //
    186 // layout(std140, set = 0, binding = 0) readonly buffer Input {
    187 //   float elements[];
    188 // } input_data;
    189 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
    190 //   float elements[];
    191 // } output_data;
    192 //
    193 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
    194 //
    195 // void main() {
    196 //   uint x = gl_GlobalInvocationID.x;
    197 //   output_data.elements[x] = -input_data.elements[x];
    198 // }
    199 
    200 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
    201 {
    202 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
    203 	ComputeShaderSpec				spec;
    204 	de::Random						rnd				(deStringHash(group->getName()));
    205 	const int						numElements		= 100;
    206 	vector<float>					positiveFloats	(numElements, 0);
    207 	vector<float>					negativeFloats	(numElements, 0);
    208 
    209 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
    210 
    211 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    212 		negativeFloats[ndx] = -positiveFloats[ndx];
    213 
    214 	spec.assembly =
    215 		string(getComputeAsmShaderPreamble()) +
    216 
    217 		"OpSource GLSL 430\n"
    218 		"OpName %main           \"main\"\n"
    219 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    220 
    221 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    222 
    223 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
    224 
    225 		+ string(getComputeAsmInputOutputBuffer()) +
    226 
    227 		"%id        = OpVariable %uvec3ptr Input\n"
    228 		"%zero      = OpConstant %i32 0\n"
    229 
    230 		"%main      = OpFunction %void None %voidf\n"
    231 		"%label     = OpLabel\n"
    232 		"%idval     = OpLoad %uvec3 %id\n"
    233 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    234 
    235 		"             OpNop\n" // Inside a function body
    236 
    237 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
    238 		"%inval     = OpLoad %f32 %inloc\n"
    239 		"%neg       = OpFNegate %f32 %inval\n"
    240 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
    241 		"             OpStore %outloc %neg\n"
    242 		"             OpReturn\n"
    243 		"             OpFunctionEnd\n";
    244 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
    245 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
    246 	spec.numWorkGroups = IVec3(numElements, 1, 1);
    247 
    248 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
    249 
    250 	return group.release();
    251 }
    252 
    253 bool compareFUnord (const std::vector<BufferSp>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog& log)
    254 {
    255 	if (outputAllocs.size() != 1)
    256 		return false;
    257 
    258 	vector<deUint8>	input1Bytes;
    259 	vector<deUint8>	input2Bytes;
    260 	vector<deUint8>	expectedBytes;
    261 
    262 	inputs[0]->getBytes(input1Bytes);
    263 	inputs[1]->getBytes(input2Bytes);
    264 	expectedOutputs[0]->getBytes(expectedBytes);
    265 
    266 	const deInt32* const	expectedOutputAsInt		= reinterpret_cast<const deInt32* const>(&expectedBytes.front());
    267 	const deInt32* const	outputAsInt				= static_cast<const deInt32* const>(outputAllocs[0]->getHostPtr());
    268 	const float* const		input1AsFloat			= reinterpret_cast<const float* const>(&input1Bytes.front());
    269 	const float* const		input2AsFloat			= reinterpret_cast<const float* const>(&input2Bytes.front());
    270 	bool returnValue								= true;
    271 
    272 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
    273 	{
    274 		if (outputAsInt[idx] != expectedOutputAsInt[idx])
    275 		{
    276 			log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
    277 			returnValue = false;
    278 		}
    279 	}
    280 	return returnValue;
    281 }
    282 
    283 typedef VkBool32 (*compareFuncType) (float, float);
    284 
    285 struct OpFUnordCase
    286 {
    287 	const char*		name;
    288 	const char*		opCode;
    289 	compareFuncType	compareFunc;
    290 
    291 					OpFUnordCase			(const char* _name, const char* _opCode, compareFuncType _compareFunc)
    292 						: name				(_name)
    293 						, opCode			(_opCode)
    294 						, compareFunc		(_compareFunc) {}
    295 };
    296 
    297 #define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
    298 do { \
    299     struct compare_##NAME { static VkBool32 compare(float x, float y) { return (x OPERATOR y) ? VK_TRUE : VK_FALSE; } }; \
    300     cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
    301 } while (deGetFalse())
    302 
    303 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx)
    304 {
    305 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfunord", "Test the OpFUnord* opcodes"));
    306 	de::Random						rnd				(deStringHash(group->getName()));
    307 	const int						numElements		= 100;
    308 	vector<OpFUnordCase>			cases;
    309 
    310 	const StringTemplate			shaderTemplate	(
    311 
    312 		string(getComputeAsmShaderPreamble()) +
    313 
    314 		"OpSource GLSL 430\n"
    315 		"OpName %main           \"main\"\n"
    316 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    317 
    318 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    319 
    320 		"OpDecorate %buf BufferBlock\n"
    321 		"OpDecorate %buf2 BufferBlock\n"
    322 		"OpDecorate %indata1 DescriptorSet 0\n"
    323 		"OpDecorate %indata1 Binding 0\n"
    324 		"OpDecorate %indata2 DescriptorSet 0\n"
    325 		"OpDecorate %indata2 Binding 1\n"
    326 		"OpDecorate %outdata DescriptorSet 0\n"
    327 		"OpDecorate %outdata Binding 2\n"
    328 		"OpDecorate %f32arr ArrayStride 4\n"
    329 		"OpDecorate %i32arr ArrayStride 4\n"
    330 		"OpMemberDecorate %buf 0 Offset 0\n"
    331 		"OpMemberDecorate %buf2 0 Offset 0\n"
    332 
    333 		+ string(getComputeAsmCommonTypes()) +
    334 
    335 		"%buf        = OpTypeStruct %f32arr\n"
    336 		"%bufptr     = OpTypePointer Uniform %buf\n"
    337 		"%indata1    = OpVariable %bufptr Uniform\n"
    338 		"%indata2    = OpVariable %bufptr Uniform\n"
    339 
    340 		"%buf2       = OpTypeStruct %i32arr\n"
    341 		"%buf2ptr    = OpTypePointer Uniform %buf2\n"
    342 		"%outdata    = OpVariable %buf2ptr Uniform\n"
    343 
    344 		"%id        = OpVariable %uvec3ptr Input\n"
    345 		"%zero      = OpConstant %i32 0\n"
    346 		"%consti1   = OpConstant %i32 1\n"
    347 		"%constf1   = OpConstant %f32 1.0\n"
    348 
    349 		"%main      = OpFunction %void None %voidf\n"
    350 		"%label     = OpLabel\n"
    351 		"%idval     = OpLoad %uvec3 %id\n"
    352 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    353 
    354 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
    355 		"%inval1    = OpLoad %f32 %inloc1\n"
    356 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
    357 		"%inval2    = OpLoad %f32 %inloc2\n"
    358 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
    359 
    360 		"%result    = ${OPCODE} %bool %inval1 %inval2\n"
    361 		"%int_res   = OpSelect %i32 %result %consti1 %zero\n"
    362 		"             OpStore %outloc %int_res\n"
    363 
    364 		"             OpReturn\n"
    365 		"             OpFunctionEnd\n");
    366 
    367 	ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
    368 	ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
    369 	ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
    370 	ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
    371 	ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
    372 	ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
    373 
    374 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
    375 	{
    376 		map<string, string>			specializations;
    377 		ComputeShaderSpec			spec;
    378 		const float					NaN				= std::numeric_limits<float>::quiet_NaN();
    379 		vector<float>				inputFloats1	(numElements, 0);
    380 		vector<float>				inputFloats2	(numElements, 0);
    381 		vector<deInt32>				expectedInts	(numElements, 0);
    382 
    383 		specializations["OPCODE"]	= cases[caseNdx].opCode;
    384 		spec.assembly				= shaderTemplate.specialize(specializations);
    385 
    386 		fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
    387 		for (size_t ndx = 0; ndx < numElements; ++ndx)
    388 		{
    389 			switch (ndx % 6)
    390 			{
    391 				case 0:		inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
    392 				case 1:		inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
    393 				case 2:		inputFloats2[ndx] = inputFloats1[ndx]; break;
    394 				case 3:		inputFloats2[ndx] = NaN; break;
    395 				case 4:		inputFloats2[ndx] = inputFloats1[ndx];	inputFloats1[ndx] = NaN; break;
    396 				case 5:		inputFloats2[ndx] = NaN;				inputFloats1[ndx] = NaN; break;
    397 			}
    398 			expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
    399 		}
    400 
    401 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
    402 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
    403 		spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
    404 		spec.numWorkGroups = IVec3(numElements, 1, 1);
    405 		spec.verifyIO = &compareFUnord;
    406 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
    407 	}
    408 
    409 	return group.release();
    410 }
    411 
    412 struct OpAtomicCase
    413 {
    414 	const char*		name;
    415 	const char*		assembly;
    416 	OpAtomicType	opAtomic;
    417 	deInt32			numOutputElements;
    418 
    419 					OpAtomicCase			(const char* _name, const char* _assembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
    420 						: name				(_name)
    421 						, assembly			(_assembly)
    422 						, opAtomic			(_opAtomic)
    423 						, numOutputElements	(_numOutputElements) {}
    424 };
    425 
    426 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer)
    427 {
    428 	de::MovePtr<tcu::TestCaseGroup>	group				(new tcu::TestCaseGroup(testCtx,
    429 																				useStorageBuffer ? "opatomic_storage_buffer" : "opatomic",
    430 																				"Test the OpAtomic* opcodes"));
    431 	const int						numElements			= 65535;
    432 	vector<OpAtomicCase>			cases;
    433 
    434 	const StringTemplate			shaderTemplate	(
    435 
    436 		string("OpCapability Shader\n") +
    437 		(useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
    438 		"OpMemoryModel Logical GLSL450\n"
    439 		"OpEntryPoint GLCompute %main \"main\" %id\n"
    440 		"OpExecutionMode %main LocalSize 1 1 1\n" +
    441 
    442 		"OpSource GLSL 430\n"
    443 		"OpName %main           \"main\"\n"
    444 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    445 
    446 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    447 
    448 		"OpDecorate %buf ${BLOCK_DECORATION}\n"
    449 		"OpDecorate %indata DescriptorSet 0\n"
    450 		"OpDecorate %indata Binding 0\n"
    451 		"OpDecorate %i32arr ArrayStride 4\n"
    452 		"OpMemberDecorate %buf 0 Offset 0\n"
    453 
    454 		"OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
    455 		"OpDecorate %sum DescriptorSet 0\n"
    456 		"OpDecorate %sum Binding 1\n"
    457 		"OpMemberDecorate %sumbuf 0 Coherent\n"
    458 		"OpMemberDecorate %sumbuf 0 Offset 0\n"
    459 
    460 		+ getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
    461 
    462 		"%buf       = OpTypeStruct %i32arr\n"
    463 		"%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
    464 		"%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
    465 
    466 		"%sumbuf    = OpTypeStruct %i32arr\n"
    467 		"%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
    468 		"%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
    469 
    470 		"%id        = OpVariable %uvec3ptr Input\n"
    471 		"%minusone  = OpConstant %i32 -1\n"
    472 		"%zero      = OpConstant %i32 0\n"
    473 		"%one       = OpConstant %u32 1\n"
    474 		"%two       = OpConstant %i32 2\n"
    475 
    476 		"%main      = OpFunction %void None %voidf\n"
    477 		"%label     = OpLabel\n"
    478 		"%idval     = OpLoad %uvec3 %id\n"
    479 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    480 
    481 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
    482 		"%inval     = OpLoad %i32 %inloc\n"
    483 
    484 		"%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
    485 		"${INSTRUCTION}"
    486 
    487 		"             OpReturn\n"
    488 		"             OpFunctionEnd\n");
    489 
    490 	#define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
    491 	do { \
    492 		DE_STATIC_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
    493 		cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
    494 	} while (deGetFalse())
    495 	#define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, 1)
    496 	#define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, numElements)
    497 
    498 	ADD_OPATOMIC_CASE_1(iadd,	"%unused    = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", OPATOMIC_IADD );
    499 	ADD_OPATOMIC_CASE_1(isub,	"%unused    = OpAtomicISub %i32 %outloc %one %zero %inval\n", OPATOMIC_ISUB );
    500 	ADD_OPATOMIC_CASE_1(iinc,	"%unused    = OpAtomicIIncrement %i32 %outloc %one %zero\n",  OPATOMIC_IINC );
    501 	ADD_OPATOMIC_CASE_1(idec,	"%unused    = OpAtomicIDecrement %i32 %outloc %one %zero\n",  OPATOMIC_IDEC );
    502 	ADD_OPATOMIC_CASE_N(load,	"%inval2    = OpAtomicLoad %i32 %inloc %zero %zero\n"
    503 								"             OpStore %outloc %inval2\n",  OPATOMIC_LOAD );
    504 	ADD_OPATOMIC_CASE_N(store,	"             OpAtomicStore %outloc %zero %zero %inval\n",  OPATOMIC_STORE );
    505 	ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
    506 								"             OpStore %outloc %even\n"
    507 								"%unused    = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",  OPATOMIC_COMPEX );
    508 
    509 	#undef ADD_OPATOMIC_CASE
    510 	#undef ADD_OPATOMIC_CASE_1
    511 	#undef ADD_OPATOMIC_CASE_N
    512 
    513 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
    514 	{
    515 		map<string, string>			specializations;
    516 		ComputeShaderSpec			spec;
    517 		vector<deInt32>				inputInts		(numElements, 0);
    518 		vector<deInt32>				expected		(cases[caseNdx].numOutputElements, -1);
    519 
    520 		specializations["INDEX"]				= (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
    521 		specializations["INSTRUCTION"]			= cases[caseNdx].assembly;
    522 		specializations["BLOCK_DECORATION"]		= useStorageBuffer ? "Block" : "BufferBlock";
    523 		specializations["BLOCK_POINTER_TYPE"]	= useStorageBuffer ? "StorageBuffer" : "Uniform";
    524 		spec.assembly							= shaderTemplate.specialize(specializations);
    525 
    526 		if (useStorageBuffer)
    527 			spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
    528 
    529 		spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
    530 		spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
    531 		spec.numWorkGroups = IVec3(numElements, 1, 1);
    532 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
    533 	}
    534 
    535 	return group.release();
    536 }
    537 
    538 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
    539 {
    540 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
    541 	ComputeShaderSpec				spec;
    542 	de::Random						rnd				(deStringHash(group->getName()));
    543 	const int						numElements		= 100;
    544 	vector<float>					positiveFloats	(numElements, 0);
    545 	vector<float>					negativeFloats	(numElements, 0);
    546 
    547 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
    548 
    549 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    550 		negativeFloats[ndx] = -positiveFloats[ndx];
    551 
    552 	spec.assembly =
    553 		string(getComputeAsmShaderPreamble()) +
    554 
    555 		"%fname1 = OpString \"negateInputs.comp\"\n"
    556 		"%fname2 = OpString \"negateInputs\"\n"
    557 
    558 		"OpSource GLSL 430\n"
    559 		"OpName %main           \"main\"\n"
    560 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    561 
    562 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    563 
    564 		+ string(getComputeAsmInputOutputBufferTraits()) +
    565 
    566 		"OpLine %fname1 0 0\n" // At the earliest possible position
    567 
    568 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
    569 
    570 		"OpLine %fname1 0 1\n" // Multiple OpLines in sequence
    571 		"OpLine %fname2 1 0\n" // Different filenames
    572 		"OpLine %fname1 1000 100000\n"
    573 
    574 		"%id        = OpVariable %uvec3ptr Input\n"
    575 		"%zero      = OpConstant %i32 0\n"
    576 
    577 		"OpLine %fname1 1 1\n" // Before a function
    578 
    579 		"%main      = OpFunction %void None %voidf\n"
    580 		"%label     = OpLabel\n"
    581 
    582 		"OpLine %fname1 1 1\n" // In a function
    583 
    584 		"%idval     = OpLoad %uvec3 %id\n"
    585 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    586 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
    587 		"%inval     = OpLoad %f32 %inloc\n"
    588 		"%neg       = OpFNegate %f32 %inval\n"
    589 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
    590 		"             OpStore %outloc %neg\n"
    591 		"             OpReturn\n"
    592 		"             OpFunctionEnd\n";
    593 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
    594 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
    595 	spec.numWorkGroups = IVec3(numElements, 1, 1);
    596 
    597 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
    598 
    599 	return group.release();
    600 }
    601 
    602 bool veryfiBinaryShader (const ProgramBinary& binary)
    603 {
    604 	const size_t	paternCount			= 3u;
    605 	bool paternsCheck[paternCount]		=
    606 	{
    607 		false, false, false
    608 	};
    609 	const string patersns[paternCount]	=
    610 	{
    611 		"VULKAN CTS",
    612 		"Negative values",
    613 		"Date: 2017/09/21"
    614 	};
    615 	size_t			paternNdx		= 0u;
    616 
    617 	for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
    618 	{
    619 		if (false == paternsCheck[paternNdx] &&
    620 			patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
    621 			deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
    622 		{
    623 			paternsCheck[paternNdx]= true;
    624 			paternNdx++;
    625 			if (paternNdx == paternCount)
    626 				break;
    627 		}
    628 	}
    629 
    630 	for (size_t ndx = 0u; ndx < paternCount; ++ndx)
    631 	{
    632 		if (!paternsCheck[ndx])
    633 			return false;
    634 	}
    635 
    636 	return true;
    637 }
    638 
    639 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
    640 {
    641 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
    642 	ComputeShaderSpec				spec;
    643 	de::Random						rnd				(deStringHash(group->getName()));
    644 	const int						numElements		= 10;
    645 	vector<float>					positiveFloats	(numElements, 0);
    646 	vector<float>					negativeFloats	(numElements, 0);
    647 
    648 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
    649 
    650 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    651 		negativeFloats[ndx] = -positiveFloats[ndx];
    652 
    653 	spec.assembly =
    654 		string(getComputeAsmShaderPreamble()) +
    655 		"%fname = OpString \"negateInputs.comp\"\n"
    656 
    657 		"OpSource GLSL 430\n"
    658 		"OpName %main           \"main\"\n"
    659 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    660 		"OpModuleProcessed \"VULKAN CTS\"\n"					//OpModuleProcessed;
    661 		"OpModuleProcessed \"Negative values\"\n"
    662 		"OpModuleProcessed \"Date: 2017/09/21\"\n"
    663 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    664 
    665 		+ string(getComputeAsmInputOutputBufferTraits())
    666 
    667 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
    668 
    669 		"OpLine %fname 0 1\n"
    670 
    671 		"OpLine %fname 1000 1\n"
    672 
    673 		"%id        = OpVariable %uvec3ptr Input\n"
    674 		"%zero      = OpConstant %i32 0\n"
    675 		"%main      = OpFunction %void None %voidf\n"
    676 
    677 		"%label     = OpLabel\n"
    678 		"%idval     = OpLoad %uvec3 %id\n"
    679 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    680 
    681 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
    682 		"%inval     = OpLoad %f32 %inloc\n"
    683 		"%neg       = OpFNegate %f32 %inval\n"
    684 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
    685 		"             OpStore %outloc %neg\n"
    686 		"             OpReturn\n"
    687 		"             OpFunctionEnd\n";
    688 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
    689 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
    690 	spec.numWorkGroups = IVec3(numElements, 1, 1);
    691 	spec.verifyBinary = veryfiBinaryShader;
    692 	spec.spirvVersion = SPIRV_VERSION_1_3;
    693 
    694 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
    695 
    696 	return group.release();
    697 }
    698 
    699 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
    700 {
    701 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
    702 	ComputeShaderSpec				spec;
    703 	de::Random						rnd				(deStringHash(group->getName()));
    704 	const int						numElements		= 100;
    705 	vector<float>					positiveFloats	(numElements, 0);
    706 	vector<float>					negativeFloats	(numElements, 0);
    707 
    708 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
    709 
    710 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    711 		negativeFloats[ndx] = -positiveFloats[ndx];
    712 
    713 	spec.assembly =
    714 		string(getComputeAsmShaderPreamble()) +
    715 
    716 		"%fname = OpString \"negateInputs.comp\"\n"
    717 
    718 		"OpSource GLSL 430\n"
    719 		"OpName %main           \"main\"\n"
    720 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    721 
    722 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    723 
    724 		+ string(getComputeAsmInputOutputBufferTraits()) +
    725 
    726 		"OpNoLine\n" // At the earliest possible position, without preceding OpLine
    727 
    728 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
    729 
    730 		"OpLine %fname 0 1\n"
    731 		"OpNoLine\n" // Immediately following a preceding OpLine
    732 
    733 		"OpLine %fname 1000 1\n"
    734 
    735 		"%id        = OpVariable %uvec3ptr Input\n"
    736 		"%zero      = OpConstant %i32 0\n"
    737 
    738 		"OpNoLine\n" // Contents after the previous OpLine
    739 
    740 		"%main      = OpFunction %void None %voidf\n"
    741 		"%label     = OpLabel\n"
    742 		"%idval     = OpLoad %uvec3 %id\n"
    743 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    744 
    745 		"OpNoLine\n" // Multiple OpNoLine
    746 		"OpNoLine\n"
    747 		"OpNoLine\n"
    748 
    749 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
    750 		"%inval     = OpLoad %f32 %inloc\n"
    751 		"%neg       = OpFNegate %f32 %inval\n"
    752 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
    753 		"             OpStore %outloc %neg\n"
    754 		"             OpReturn\n"
    755 		"             OpFunctionEnd\n";
    756 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
    757 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
    758 	spec.numWorkGroups = IVec3(numElements, 1, 1);
    759 
    760 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
    761 
    762 	return group.release();
    763 }
    764 
    765 // Compare instruction for the contraction compute case.
    766 // Returns true if the output is what is expected from the test case.
    767 bool compareNoContractCase(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
    768 {
    769 	if (outputAllocs.size() != 1)
    770 		return false;
    771 
    772 	// Only size is needed because we are not comparing the exact values.
    773 	size_t byteSize = expectedOutputs[0]->getByteSize();
    774 
    775 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
    776 
    777 	for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
    778 		if (outputAsFloat[i] != 0.f &&
    779 			outputAsFloat[i] != -ldexp(1, -24)) {
    780 			return false;
    781 		}
    782 	}
    783 
    784 	return true;
    785 }
    786 
    787 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
    788 {
    789 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
    790 	vector<CaseParameter>			cases;
    791 	const int						numElements		= 100;
    792 	vector<float>					inputFloats1	(numElements, 0);
    793 	vector<float>					inputFloats2	(numElements, 0);
    794 	vector<float>					outputFloats	(numElements, 0);
    795 	const StringTemplate			shaderTemplate	(
    796 		string(getComputeAsmShaderPreamble()) +
    797 
    798 		"OpName %main           \"main\"\n"
    799 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    800 
    801 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    802 
    803 		"${DECORATION}\n"
    804 
    805 		"OpDecorate %buf BufferBlock\n"
    806 		"OpDecorate %indata1 DescriptorSet 0\n"
    807 		"OpDecorate %indata1 Binding 0\n"
    808 		"OpDecorate %indata2 DescriptorSet 0\n"
    809 		"OpDecorate %indata2 Binding 1\n"
    810 		"OpDecorate %outdata DescriptorSet 0\n"
    811 		"OpDecorate %outdata Binding 2\n"
    812 		"OpDecorate %f32arr ArrayStride 4\n"
    813 		"OpMemberDecorate %buf 0 Offset 0\n"
    814 
    815 		+ string(getComputeAsmCommonTypes()) +
    816 
    817 		"%buf        = OpTypeStruct %f32arr\n"
    818 		"%bufptr     = OpTypePointer Uniform %buf\n"
    819 		"%indata1    = OpVariable %bufptr Uniform\n"
    820 		"%indata2    = OpVariable %bufptr Uniform\n"
    821 		"%outdata    = OpVariable %bufptr Uniform\n"
    822 
    823 		"%id         = OpVariable %uvec3ptr Input\n"
    824 		"%zero       = OpConstant %i32 0\n"
    825 		"%c_f_m1     = OpConstant %f32 -1.\n"
    826 
    827 		"%main       = OpFunction %void None %voidf\n"
    828 		"%label      = OpLabel\n"
    829 		"%idval      = OpLoad %uvec3 %id\n"
    830 		"%x          = OpCompositeExtract %u32 %idval 0\n"
    831 		"%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
    832 		"%inval1     = OpLoad %f32 %inloc1\n"
    833 		"%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
    834 		"%inval2     = OpLoad %f32 %inloc2\n"
    835 		"%mul        = OpFMul %f32 %inval1 %inval2\n"
    836 		"%add        = OpFAdd %f32 %mul %c_f_m1\n"
    837 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
    838 		"              OpStore %outloc %add\n"
    839 		"              OpReturn\n"
    840 		"              OpFunctionEnd\n");
    841 
    842 	cases.push_back(CaseParameter("multiplication",	"OpDecorate %mul NoContraction"));
    843 	cases.push_back(CaseParameter("addition",		"OpDecorate %add NoContraction"));
    844 	cases.push_back(CaseParameter("both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
    845 
    846 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    847 	{
    848 		inputFloats1[ndx]	= 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
    849 		inputFloats2[ndx]	= 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
    850 		// Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
    851 		// conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
    852 		// So the final result will be 0.f or 0x1p-24.
    853 		// If the operation is combined into a precise fused multiply-add, then the result would be
    854 		// 2^-46 (0xa8800000).
    855 		outputFloats[ndx]	= 0.f;
    856 	}
    857 
    858 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
    859 	{
    860 		map<string, string>		specializations;
    861 		ComputeShaderSpec		spec;
    862 
    863 		specializations["DECORATION"] = cases[caseNdx].param;
    864 		spec.assembly = shaderTemplate.specialize(specializations);
    865 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
    866 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
    867 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
    868 		spec.numWorkGroups = IVec3(numElements, 1, 1);
    869 		// Check against the two possible answers based on rounding mode.
    870 		spec.verifyIO = &compareNoContractCase;
    871 
    872 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
    873 	}
    874 	return group.release();
    875 }
    876 
    877 bool compareFRem(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
    878 {
    879 	if (outputAllocs.size() != 1)
    880 		return false;
    881 
    882 	vector<deUint8>	expectedBytes;
    883 	expectedOutputs[0]->getBytes(expectedBytes);
    884 
    885 	const float*	expectedOutputAsFloat	= reinterpret_cast<const float*>(&expectedBytes.front());
    886 	const float*	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
    887 
    888 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
    889 	{
    890 		const float f0 = expectedOutputAsFloat[idx];
    891 		const float f1 = outputAsFloat[idx];
    892 		// \todo relative error needs to be fairly high because FRem may be implemented as
    893 		// (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
    894 		if (deFloatAbs((f1 - f0) / f0) > 0.02)
    895 			return false;
    896 	}
    897 
    898 	return true;
    899 }
    900 
    901 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
    902 {
    903 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
    904 	ComputeShaderSpec				spec;
    905 	de::Random						rnd				(deStringHash(group->getName()));
    906 	const int						numElements		= 200;
    907 	vector<float>					inputFloats1	(numElements, 0);
    908 	vector<float>					inputFloats2	(numElements, 0);
    909 	vector<float>					outputFloats	(numElements, 0);
    910 
    911 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
    912 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
    913 
    914 	for (size_t ndx = 0; ndx < numElements; ++ndx)
    915 	{
    916 		// Guard against divisors near zero.
    917 		if (std::fabs(inputFloats2[ndx]) < 1e-3)
    918 			inputFloats2[ndx] = 8.f;
    919 
    920 		// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
    921 		outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
    922 	}
    923 
    924 	spec.assembly =
    925 		string(getComputeAsmShaderPreamble()) +
    926 
    927 		"OpName %main           \"main\"\n"
    928 		"OpName %id             \"gl_GlobalInvocationID\"\n"
    929 
    930 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
    931 
    932 		"OpDecorate %buf BufferBlock\n"
    933 		"OpDecorate %indata1 DescriptorSet 0\n"
    934 		"OpDecorate %indata1 Binding 0\n"
    935 		"OpDecorate %indata2 DescriptorSet 0\n"
    936 		"OpDecorate %indata2 Binding 1\n"
    937 		"OpDecorate %outdata DescriptorSet 0\n"
    938 		"OpDecorate %outdata Binding 2\n"
    939 		"OpDecorate %f32arr ArrayStride 4\n"
    940 		"OpMemberDecorate %buf 0 Offset 0\n"
    941 
    942 		+ string(getComputeAsmCommonTypes()) +
    943 
    944 		"%buf        = OpTypeStruct %f32arr\n"
    945 		"%bufptr     = OpTypePointer Uniform %buf\n"
    946 		"%indata1    = OpVariable %bufptr Uniform\n"
    947 		"%indata2    = OpVariable %bufptr Uniform\n"
    948 		"%outdata    = OpVariable %bufptr Uniform\n"
    949 
    950 		"%id        = OpVariable %uvec3ptr Input\n"
    951 		"%zero      = OpConstant %i32 0\n"
    952 
    953 		"%main      = OpFunction %void None %voidf\n"
    954 		"%label     = OpLabel\n"
    955 		"%idval     = OpLoad %uvec3 %id\n"
    956 		"%x         = OpCompositeExtract %u32 %idval 0\n"
    957 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
    958 		"%inval1    = OpLoad %f32 %inloc1\n"
    959 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
    960 		"%inval2    = OpLoad %f32 %inloc2\n"
    961 		"%rem       = OpFRem %f32 %inval1 %inval2\n"
    962 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
    963 		"             OpStore %outloc %rem\n"
    964 		"             OpReturn\n"
    965 		"             OpFunctionEnd\n";
    966 
    967 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
    968 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
    969 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
    970 	spec.numWorkGroups = IVec3(numElements, 1, 1);
    971 	spec.verifyIO = &compareFRem;
    972 
    973 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
    974 
    975 	return group.release();
    976 }
    977 
    978 bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
    979 {
    980 	if (outputAllocs.size() != 1)
    981 		return false;
    982 
    983 	const BufferSp&			expectedOutput			(expectedOutputs[0]);
    984 	std::vector<deUint8>	data;
    985 	expectedOutput->getBytes(data);
    986 
    987 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
    988 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
    989 
    990 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
    991 	{
    992 		const float f0 = expectedOutputAsFloat[idx];
    993 		const float f1 = outputAsFloat[idx];
    994 
    995 		// For NMin, we accept NaN as output if both inputs were NaN.
    996 		// Otherwise the NaN is the wrong choise, as on architectures that
    997 		// do not handle NaN, those are huge values.
    998 		if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
    999 			return false;
   1000 	}
   1001 
   1002 	return true;
   1003 }
   1004 
   1005 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
   1006 {
   1007 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
   1008 	ComputeShaderSpec				spec;
   1009 	de::Random						rnd				(deStringHash(group->getName()));
   1010 	const int						numElements		= 200;
   1011 	vector<float>					inputFloats1	(numElements, 0);
   1012 	vector<float>					inputFloats2	(numElements, 0);
   1013 	vector<float>					outputFloats	(numElements, 0);
   1014 
   1015 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
   1016 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
   1017 
   1018 	// Make the first case a full-NAN case.
   1019 	inputFloats1[0] = TCU_NAN;
   1020 	inputFloats2[0] = TCU_NAN;
   1021 
   1022 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1023 	{
   1024 		// By default, pick the smallest
   1025 		outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
   1026 
   1027 		// Make half of the cases NaN cases
   1028 		if ((ndx & 1) == 0)
   1029 		{
   1030 			// Alternate between the NaN operand
   1031 			if ((ndx & 2) == 0)
   1032 			{
   1033 				outputFloats[ndx] = inputFloats2[ndx];
   1034 				inputFloats1[ndx] = TCU_NAN;
   1035 			}
   1036 			else
   1037 			{
   1038 				outputFloats[ndx] = inputFloats1[ndx];
   1039 				inputFloats2[ndx] = TCU_NAN;
   1040 			}
   1041 		}
   1042 	}
   1043 
   1044 	spec.assembly =
   1045 		"OpCapability Shader\n"
   1046 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
   1047 		"OpMemoryModel Logical GLSL450\n"
   1048 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   1049 		"OpExecutionMode %main LocalSize 1 1 1\n"
   1050 
   1051 		"OpName %main           \"main\"\n"
   1052 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1053 
   1054 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1055 
   1056 		"OpDecorate %buf BufferBlock\n"
   1057 		"OpDecorate %indata1 DescriptorSet 0\n"
   1058 		"OpDecorate %indata1 Binding 0\n"
   1059 		"OpDecorate %indata2 DescriptorSet 0\n"
   1060 		"OpDecorate %indata2 Binding 1\n"
   1061 		"OpDecorate %outdata DescriptorSet 0\n"
   1062 		"OpDecorate %outdata Binding 2\n"
   1063 		"OpDecorate %f32arr ArrayStride 4\n"
   1064 		"OpMemberDecorate %buf 0 Offset 0\n"
   1065 
   1066 		+ string(getComputeAsmCommonTypes()) +
   1067 
   1068 		"%buf        = OpTypeStruct %f32arr\n"
   1069 		"%bufptr     = OpTypePointer Uniform %buf\n"
   1070 		"%indata1    = OpVariable %bufptr Uniform\n"
   1071 		"%indata2    = OpVariable %bufptr Uniform\n"
   1072 		"%outdata    = OpVariable %bufptr Uniform\n"
   1073 
   1074 		"%id        = OpVariable %uvec3ptr Input\n"
   1075 		"%zero      = OpConstant %i32 0\n"
   1076 
   1077 		"%main      = OpFunction %void None %voidf\n"
   1078 		"%label     = OpLabel\n"
   1079 		"%idval     = OpLoad %uvec3 %id\n"
   1080 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   1081 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
   1082 		"%inval1    = OpLoad %f32 %inloc1\n"
   1083 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
   1084 		"%inval2    = OpLoad %f32 %inloc2\n"
   1085 		"%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
   1086 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   1087 		"             OpStore %outloc %rem\n"
   1088 		"             OpReturn\n"
   1089 		"             OpFunctionEnd\n";
   1090 
   1091 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
   1092 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
   1093 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   1094 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   1095 	spec.verifyIO = &compareNMin;
   1096 
   1097 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
   1098 
   1099 	return group.release();
   1100 }
   1101 
   1102 bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
   1103 {
   1104 	if (outputAllocs.size() != 1)
   1105 		return false;
   1106 
   1107 	const BufferSp&			expectedOutput			= expectedOutputs[0];
   1108 	std::vector<deUint8>	data;
   1109 	expectedOutput->getBytes(data);
   1110 
   1111 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
   1112 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
   1113 
   1114 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
   1115 	{
   1116 		const float f0 = expectedOutputAsFloat[idx];
   1117 		const float f1 = outputAsFloat[idx];
   1118 
   1119 		// For NMax, NaN is considered acceptable result, since in
   1120 		// architectures that do not handle NaNs, those are huge values.
   1121 		if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
   1122 			return false;
   1123 	}
   1124 
   1125 	return true;
   1126 }
   1127 
   1128 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
   1129 {
   1130 	de::MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
   1131 	ComputeShaderSpec				spec;
   1132 	de::Random						rnd				(deStringHash(group->getName()));
   1133 	const int						numElements		= 200;
   1134 	vector<float>					inputFloats1	(numElements, 0);
   1135 	vector<float>					inputFloats2	(numElements, 0);
   1136 	vector<float>					outputFloats	(numElements, 0);
   1137 
   1138 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
   1139 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
   1140 
   1141 	// Make the first case a full-NAN case.
   1142 	inputFloats1[0] = TCU_NAN;
   1143 	inputFloats2[0] = TCU_NAN;
   1144 
   1145 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1146 	{
   1147 		// By default, pick the biggest
   1148 		outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
   1149 
   1150 		// Make half of the cases NaN cases
   1151 		if ((ndx & 1) == 0)
   1152 		{
   1153 			// Alternate between the NaN operand
   1154 			if ((ndx & 2) == 0)
   1155 			{
   1156 				outputFloats[ndx] = inputFloats2[ndx];
   1157 				inputFloats1[ndx] = TCU_NAN;
   1158 			}
   1159 			else
   1160 			{
   1161 				outputFloats[ndx] = inputFloats1[ndx];
   1162 				inputFloats2[ndx] = TCU_NAN;
   1163 			}
   1164 		}
   1165 	}
   1166 
   1167 	spec.assembly =
   1168 		"OpCapability Shader\n"
   1169 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
   1170 		"OpMemoryModel Logical GLSL450\n"
   1171 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   1172 		"OpExecutionMode %main LocalSize 1 1 1\n"
   1173 
   1174 		"OpName %main           \"main\"\n"
   1175 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1176 
   1177 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1178 
   1179 		"OpDecorate %buf BufferBlock\n"
   1180 		"OpDecorate %indata1 DescriptorSet 0\n"
   1181 		"OpDecorate %indata1 Binding 0\n"
   1182 		"OpDecorate %indata2 DescriptorSet 0\n"
   1183 		"OpDecorate %indata2 Binding 1\n"
   1184 		"OpDecorate %outdata DescriptorSet 0\n"
   1185 		"OpDecorate %outdata Binding 2\n"
   1186 		"OpDecorate %f32arr ArrayStride 4\n"
   1187 		"OpMemberDecorate %buf 0 Offset 0\n"
   1188 
   1189 		+ string(getComputeAsmCommonTypes()) +
   1190 
   1191 		"%buf        = OpTypeStruct %f32arr\n"
   1192 		"%bufptr     = OpTypePointer Uniform %buf\n"
   1193 		"%indata1    = OpVariable %bufptr Uniform\n"
   1194 		"%indata2    = OpVariable %bufptr Uniform\n"
   1195 		"%outdata    = OpVariable %bufptr Uniform\n"
   1196 
   1197 		"%id        = OpVariable %uvec3ptr Input\n"
   1198 		"%zero      = OpConstant %i32 0\n"
   1199 
   1200 		"%main      = OpFunction %void None %voidf\n"
   1201 		"%label     = OpLabel\n"
   1202 		"%idval     = OpLoad %uvec3 %id\n"
   1203 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   1204 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
   1205 		"%inval1    = OpLoad %f32 %inloc1\n"
   1206 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
   1207 		"%inval2    = OpLoad %f32 %inloc2\n"
   1208 		"%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
   1209 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   1210 		"             OpStore %outloc %rem\n"
   1211 		"             OpReturn\n"
   1212 		"             OpFunctionEnd\n";
   1213 
   1214 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
   1215 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
   1216 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   1217 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   1218 	spec.verifyIO = &compareNMax;
   1219 
   1220 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
   1221 
   1222 	return group.release();
   1223 }
   1224 
   1225 bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
   1226 {
   1227 	if (outputAllocs.size() != 1)
   1228 		return false;
   1229 
   1230 	const BufferSp&			expectedOutput			= expectedOutputs[0];
   1231 	std::vector<deUint8>	data;
   1232 	expectedOutput->getBytes(data);
   1233 
   1234 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
   1235 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
   1236 
   1237 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
   1238 	{
   1239 		const float e0 = expectedOutputAsFloat[idx * 2];
   1240 		const float e1 = expectedOutputAsFloat[idx * 2 + 1];
   1241 		const float res = outputAsFloat[idx];
   1242 
   1243 		// For NClamp, we have two possible outcomes based on
   1244 		// whether NaNs are handled or not.
   1245 		// If either min or max value is NaN, the result is undefined,
   1246 		// so this test doesn't stress those. If the clamped value is
   1247 		// NaN, and NaNs are handled, the result is min; if NaNs are not
   1248 		// handled, they are big values that result in max.
   1249 		// If all three parameters are NaN, the result should be NaN.
   1250 		if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
   1251 			 (deFloatAbs(e0 - res) < 0.00001f) ||
   1252 			 (deFloatAbs(e1 - res) < 0.00001f)))
   1253 			return false;
   1254 	}
   1255 
   1256 	return true;
   1257 }
   1258 
   1259 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
   1260 {
   1261 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
   1262 	ComputeShaderSpec				spec;
   1263 	de::Random						rnd				(deStringHash(group->getName()));
   1264 	const int						numElements		= 200;
   1265 	vector<float>					inputFloats1	(numElements, 0);
   1266 	vector<float>					inputFloats2	(numElements, 0);
   1267 	vector<float>					inputFloats3	(numElements, 0);
   1268 	vector<float>					outputFloats	(numElements * 2, 0);
   1269 
   1270 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
   1271 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
   1272 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
   1273 
   1274 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1275 	{
   1276 		// Results are only defined if max value is bigger than min value.
   1277 		if (inputFloats2[ndx] > inputFloats3[ndx])
   1278 		{
   1279 			float t = inputFloats2[ndx];
   1280 			inputFloats2[ndx] = inputFloats3[ndx];
   1281 			inputFloats3[ndx] = t;
   1282 		}
   1283 
   1284 		// By default, do the clamp, setting both possible answers
   1285 		float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
   1286 
   1287 		float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
   1288 		float maxResB = maxResA;
   1289 
   1290 		// Alternate between the NaN cases
   1291 		if (ndx & 1)
   1292 		{
   1293 			inputFloats1[ndx] = TCU_NAN;
   1294 			// If NaN is handled, the result should be same as the clamp minimum.
   1295 			// If NaN is not handled, the result should clamp to the clamp maximum.
   1296 			maxResA = inputFloats2[ndx];
   1297 			maxResB = inputFloats3[ndx];
   1298 		}
   1299 		else
   1300 		{
   1301 			// Not a NaN case - only one legal result.
   1302 			maxResA = defaultRes;
   1303 			maxResB = defaultRes;
   1304 		}
   1305 
   1306 		outputFloats[ndx * 2] = maxResA;
   1307 		outputFloats[ndx * 2 + 1] = maxResB;
   1308 	}
   1309 
   1310 	// Make the first case a full-NAN case.
   1311 	inputFloats1[0] = TCU_NAN;
   1312 	inputFloats2[0] = TCU_NAN;
   1313 	inputFloats3[0] = TCU_NAN;
   1314 	outputFloats[0] = TCU_NAN;
   1315 	outputFloats[1] = TCU_NAN;
   1316 
   1317 	spec.assembly =
   1318 		"OpCapability Shader\n"
   1319 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
   1320 		"OpMemoryModel Logical GLSL450\n"
   1321 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   1322 		"OpExecutionMode %main LocalSize 1 1 1\n"
   1323 
   1324 		"OpName %main           \"main\"\n"
   1325 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1326 
   1327 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1328 
   1329 		"OpDecorate %buf BufferBlock\n"
   1330 		"OpDecorate %indata1 DescriptorSet 0\n"
   1331 		"OpDecorate %indata1 Binding 0\n"
   1332 		"OpDecorate %indata2 DescriptorSet 0\n"
   1333 		"OpDecorate %indata2 Binding 1\n"
   1334 		"OpDecorate %indata3 DescriptorSet 0\n"
   1335 		"OpDecorate %indata3 Binding 2\n"
   1336 		"OpDecorate %outdata DescriptorSet 0\n"
   1337 		"OpDecorate %outdata Binding 3\n"
   1338 		"OpDecorate %f32arr ArrayStride 4\n"
   1339 		"OpMemberDecorate %buf 0 Offset 0\n"
   1340 
   1341 		+ string(getComputeAsmCommonTypes()) +
   1342 
   1343 		"%buf        = OpTypeStruct %f32arr\n"
   1344 		"%bufptr     = OpTypePointer Uniform %buf\n"
   1345 		"%indata1    = OpVariable %bufptr Uniform\n"
   1346 		"%indata2    = OpVariable %bufptr Uniform\n"
   1347 		"%indata3    = OpVariable %bufptr Uniform\n"
   1348 		"%outdata    = OpVariable %bufptr Uniform\n"
   1349 
   1350 		"%id        = OpVariable %uvec3ptr Input\n"
   1351 		"%zero      = OpConstant %i32 0\n"
   1352 
   1353 		"%main      = OpFunction %void None %voidf\n"
   1354 		"%label     = OpLabel\n"
   1355 		"%idval     = OpLoad %uvec3 %id\n"
   1356 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   1357 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
   1358 		"%inval1    = OpLoad %f32 %inloc1\n"
   1359 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
   1360 		"%inval2    = OpLoad %f32 %inloc2\n"
   1361 		"%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
   1362 		"%inval3    = OpLoad %f32 %inloc3\n"
   1363 		"%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
   1364 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   1365 		"             OpStore %outloc %rem\n"
   1366 		"             OpReturn\n"
   1367 		"             OpFunctionEnd\n";
   1368 
   1369 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
   1370 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
   1371 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
   1372 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   1373 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   1374 	spec.verifyIO = &compareNClamp;
   1375 
   1376 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
   1377 
   1378 	return group.release();
   1379 }
   1380 
   1381 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
   1382 {
   1383 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
   1384 	de::Random						rnd				(deStringHash(group->getName()));
   1385 	const int						numElements		= 200;
   1386 
   1387 	const struct CaseParams
   1388 	{
   1389 		const char*		name;
   1390 		const char*		failMessage;		// customized status message
   1391 		qpTestResult	failResult;			// override status on failure
   1392 		int				op1Min, op1Max;		// operand ranges
   1393 		int				op2Min, op2Max;
   1394 	} cases[] =
   1395 	{
   1396 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
   1397 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
   1398 	};
   1399 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   1400 
   1401 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   1402 	{
   1403 		const CaseParams&	params		= cases[caseNdx];
   1404 		ComputeShaderSpec	spec;
   1405 		vector<deInt32>		inputInts1	(numElements, 0);
   1406 		vector<deInt32>		inputInts2	(numElements, 0);
   1407 		vector<deInt32>		outputInts	(numElements, 0);
   1408 
   1409 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
   1410 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
   1411 
   1412 		for (int ndx = 0; ndx < numElements; ++ndx)
   1413 		{
   1414 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
   1415 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
   1416 		}
   1417 
   1418 		spec.assembly =
   1419 			string(getComputeAsmShaderPreamble()) +
   1420 
   1421 			"OpName %main           \"main\"\n"
   1422 			"OpName %id             \"gl_GlobalInvocationID\"\n"
   1423 
   1424 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1425 
   1426 			"OpDecorate %buf BufferBlock\n"
   1427 			"OpDecorate %indata1 DescriptorSet 0\n"
   1428 			"OpDecorate %indata1 Binding 0\n"
   1429 			"OpDecorate %indata2 DescriptorSet 0\n"
   1430 			"OpDecorate %indata2 Binding 1\n"
   1431 			"OpDecorate %outdata DescriptorSet 0\n"
   1432 			"OpDecorate %outdata Binding 2\n"
   1433 			"OpDecorate %i32arr ArrayStride 4\n"
   1434 			"OpMemberDecorate %buf 0 Offset 0\n"
   1435 
   1436 			+ string(getComputeAsmCommonTypes()) +
   1437 
   1438 			"%buf        = OpTypeStruct %i32arr\n"
   1439 			"%bufptr     = OpTypePointer Uniform %buf\n"
   1440 			"%indata1    = OpVariable %bufptr Uniform\n"
   1441 			"%indata2    = OpVariable %bufptr Uniform\n"
   1442 			"%outdata    = OpVariable %bufptr Uniform\n"
   1443 
   1444 			"%id        = OpVariable %uvec3ptr Input\n"
   1445 			"%zero      = OpConstant %i32 0\n"
   1446 
   1447 			"%main      = OpFunction %void None %voidf\n"
   1448 			"%label     = OpLabel\n"
   1449 			"%idval     = OpLoad %uvec3 %id\n"
   1450 			"%x         = OpCompositeExtract %u32 %idval 0\n"
   1451 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
   1452 			"%inval1    = OpLoad %i32 %inloc1\n"
   1453 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
   1454 			"%inval2    = OpLoad %i32 %inloc2\n"
   1455 			"%rem       = OpSRem %i32 %inval1 %inval2\n"
   1456 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
   1457 			"             OpStore %outloc %rem\n"
   1458 			"             OpReturn\n"
   1459 			"             OpFunctionEnd\n";
   1460 
   1461 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
   1462 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
   1463 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
   1464 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
   1465 		spec.failResult			= params.failResult;
   1466 		spec.failMessage		= params.failMessage;
   1467 
   1468 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
   1469 	}
   1470 
   1471 	return group.release();
   1472 }
   1473 
   1474 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
   1475 {
   1476 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
   1477 	de::Random						rnd				(deStringHash(group->getName()));
   1478 	const int						numElements		= 200;
   1479 
   1480 	const struct CaseParams
   1481 	{
   1482 		const char*		name;
   1483 		const char*		failMessage;		// customized status message
   1484 		qpTestResult	failResult;			// override status on failure
   1485 		bool			positive;
   1486 	} cases[] =
   1487 	{
   1488 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
   1489 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
   1490 	};
   1491 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   1492 
   1493 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   1494 	{
   1495 		const CaseParams&	params		= cases[caseNdx];
   1496 		ComputeShaderSpec	spec;
   1497 		vector<deInt64>		inputInts1	(numElements, 0);
   1498 		vector<deInt64>		inputInts2	(numElements, 0);
   1499 		vector<deInt64>		outputInts	(numElements, 0);
   1500 
   1501 		if (params.positive)
   1502 		{
   1503 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
   1504 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
   1505 		}
   1506 		else
   1507 		{
   1508 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
   1509 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
   1510 		}
   1511 
   1512 		for (int ndx = 0; ndx < numElements; ++ndx)
   1513 		{
   1514 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
   1515 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
   1516 		}
   1517 
   1518 		spec.assembly =
   1519 			"OpCapability Int64\n"
   1520 
   1521 			+ string(getComputeAsmShaderPreamble()) +
   1522 
   1523 			"OpName %main           \"main\"\n"
   1524 			"OpName %id             \"gl_GlobalInvocationID\"\n"
   1525 
   1526 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1527 
   1528 			"OpDecorate %buf BufferBlock\n"
   1529 			"OpDecorate %indata1 DescriptorSet 0\n"
   1530 			"OpDecorate %indata1 Binding 0\n"
   1531 			"OpDecorate %indata2 DescriptorSet 0\n"
   1532 			"OpDecorate %indata2 Binding 1\n"
   1533 			"OpDecorate %outdata DescriptorSet 0\n"
   1534 			"OpDecorate %outdata Binding 2\n"
   1535 			"OpDecorate %i64arr ArrayStride 8\n"
   1536 			"OpMemberDecorate %buf 0 Offset 0\n"
   1537 
   1538 			+ string(getComputeAsmCommonTypes())
   1539 			+ string(getComputeAsmCommonInt64Types()) +
   1540 
   1541 			"%buf        = OpTypeStruct %i64arr\n"
   1542 			"%bufptr     = OpTypePointer Uniform %buf\n"
   1543 			"%indata1    = OpVariable %bufptr Uniform\n"
   1544 			"%indata2    = OpVariable %bufptr Uniform\n"
   1545 			"%outdata    = OpVariable %bufptr Uniform\n"
   1546 
   1547 			"%id        = OpVariable %uvec3ptr Input\n"
   1548 			"%zero      = OpConstant %i64 0\n"
   1549 
   1550 			"%main      = OpFunction %void None %voidf\n"
   1551 			"%label     = OpLabel\n"
   1552 			"%idval     = OpLoad %uvec3 %id\n"
   1553 			"%x         = OpCompositeExtract %u32 %idval 0\n"
   1554 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
   1555 			"%inval1    = OpLoad %i64 %inloc1\n"
   1556 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
   1557 			"%inval2    = OpLoad %i64 %inloc2\n"
   1558 			"%rem       = OpSRem %i64 %inval1 %inval2\n"
   1559 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
   1560 			"             OpStore %outloc %rem\n"
   1561 			"             OpReturn\n"
   1562 			"             OpFunctionEnd\n";
   1563 
   1564 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
   1565 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
   1566 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
   1567 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
   1568 		spec.failResult			= params.failResult;
   1569 		spec.failMessage		= params.failMessage;
   1570 
   1571 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
   1572 	}
   1573 
   1574 	return group.release();
   1575 }
   1576 
   1577 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
   1578 {
   1579 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
   1580 	de::Random						rnd				(deStringHash(group->getName()));
   1581 	const int						numElements		= 200;
   1582 
   1583 	const struct CaseParams
   1584 	{
   1585 		const char*		name;
   1586 		const char*		failMessage;		// customized status message
   1587 		qpTestResult	failResult;			// override status on failure
   1588 		int				op1Min, op1Max;		// operand ranges
   1589 		int				op2Min, op2Max;
   1590 	} cases[] =
   1591 	{
   1592 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
   1593 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
   1594 	};
   1595 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   1596 
   1597 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   1598 	{
   1599 		const CaseParams&	params		= cases[caseNdx];
   1600 
   1601 		ComputeShaderSpec	spec;
   1602 		vector<deInt32>		inputInts1	(numElements, 0);
   1603 		vector<deInt32>		inputInts2	(numElements, 0);
   1604 		vector<deInt32>		outputInts	(numElements, 0);
   1605 
   1606 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
   1607 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
   1608 
   1609 		for (int ndx = 0; ndx < numElements; ++ndx)
   1610 		{
   1611 			deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
   1612 			if (rem == 0)
   1613 			{
   1614 				outputInts[ndx] = 0;
   1615 			}
   1616 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
   1617 			{
   1618 				// They have the same sign
   1619 				outputInts[ndx] = rem;
   1620 			}
   1621 			else
   1622 			{
   1623 				// They have opposite sign.  The remainder operation takes the
   1624 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
   1625 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
   1626 				// the result has the correct sign and that it is still
   1627 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
   1628 				//
   1629 				// See also http://mathforum.org/library/drmath/view/52343.html
   1630 				outputInts[ndx] = rem + inputInts2[ndx];
   1631 			}
   1632 		}
   1633 
   1634 		spec.assembly =
   1635 			string(getComputeAsmShaderPreamble()) +
   1636 
   1637 			"OpName %main           \"main\"\n"
   1638 			"OpName %id             \"gl_GlobalInvocationID\"\n"
   1639 
   1640 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1641 
   1642 			"OpDecorate %buf BufferBlock\n"
   1643 			"OpDecorate %indata1 DescriptorSet 0\n"
   1644 			"OpDecorate %indata1 Binding 0\n"
   1645 			"OpDecorate %indata2 DescriptorSet 0\n"
   1646 			"OpDecorate %indata2 Binding 1\n"
   1647 			"OpDecorate %outdata DescriptorSet 0\n"
   1648 			"OpDecorate %outdata Binding 2\n"
   1649 			"OpDecorate %i32arr ArrayStride 4\n"
   1650 			"OpMemberDecorate %buf 0 Offset 0\n"
   1651 
   1652 			+ string(getComputeAsmCommonTypes()) +
   1653 
   1654 			"%buf        = OpTypeStruct %i32arr\n"
   1655 			"%bufptr     = OpTypePointer Uniform %buf\n"
   1656 			"%indata1    = OpVariable %bufptr Uniform\n"
   1657 			"%indata2    = OpVariable %bufptr Uniform\n"
   1658 			"%outdata    = OpVariable %bufptr Uniform\n"
   1659 
   1660 			"%id        = OpVariable %uvec3ptr Input\n"
   1661 			"%zero      = OpConstant %i32 0\n"
   1662 
   1663 			"%main      = OpFunction %void None %voidf\n"
   1664 			"%label     = OpLabel\n"
   1665 			"%idval     = OpLoad %uvec3 %id\n"
   1666 			"%x         = OpCompositeExtract %u32 %idval 0\n"
   1667 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
   1668 			"%inval1    = OpLoad %i32 %inloc1\n"
   1669 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
   1670 			"%inval2    = OpLoad %i32 %inloc2\n"
   1671 			"%rem       = OpSMod %i32 %inval1 %inval2\n"
   1672 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
   1673 			"             OpStore %outloc %rem\n"
   1674 			"             OpReturn\n"
   1675 			"             OpFunctionEnd\n";
   1676 
   1677 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
   1678 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
   1679 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
   1680 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
   1681 		spec.failResult			= params.failResult;
   1682 		spec.failMessage		= params.failMessage;
   1683 
   1684 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
   1685 	}
   1686 
   1687 	return group.release();
   1688 }
   1689 
   1690 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
   1691 {
   1692 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
   1693 	de::Random						rnd				(deStringHash(group->getName()));
   1694 	const int						numElements		= 200;
   1695 
   1696 	const struct CaseParams
   1697 	{
   1698 		const char*		name;
   1699 		const char*		failMessage;		// customized status message
   1700 		qpTestResult	failResult;			// override status on failure
   1701 		bool			positive;
   1702 	} cases[] =
   1703 	{
   1704 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
   1705 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
   1706 	};
   1707 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   1708 
   1709 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   1710 	{
   1711 		const CaseParams&	params		= cases[caseNdx];
   1712 
   1713 		ComputeShaderSpec	spec;
   1714 		vector<deInt64>		inputInts1	(numElements, 0);
   1715 		vector<deInt64>		inputInts2	(numElements, 0);
   1716 		vector<deInt64>		outputInts	(numElements, 0);
   1717 
   1718 
   1719 		if (params.positive)
   1720 		{
   1721 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
   1722 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
   1723 		}
   1724 		else
   1725 		{
   1726 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
   1727 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
   1728 		}
   1729 
   1730 		for (int ndx = 0; ndx < numElements; ++ndx)
   1731 		{
   1732 			deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
   1733 			if (rem == 0)
   1734 			{
   1735 				outputInts[ndx] = 0;
   1736 			}
   1737 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
   1738 			{
   1739 				// They have the same sign
   1740 				outputInts[ndx] = rem;
   1741 			}
   1742 			else
   1743 			{
   1744 				// They have opposite sign.  The remainder operation takes the
   1745 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
   1746 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
   1747 				// the result has the correct sign and that it is still
   1748 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
   1749 				//
   1750 				// See also http://mathforum.org/library/drmath/view/52343.html
   1751 				outputInts[ndx] = rem + inputInts2[ndx];
   1752 			}
   1753 		}
   1754 
   1755 		spec.assembly =
   1756 			"OpCapability Int64\n"
   1757 
   1758 			+ string(getComputeAsmShaderPreamble()) +
   1759 
   1760 			"OpName %main           \"main\"\n"
   1761 			"OpName %id             \"gl_GlobalInvocationID\"\n"
   1762 
   1763 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1764 
   1765 			"OpDecorate %buf BufferBlock\n"
   1766 			"OpDecorate %indata1 DescriptorSet 0\n"
   1767 			"OpDecorate %indata1 Binding 0\n"
   1768 			"OpDecorate %indata2 DescriptorSet 0\n"
   1769 			"OpDecorate %indata2 Binding 1\n"
   1770 			"OpDecorate %outdata DescriptorSet 0\n"
   1771 			"OpDecorate %outdata Binding 2\n"
   1772 			"OpDecorate %i64arr ArrayStride 8\n"
   1773 			"OpMemberDecorate %buf 0 Offset 0\n"
   1774 
   1775 			+ string(getComputeAsmCommonTypes())
   1776 			+ string(getComputeAsmCommonInt64Types()) +
   1777 
   1778 			"%buf        = OpTypeStruct %i64arr\n"
   1779 			"%bufptr     = OpTypePointer Uniform %buf\n"
   1780 			"%indata1    = OpVariable %bufptr Uniform\n"
   1781 			"%indata2    = OpVariable %bufptr Uniform\n"
   1782 			"%outdata    = OpVariable %bufptr Uniform\n"
   1783 
   1784 			"%id        = OpVariable %uvec3ptr Input\n"
   1785 			"%zero      = OpConstant %i64 0\n"
   1786 
   1787 			"%main      = OpFunction %void None %voidf\n"
   1788 			"%label     = OpLabel\n"
   1789 			"%idval     = OpLoad %uvec3 %id\n"
   1790 			"%x         = OpCompositeExtract %u32 %idval 0\n"
   1791 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
   1792 			"%inval1    = OpLoad %i64 %inloc1\n"
   1793 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
   1794 			"%inval2    = OpLoad %i64 %inloc2\n"
   1795 			"%rem       = OpSMod %i64 %inval1 %inval2\n"
   1796 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
   1797 			"             OpStore %outloc %rem\n"
   1798 			"             OpReturn\n"
   1799 			"             OpFunctionEnd\n";
   1800 
   1801 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
   1802 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
   1803 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
   1804 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
   1805 		spec.failResult			= params.failResult;
   1806 		spec.failMessage		= params.failMessage;
   1807 
   1808 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
   1809 	}
   1810 
   1811 	return group.release();
   1812 }
   1813 
   1814 // Copy contents in the input buffer to the output buffer.
   1815 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
   1816 {
   1817 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
   1818 	de::Random						rnd				(deStringHash(group->getName()));
   1819 	const int						numElements		= 100;
   1820 
   1821 	// The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
   1822 	ComputeShaderSpec				spec1;
   1823 	vector<Vec4>					inputFloats1	(numElements);
   1824 	vector<Vec4>					outputFloats1	(numElements);
   1825 
   1826 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
   1827 
   1828 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   1829 	floorAll(inputFloats1);
   1830 
   1831 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1832 		outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
   1833 
   1834 	spec1.assembly =
   1835 		string(getComputeAsmShaderPreamble()) +
   1836 
   1837 		"OpName %main           \"main\"\n"
   1838 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1839 
   1840 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1841 		"OpDecorate %vec4arr ArrayStride 16\n"
   1842 
   1843 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   1844 
   1845 		"%vec4       = OpTypeVector %f32 4\n"
   1846 		"%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
   1847 		"%vec4ptr_f  = OpTypePointer Function %vec4\n"
   1848 		"%vec4arr    = OpTypeRuntimeArray %vec4\n"
   1849 		"%buf        = OpTypeStruct %vec4arr\n"
   1850 		"%bufptr     = OpTypePointer Uniform %buf\n"
   1851 		"%indata     = OpVariable %bufptr Uniform\n"
   1852 		"%outdata    = OpVariable %bufptr Uniform\n"
   1853 
   1854 		"%id         = OpVariable %uvec3ptr Input\n"
   1855 		"%zero       = OpConstant %i32 0\n"
   1856 		"%c_f_0      = OpConstant %f32 0.\n"
   1857 		"%c_f_0_5    = OpConstant %f32 0.5\n"
   1858 		"%c_f_1_5    = OpConstant %f32 1.5\n"
   1859 		"%c_f_2_5    = OpConstant %f32 2.5\n"
   1860 		"%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
   1861 
   1862 		"%main       = OpFunction %void None %voidf\n"
   1863 		"%label      = OpLabel\n"
   1864 		"%v_vec4     = OpVariable %vec4ptr_f Function\n"
   1865 		"%idval      = OpLoad %uvec3 %id\n"
   1866 		"%x          = OpCompositeExtract %u32 %idval 0\n"
   1867 		"%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
   1868 		"%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
   1869 		"              OpCopyMemory %v_vec4 %inloc\n"
   1870 		"%v_vec4_val = OpLoad %vec4 %v_vec4\n"
   1871 		"%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
   1872 		"              OpStore %outloc %add\n"
   1873 		"              OpReturn\n"
   1874 		"              OpFunctionEnd\n";
   1875 
   1876 	spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
   1877 	spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
   1878 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
   1879 
   1880 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
   1881 
   1882 	// The following case copies a float[100] variable from the input buffer to the output buffer.
   1883 	ComputeShaderSpec				spec2;
   1884 	vector<float>					inputFloats2	(numElements);
   1885 	vector<float>					outputFloats2	(numElements);
   1886 
   1887 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
   1888 
   1889 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1890 		outputFloats2[ndx] = inputFloats2[ndx];
   1891 
   1892 	spec2.assembly =
   1893 		string(getComputeAsmShaderPreamble()) +
   1894 
   1895 		"OpName %main           \"main\"\n"
   1896 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1897 
   1898 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1899 		"OpDecorate %f32arr100 ArrayStride 4\n"
   1900 
   1901 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   1902 
   1903 		"%hundred        = OpConstant %u32 100\n"
   1904 		"%f32arr100      = OpTypeArray %f32 %hundred\n"
   1905 		"%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
   1906 		"%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
   1907 		"%buf            = OpTypeStruct %f32arr100\n"
   1908 		"%bufptr         = OpTypePointer Uniform %buf\n"
   1909 		"%indata         = OpVariable %bufptr Uniform\n"
   1910 		"%outdata        = OpVariable %bufptr Uniform\n"
   1911 
   1912 		"%id             = OpVariable %uvec3ptr Input\n"
   1913 		"%zero           = OpConstant %i32 0\n"
   1914 
   1915 		"%main           = OpFunction %void None %voidf\n"
   1916 		"%label          = OpLabel\n"
   1917 		"%var            = OpVariable %f32arr100ptr_f Function\n"
   1918 		"%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
   1919 		"%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
   1920 		"                  OpCopyMemory %var %inarr\n"
   1921 		"                  OpCopyMemory %outarr %var\n"
   1922 		"                  OpReturn\n"
   1923 		"                  OpFunctionEnd\n";
   1924 
   1925 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
   1926 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
   1927 	spec2.numWorkGroups = IVec3(1, 1, 1);
   1928 
   1929 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
   1930 
   1931 	// The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
   1932 	ComputeShaderSpec				spec3;
   1933 	vector<float>					inputFloats3	(16);
   1934 	vector<float>					outputFloats3	(16);
   1935 
   1936 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
   1937 
   1938 	for (size_t ndx = 0; ndx < 16; ++ndx)
   1939 		outputFloats3[ndx] = inputFloats3[ndx];
   1940 
   1941 	spec3.assembly =
   1942 		string(getComputeAsmShaderPreamble()) +
   1943 
   1944 		"OpName %main           \"main\"\n"
   1945 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1946 
   1947 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1948 		"OpMemberDecorate %buf 0 Offset 0\n"
   1949 		"OpMemberDecorate %buf 1 Offset 16\n"
   1950 		"OpMemberDecorate %buf 2 Offset 32\n"
   1951 		"OpMemberDecorate %buf 3 Offset 48\n"
   1952 
   1953 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   1954 
   1955 		"%vec4      = OpTypeVector %f32 4\n"
   1956 		"%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
   1957 		"%bufptr    = OpTypePointer Uniform %buf\n"
   1958 		"%indata    = OpVariable %bufptr Uniform\n"
   1959 		"%outdata   = OpVariable %bufptr Uniform\n"
   1960 		"%vec4stptr = OpTypePointer Function %buf\n"
   1961 
   1962 		"%id        = OpVariable %uvec3ptr Input\n"
   1963 		"%zero      = OpConstant %i32 0\n"
   1964 
   1965 		"%main      = OpFunction %void None %voidf\n"
   1966 		"%label     = OpLabel\n"
   1967 		"%var       = OpVariable %vec4stptr Function\n"
   1968 		"             OpCopyMemory %var %indata\n"
   1969 		"             OpCopyMemory %outdata %var\n"
   1970 		"             OpReturn\n"
   1971 		"             OpFunctionEnd\n";
   1972 
   1973 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
   1974 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
   1975 	spec3.numWorkGroups = IVec3(1, 1, 1);
   1976 
   1977 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
   1978 
   1979 	// The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
   1980 	ComputeShaderSpec				spec4;
   1981 	vector<float>					inputFloats4	(numElements);
   1982 	vector<float>					outputFloats4	(numElements);
   1983 
   1984 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
   1985 
   1986 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   1987 		outputFloats4[ndx] = -inputFloats4[ndx];
   1988 
   1989 	spec4.assembly =
   1990 		string(getComputeAsmShaderPreamble()) +
   1991 
   1992 		"OpName %main           \"main\"\n"
   1993 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   1994 
   1995 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   1996 
   1997 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   1998 
   1999 		"%f32ptr_f  = OpTypePointer Function %f32\n"
   2000 		"%id        = OpVariable %uvec3ptr Input\n"
   2001 		"%zero      = OpConstant %i32 0\n"
   2002 
   2003 		"%main      = OpFunction %void None %voidf\n"
   2004 		"%label     = OpLabel\n"
   2005 		"%var       = OpVariable %f32ptr_f Function\n"
   2006 		"%idval     = OpLoad %uvec3 %id\n"
   2007 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   2008 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   2009 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   2010 		"             OpCopyMemory %var %inloc\n"
   2011 		"%val       = OpLoad %f32 %var\n"
   2012 		"%neg       = OpFNegate %f32 %val\n"
   2013 		"             OpStore %outloc %neg\n"
   2014 		"             OpReturn\n"
   2015 		"             OpFunctionEnd\n";
   2016 
   2017 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
   2018 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
   2019 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
   2020 
   2021 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
   2022 
   2023 	return group.release();
   2024 }
   2025 
   2026 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
   2027 {
   2028 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
   2029 	ComputeShaderSpec				spec;
   2030 	de::Random						rnd				(deStringHash(group->getName()));
   2031 	const int						numElements		= 100;
   2032 	vector<float>					inputFloats		(numElements, 0);
   2033 	vector<float>					outputFloats	(numElements, 0);
   2034 
   2035 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
   2036 
   2037 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   2038 	floorAll(inputFloats);
   2039 
   2040 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   2041 		outputFloats[ndx] = inputFloats[ndx] + 7.5f;
   2042 
   2043 	spec.assembly =
   2044 		string(getComputeAsmShaderPreamble()) +
   2045 
   2046 		"OpName %main           \"main\"\n"
   2047 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   2048 
   2049 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2050 
   2051 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   2052 
   2053 		"%fmat     = OpTypeMatrix %fvec3 3\n"
   2054 		"%three    = OpConstant %u32 3\n"
   2055 		"%farr     = OpTypeArray %f32 %three\n"
   2056 		"%fst      = OpTypeStruct %f32 %f32\n"
   2057 
   2058 		+ string(getComputeAsmInputOutputBuffer()) +
   2059 
   2060 		"%id            = OpVariable %uvec3ptr Input\n"
   2061 		"%zero          = OpConstant %i32 0\n"
   2062 		"%c_f           = OpConstant %f32 1.5\n"
   2063 		"%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
   2064 		"%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
   2065 		"%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
   2066 		"%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
   2067 
   2068 		"%main          = OpFunction %void None %voidf\n"
   2069 		"%label         = OpLabel\n"
   2070 		"%c_f_copy      = OpCopyObject %f32   %c_f\n"
   2071 		"%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
   2072 		"%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
   2073 		"%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
   2074 		"%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
   2075 		"%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
   2076 		"%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
   2077 		"%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
   2078 		"%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
   2079 		// Add up. 1.5 * 5 = 7.5.
   2080 		"%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
   2081 		"%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
   2082 		"%add3          = OpFAdd %f32 %add2     %farr_elem\n"
   2083 		"%add4          = OpFAdd %f32 %add3     %fst_elem\n"
   2084 
   2085 		"%idval         = OpLoad %uvec3 %id\n"
   2086 		"%x             = OpCompositeExtract %u32 %idval 0\n"
   2087 		"%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
   2088 		"%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
   2089 		"%inval         = OpLoad %f32 %inloc\n"
   2090 		"%add           = OpFAdd %f32 %add4 %inval\n"
   2091 		"                 OpStore %outloc %add\n"
   2092 		"                 OpReturn\n"
   2093 		"                 OpFunctionEnd\n";
   2094 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   2095 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   2096 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   2097 
   2098 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
   2099 
   2100 	return group.release();
   2101 }
   2102 // Assembly code used for testing OpUnreachable is based on GLSL source code:
   2103 //
   2104 // #version 430
   2105 //
   2106 // layout(std140, set = 0, binding = 0) readonly buffer Input {
   2107 //   float elements[];
   2108 // } input_data;
   2109 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
   2110 //   float elements[];
   2111 // } output_data;
   2112 //
   2113 // void not_called_func() {
   2114 //   // place OpUnreachable here
   2115 // }
   2116 //
   2117 // uint modulo4(uint val) {
   2118 //   switch (val % uint(4)) {
   2119 //     case 0:  return 3;
   2120 //     case 1:  return 2;
   2121 //     case 2:  return 1;
   2122 //     case 3:  return 0;
   2123 //     default: return 100; // place OpUnreachable here
   2124 //   }
   2125 // }
   2126 //
   2127 // uint const5() {
   2128 //   return 5;
   2129 //   // place OpUnreachable here
   2130 // }
   2131 //
   2132 // void main() {
   2133 //   uint x = gl_GlobalInvocationID.x;
   2134 //   if (const5() > modulo4(1000)) {
   2135 //     output_data.elements[x] = -input_data.elements[x];
   2136 //   } else {
   2137 //     // place OpUnreachable here
   2138 //     output_data.elements[x] = input_data.elements[x];
   2139 //   }
   2140 // }
   2141 
   2142 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
   2143 {
   2144 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
   2145 	ComputeShaderSpec				spec;
   2146 	de::Random						rnd				(deStringHash(group->getName()));
   2147 	const int						numElements		= 100;
   2148 	vector<float>					positiveFloats	(numElements, 0);
   2149 	vector<float>					negativeFloats	(numElements, 0);
   2150 
   2151 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   2152 
   2153 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   2154 		negativeFloats[ndx] = -positiveFloats[ndx];
   2155 
   2156 	spec.assembly =
   2157 		string(getComputeAsmShaderPreamble()) +
   2158 
   2159 		"OpSource GLSL 430\n"
   2160 		"OpName %main            \"main\"\n"
   2161 		"OpName %func_not_called_func \"not_called_func(\"\n"
   2162 		"OpName %func_modulo4         \"modulo4(u1;\"\n"
   2163 		"OpName %func_const5          \"const5(\"\n"
   2164 		"OpName %id                   \"gl_GlobalInvocationID\"\n"
   2165 
   2166 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2167 
   2168 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   2169 
   2170 		"%u32ptr    = OpTypePointer Function %u32\n"
   2171 		"%uintfuint = OpTypeFunction %u32 %u32ptr\n"
   2172 		"%unitf     = OpTypeFunction %u32\n"
   2173 
   2174 		"%id        = OpVariable %uvec3ptr Input\n"
   2175 		"%zero      = OpConstant %u32 0\n"
   2176 		"%one       = OpConstant %u32 1\n"
   2177 		"%two       = OpConstant %u32 2\n"
   2178 		"%three     = OpConstant %u32 3\n"
   2179 		"%four      = OpConstant %u32 4\n"
   2180 		"%five      = OpConstant %u32 5\n"
   2181 		"%hundred   = OpConstant %u32 100\n"
   2182 		"%thousand  = OpConstant %u32 1000\n"
   2183 
   2184 		+ string(getComputeAsmInputOutputBuffer()) +
   2185 
   2186 		// Main()
   2187 		"%main   = OpFunction %void None %voidf\n"
   2188 		"%main_entry  = OpLabel\n"
   2189 		"%v_thousand  = OpVariable %u32ptr Function %thousand\n"
   2190 		"%idval       = OpLoad %uvec3 %id\n"
   2191 		"%x           = OpCompositeExtract %u32 %idval 0\n"
   2192 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
   2193 		"%inval       = OpLoad %f32 %inloc\n"
   2194 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
   2195 		"%ret_const5  = OpFunctionCall %u32 %func_const5\n"
   2196 		"%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
   2197 		"%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
   2198 		"               OpSelectionMerge %if_end None\n"
   2199 		"               OpBranchConditional %cmp_gt %if_true %if_false\n"
   2200 		"%if_true     = OpLabel\n"
   2201 		"%negate      = OpFNegate %f32 %inval\n"
   2202 		"               OpStore %outloc %negate\n"
   2203 		"               OpBranch %if_end\n"
   2204 		"%if_false    = OpLabel\n"
   2205 		"               OpUnreachable\n" // Unreachable else branch for if statement
   2206 		"%if_end      = OpLabel\n"
   2207 		"               OpReturn\n"
   2208 		"               OpFunctionEnd\n"
   2209 
   2210 		// not_called_function()
   2211 		"%func_not_called_func  = OpFunction %void None %voidf\n"
   2212 		"%not_called_func_entry = OpLabel\n"
   2213 		"                         OpUnreachable\n" // Unreachable entry block in not called static function
   2214 		"                         OpFunctionEnd\n"
   2215 
   2216 		// modulo4()
   2217 		"%func_modulo4  = OpFunction %u32 None %uintfuint\n"
   2218 		"%valptr        = OpFunctionParameter %u32ptr\n"
   2219 		"%modulo4_entry = OpLabel\n"
   2220 		"%val           = OpLoad %u32 %valptr\n"
   2221 		"%modulo        = OpUMod %u32 %val %four\n"
   2222 		"                 OpSelectionMerge %switch_merge None\n"
   2223 		"                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
   2224 		"%case0         = OpLabel\n"
   2225 		"                 OpReturnValue %three\n"
   2226 		"%case1         = OpLabel\n"
   2227 		"                 OpReturnValue %two\n"
   2228 		"%case2         = OpLabel\n"
   2229 		"                 OpReturnValue %one\n"
   2230 		"%case3         = OpLabel\n"
   2231 		"                 OpReturnValue %zero\n"
   2232 		"%default       = OpLabel\n"
   2233 		"                 OpUnreachable\n" // Unreachable default case for switch statement
   2234 		"%switch_merge  = OpLabel\n"
   2235 		"                 OpUnreachable\n" // Unreachable merge block for switch statement
   2236 		"                 OpFunctionEnd\n"
   2237 
   2238 		// const5()
   2239 		"%func_const5  = OpFunction %u32 None %unitf\n"
   2240 		"%const5_entry = OpLabel\n"
   2241 		"                OpReturnValue %five\n"
   2242 		"%unreachable  = OpLabel\n"
   2243 		"                OpUnreachable\n" // Unreachable block in function
   2244 		"                OpFunctionEnd\n";
   2245 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   2246 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   2247 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   2248 
   2249 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
   2250 
   2251 	return group.release();
   2252 }
   2253 
   2254 // Assembly code used for testing decoration group is based on GLSL source code:
   2255 //
   2256 // #version 430
   2257 //
   2258 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
   2259 //   float elements[];
   2260 // } input_data0;
   2261 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
   2262 //   float elements[];
   2263 // } input_data1;
   2264 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
   2265 //   float elements[];
   2266 // } input_data2;
   2267 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
   2268 //   float elements[];
   2269 // } input_data3;
   2270 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
   2271 //   float elements[];
   2272 // } input_data4;
   2273 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
   2274 //   float elements[];
   2275 // } output_data;
   2276 //
   2277 // void main() {
   2278 //   uint x = gl_GlobalInvocationID.x;
   2279 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
   2280 // }
   2281 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
   2282 {
        	// Builds the "decoration_group" test group, which holds a single compute case ("all").
        	// The shader reads one float per invocation from each of five input buffers and
        	// stores their sum to the output buffer; most buffer decorations are applied through
        	// OpDecorationGroup / OpGroupDecorate / OpGroupMemberDecorate instead of OpDecorate.
        	// Returns the released group pointer (see group.release() at the end).
   2283 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
   2284 	ComputeShaderSpec				spec;
   2285 	de::Random						rnd				(deStringHash(group->getName()));
   2286 	const int						numElements		= 100;
   2287 	vector<float>					inputFloats0	(numElements, 0);
   2288 	vector<float>					inputFloats1	(numElements, 0);
   2289 	vector<float>					inputFloats2	(numElements, 0);
   2290 	vector<float>					inputFloats3	(numElements, 0);
   2291 	vector<float>					inputFloats4	(numElements, 0);
   2292 	vector<float>					outputFloats	(numElements, 0);
   2293 
        	// All five inputs draw from the same generator, so the order of these calls
        	// determines the data each buffer receives.
   2294 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
   2295 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
   2296 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
   2297 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
   2298 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
   2299 
   2300 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   2301 	floorAll(inputFloats0);
   2302 	floorAll(inputFloats1);
   2303 	floorAll(inputFloats2);
   2304 	floorAll(inputFloats3);
   2305 	floorAll(inputFloats4);
   2306 
        	// Reference result: element-wise sum of the five inputs, accumulated in the same
        	// left-to-right order as the OpFAdd chain in the shader below.
   2307 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   2308 		outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
   2309 
   2310 	spec.assembly =
   2311 		string(getComputeAsmShaderPreamble()) +
   2312 
   2313 		"OpSource GLSL 430\n"
   2314 		"OpName %main \"main\"\n"
   2315 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2316 
   2317 		// Not using group decoration on variable.
   2318 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2319 		// Not using group decoration on type.
   2320 		"OpDecorate %f32arr ArrayStride 4\n"
   2321 
        		// %groups carries BufferBlock and %groupm carries Offset 0. Both are decorated
        		// before the OpDecorationGroup instructions that define them.
   2322 		"OpDecorate %groups BufferBlock\n"
   2323 		"OpDecorate %groupm Offset 0\n"
   2324 		"%groups = OpDecorationGroup\n"
   2325 		"%groupm = OpDecorationGroup\n"
   2326 
   2327 		// Group decoration on multiple structs.
   2328 		"OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
   2329 		// Group decoration on multiple struct members.
   2330 		"OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
   2331 
        		// %group0 receives no decorations, %group1 receives one (DescriptorSet 0) and
        		// %group3 receives three (DescriptorSet 0, NonWritable, Restrict).
   2332 		"OpDecorate %group1 DescriptorSet 0\n"
   2333 		"OpDecorate %group3 DescriptorSet 0\n"
   2334 		"OpDecorate %group3 NonWritable\n"
   2335 		"OpDecorate %group3 Restrict\n"
   2336 		"%group0 = OpDecorationGroup\n"
   2337 		"%group1 = OpDecorationGroup\n"
   2338 		"%group3 = OpDecorationGroup\n"
   2339 
   2340 		// Applying the same decoration group multiple times.
   2341 		"OpGroupDecorate %group1 %outdata\n"
   2342 		"OpGroupDecorate %group1 %outdata\n"
   2343 		"OpGroupDecorate %group1 %outdata\n"
   2344 		"OpDecorate %outdata DescriptorSet 0\n"
   2345 		"OpDecorate %outdata Binding 5\n"
   2346 		// Applying decoration group containing nothing.
   2347 		"OpGroupDecorate %group0 %indata0\n"
   2348 		"OpDecorate %indata0 DescriptorSet 0\n"
   2349 		"OpDecorate %indata0 Binding 0\n"
   2350 		// Applying decoration group containing one decoration.
   2351 		"OpGroupDecorate %group1 %indata1\n"
   2352 		"OpDecorate %indata1 Binding 1\n"
   2353 		// Applying decoration group containing multiple decorations.
   2354 		"OpGroupDecorate %group3 %indata2 %indata3\n"
   2355 		"OpDecorate %indata2 Binding 2\n"
   2356 		"OpDecorate %indata3 Binding 3\n"
   2357 		// Applying multiple decoration groups (with overlapping).
   2358 		"OpGroupDecorate %group0 %indata4\n"
   2359 		"OpGroupDecorate %group1 %indata4\n"
   2360 		"OpGroupDecorate %group3 %indata4\n"
   2361 		"OpDecorate %indata4 Binding 4\n"
   2362 
   2363 		+ string(getComputeAsmCommonTypes()) +
   2364 
   2365 		"%id   = OpVariable %uvec3ptr Input\n"
   2366 		"%zero = OpConstant %i32 0\n"
   2367 
        		// One struct type, pointer type and variable per buffer, so that each struct
        		// and each variable can be a separate target of the group decorations above.
   2368 		"%outbuf    = OpTypeStruct %f32arr\n"
   2369 		"%outbufptr = OpTypePointer Uniform %outbuf\n"
   2370 		"%outdata   = OpVariable %outbufptr Uniform\n"
   2371 		"%inbuf0    = OpTypeStruct %f32arr\n"
   2372 		"%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
   2373 		"%indata0   = OpVariable %inbuf0ptr Uniform\n"
   2374 		"%inbuf1    = OpTypeStruct %f32arr\n"
   2375 		"%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
   2376 		"%indata1   = OpVariable %inbuf1ptr Uniform\n"
   2377 		"%inbuf2    = OpTypeStruct %f32arr\n"
   2378 		"%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
   2379 		"%indata2   = OpVariable %inbuf2ptr Uniform\n"
   2380 		"%inbuf3    = OpTypeStruct %f32arr\n"
   2381 		"%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
   2382 		"%indata3   = OpVariable %inbuf3ptr Uniform\n"
   2383 		"%inbuf4    = OpTypeStruct %f32arr\n"
   2384 		"%inbufptr  = OpTypePointer Uniform %inbuf4\n"
   2385 		"%indata4   = OpVariable %inbufptr Uniform\n"
   2386 
   2387 		"%main   = OpFunction %void None %voidf\n"
   2388 		"%label  = OpLabel\n"
   2389 		"%idval  = OpLoad %uvec3 %id\n"
   2390 		"%x      = OpCompositeExtract %u32 %idval 0\n"
   2391 		"%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
   2392 		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
   2393 		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
   2394 		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
   2395 		"%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
   2396 		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
   2397 		"%inval0 = OpLoad %f32 %inloc0\n"
   2398 		"%inval1 = OpLoad %f32 %inloc1\n"
   2399 		"%inval2 = OpLoad %f32 %inloc2\n"
   2400 		"%inval3 = OpLoad %f32 %inloc3\n"
   2401 		"%inval4 = OpLoad %f32 %inloc4\n"
   2402 		"%add0   = OpFAdd %f32 %inval0 %inval1\n"
   2403 		"%add1   = OpFAdd %f32 %add0 %inval2\n"
   2404 		"%add2   = OpFAdd %f32 %add1 %inval3\n"
   2405 		"%add    = OpFAdd %f32 %add2 %inval4\n"
   2406 		"          OpStore %outloc %add\n"
   2407 		"          OpReturn\n"
   2408 		"          OpFunctionEnd\n";
        	// NOTE(review): presumably the runner binds inputs in push order to bindings 0..4
        	// and the output after them (binding 5), matching the decorations above - confirm
        	// against SpvAsmComputeShaderCase.
   2409 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
   2410 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
   2411 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
   2412 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
   2413 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
   2414 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   2415 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   2416 
   2417 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
   2418 
   2419 	return group.release();
   2420 }
   2421 
   2422 struct SpecConstantTwoIntCase
   2423 {
   2424 	const char*		caseName;
   2425 	const char*		scDefinition0;
   2426 	const char*		scDefinition1;
   2427 	const char*		scResultType;
   2428 	const char*		scOperation;
   2429 	deInt32			scActualValue0;
   2430 	deInt32			scActualValue1;
   2431 	const char*		resultOperation;
   2432 	vector<deInt32>	expectedOutput;
   2433 
   2434 					SpecConstantTwoIntCase (const char* name,
   2435 											const char* definition0,
   2436 											const char* definition1,
   2437 											const char* resultType,
   2438 											const char* operation,
   2439 											deInt32 value0,
   2440 											deInt32 value1,
   2441 											const char* resultOp,
   2442 											const vector<deInt32>& output)
   2443 						: caseName			(name)
   2444 						, scDefinition0		(definition0)
   2445 						, scDefinition1		(definition1)
   2446 						, scResultType		(resultType)
   2447 						, scOperation		(operation)
   2448 						, scActualValue0	(value0)
   2449 						, scActualValue1	(value1)
   2450 						, resultOperation	(resultOp)
   2451 						, expectedOutput	(output) {}
   2452 };
   2453 
   2454 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
   2455 {
        	// Builds the "opspecconstantop" test group. It contains one compute case per entry
        	// of the `cases` table (scalar / boolean OpSpecConstantOp operations) plus a final
        	// "vector_related" case covering VectorShuffle, CompositeExtract and CompositeInsert.
        	// Returns the released group pointer.
   2456 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
   2457 	vector<SpecConstantTwoIntCase>	cases;
   2458 	de::Random						rnd				(deStringHash(group->getName()));
   2459 	const int						numElements		= 100;
   2460 	vector<deInt32>					inputInts		(numElements, 0);
   2461 	vector<deInt32>					outputInts1		(numElements, 0);
   2462 	vector<deInt32>					outputInts2		(numElements, 0);
   2463 	vector<deInt32>					outputInts3		(numElements, 0);
   2464 	vector<deInt32>					outputInts4		(numElements, 0);
        	// Shader template shared by all table-driven cases. Placeholders:
        	//   SC_DEF0 / SC_DEF1 - text following "OpSpecConstant" for %sc_0 / %sc_1
        	//   SC_RESULT_TYPE    - result type of the OpSpecConstantOp
        	//   SC_OP             - opcode and operands of the OpSpecConstantOp
        	//   GEN_RESULT        - instruction computing %final from %inval and %sc_final
   2465 	const StringTemplate			shaderTemplate	(
   2466 		string(getComputeAsmShaderPreamble()) +
   2467 
   2468 		"OpName %main           \"main\"\n"
   2469 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   2470 
   2471 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2472 		"OpDecorate %sc_0  SpecId 0\n"
   2473 		"OpDecorate %sc_1  SpecId 1\n"
   2474 		"OpDecorate %i32arr ArrayStride 4\n"
   2475 
   2476 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   2477 
   2478 		"%buf     = OpTypeStruct %i32arr\n"
   2479 		"%bufptr  = OpTypePointer Uniform %buf\n"
   2480 		"%indata    = OpVariable %bufptr Uniform\n"
   2481 		"%outdata   = OpVariable %bufptr Uniform\n"
   2482 
   2483 		"%id        = OpVariable %uvec3ptr Input\n"
   2484 		"%zero      = OpConstant %i32 0\n"
   2485 
   2486 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
   2487 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
   2488 		"%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
   2489 
   2490 		"%main      = OpFunction %void None %voidf\n"
   2491 		"%label     = OpLabel\n"
   2492 		"%idval     = OpLoad %uvec3 %id\n"
   2493 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   2494 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
   2495 		"%inval     = OpLoad %i32 %inloc\n"
   2496 		"%final     = ${GEN_RESULT}\n"
   2497 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
   2498 		"             OpStore %outloc %final\n"
   2499 		"             OpReturn\n"
   2500 		"             OpFunctionEnd\n");
   2501 
   2502 	fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
   2503 
        	// Reference outputs shared between cases:
        	//   outputInts1 - input + 42     (integer ops whose specialized result is 42)
        	//   outputInts2 - input          (boolean ops, result routed through OpSelect)
        	//   outputInts3 - input - 11200  (the "vector_related" case)
        	//   outputInts4 - input + 1      (srem / smod, 7 rem 3)
   2504 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   2505 	{
   2506 		outputInts1[ndx] = inputInts[ndx] + 42;
   2507 		outputInts2[ndx] = inputInts[ndx];
   2508 		outputInts3[ndx] = inputInts[ndx] - 11200;
   2509 		outputInts4[ndx] = inputInts[ndx] + 1;
   2510 	}
   2511 
        	// GEN_RESULT variants. The two select forms yield %inval when %sc_final is
        	// true / false respectively, and %zero otherwise.
   2512 	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
   2513 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
   2514 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
   2515 
        	// Columns: case name, %sc_0 definition, %sc_1 definition, result type, operation,
        	// value for %sc_0, value for %sc_1, GEN_RESULT instruction, expected output.
   2516 	cases.push_back(SpecConstantTwoIntCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",			62,		-20,	addScToInput,		outputInts1));
   2517 	cases.push_back(SpecConstantTwoIntCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",			100,	58,		addScToInput,		outputInts1));
   2518 	cases.push_back(SpecConstantTwoIntCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",			-2,		-21,	addScToInput,		outputInts1));
   2519 	cases.push_back(SpecConstantTwoIntCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",			-126,	-3,		addScToInput,		outputInts1));
   2520 	cases.push_back(SpecConstantTwoIntCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",			126,	3,		addScToInput,		outputInts1));
   2521 	cases.push_back(SpecConstantTwoIntCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
   2522 	cases.push_back(SpecConstantTwoIntCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
   2523 	cases.push_back(SpecConstantTwoIntCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",			342,	50,		addScToInput,		outputInts1));
   2524 	cases.push_back(SpecConstantTwoIntCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",			42,		63,		addScToInput,		outputInts1));
   2525 	cases.push_back(SpecConstantTwoIntCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",			34,		8,		addScToInput,		outputInts1));
   2526 	cases.push_back(SpecConstantTwoIntCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",			18,		56,		addScToInput,		outputInts1));
   2527 	cases.push_back(SpecConstantTwoIntCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
   2528 	cases.push_back(SpecConstantTwoIntCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
   2529 	cases.push_back(SpecConstantTwoIntCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,		1,		addScToInput,		outputInts1));
   2530 	cases.push_back(SpecConstantTwoIntCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",			-20,	-10,	selectTrueUsingSc,	outputInts2));
   2531 	cases.push_back(SpecConstantTwoIntCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",			10,		20,		selectTrueUsingSc,	outputInts2));
   2532 	cases.push_back(SpecConstantTwoIntCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
   2533 	cases.push_back(SpecConstantTwoIntCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",			10,		5,		selectTrueUsingSc,	outputInts2));
   2534 	cases.push_back(SpecConstantTwoIntCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",			-10,	-10,	selectTrueUsingSc,	outputInts2));
   2535 	cases.push_back(SpecConstantTwoIntCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",			50,		100,	selectTrueUsingSc,	outputInts2));
   2536 	cases.push_back(SpecConstantTwoIntCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
   2537 	cases.push_back(SpecConstantTwoIntCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",			10,		10,		selectTrueUsingSc,	outputInts2));
   2538 	cases.push_back(SpecConstantTwoIntCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",			42,		24,		selectFalseUsingSc,	outputInts2));
   2539 	cases.push_back(SpecConstantTwoIntCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
   2540 	cases.push_back(SpecConstantTwoIntCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
   2541 	cases.push_back(SpecConstantTwoIntCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
   2542 	cases.push_back(SpecConstantTwoIntCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
   2543 	cases.push_back(SpecConstantTwoIntCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",				-42,	0,		addScToInput,		outputInts1));
   2544 	cases.push_back(SpecConstantTwoIntCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,	0,		addScToInput,		outputInts1));
   2545 	cases.push_back(SpecConstantTwoIntCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,		0,		selectFalseUsingSc,	outputInts2));
   2546 	cases.push_back(SpecConstantTwoIntCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,		42,		addScToInput,		outputInts1));
   2547 	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
   2548 
        	// Instantiate the template once per table entry. Each case gets its own spec with
        	// the shared input buffer and two specialization values.
   2549 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   2550 	{
   2551 		map<string, string>		specializations;
   2552 		ComputeShaderSpec		spec;
   2553 
   2554 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
   2555 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
   2556 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
   2557 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
   2558 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
   2559 
   2560 		spec.assembly = shaderTemplate.specialize(specializations);
   2561 		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
   2562 		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
   2563 		spec.numWorkGroups = IVec3(numElements, 1, 1);
        		// NOTE(review): presumably the n-th pushed value specializes SpecId n - confirm
        		// against the compute test runner.
   2564 		spec.specConstants.push_back(cases[caseNdx].scActualValue0);
   2565 		spec.specConstants.push_back(cases[caseNdx].scActualValue1);
   2566 
        		// The case name doubles as the test description.
   2567 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
   2568 	}
   2569 
        	// Stand-alone case for the composite operations; it uses three spec constants.
   2570 	ComputeShaderSpec				spec;
   2571 
   2572 	spec.assembly =
   2573 		string(getComputeAsmShaderPreamble()) +
   2574 
   2575 		"OpName %main           \"main\"\n"
   2576 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   2577 
   2578 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2579 		"OpDecorate %sc_0  SpecId 0\n"
   2580 		"OpDecorate %sc_1  SpecId 1\n"
   2581 		"OpDecorate %sc_2  SpecId 2\n"
   2582 		"OpDecorate %i32arr ArrayStride 4\n"
   2583 
   2584 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   2585 
   2586 		"%ivec3       = OpTypeVector %i32 3\n"
   2587 		"%buf         = OpTypeStruct %i32arr\n"
   2588 		"%bufptr      = OpTypePointer Uniform %buf\n"
   2589 		"%indata      = OpVariable %bufptr Uniform\n"
   2590 		"%outdata     = OpVariable %bufptr Uniform\n"
   2591 
   2592 		"%id          = OpVariable %uvec3ptr Input\n"
   2593 		"%zero        = OpConstant %i32 0\n"
   2594 		"%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
   2595 		"%vec3_undef  = OpUndef %ivec3\n"
   2596 
   2597 		"%sc_0        = OpSpecConstant %i32 0\n"
   2598 		"%sc_1        = OpSpecConstant %i32 0\n"
   2599 		"%sc_2        = OpSpecConstant %i32 0\n"
        		// In the trailing comments below, ??? marks a component selected with the
        		// 0xFFFFFFFF shuffle index or copied from such a component.
   2600 		"%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"							// (sc_0, 0, 0)
   2601 		"%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"							// (0, sc_1, 0)
   2602 		"%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"							// (0, 0, sc_2)
   2603 		"%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
   2604 		"%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
   2605 		"%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
   2606 		"%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (???,  sc_0, sc_1)
   2607 		"%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
   2608 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
   2609 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
   2610 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
   2611 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
   2612 		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"								// (sc_2 - sc_0) * sc_1
   2613 
   2614 		"%main      = OpFunction %void None %voidf\n"
   2615 		"%label     = OpLabel\n"
   2616 		"%idval     = OpLoad %uvec3 %id\n"
   2617 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   2618 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
   2619 		"%inval     = OpLoad %i32 %inloc\n"
   2620 		"%final     = OpIAdd %i32 %inval %sc_final\n"
   2621 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
   2622 		"             OpStore %outloc %final\n"
   2623 		"             OpReturn\n"
   2624 		"             OpFunctionEnd\n";
   2625 	spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
   2626 	spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
   2627 	spec.numWorkGroups = IVec3(numElements, 1, 1);
        	// Assuming the pushed values specialize %sc_0, %sc_1 and %sc_2 in that order
        	// (TODO confirm), %sc_final is (-77 - 123) * 56 = -11200, which is the offset
        	// baked into outputInts3.
   2628 	spec.specConstants.push_back(123);
   2629 	spec.specConstants.push_back(56);
   2630 	spec.specConstants.push_back(-77);
   2631 
   2632 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
   2633 
   2634 	return group.release();
   2635 }
   2636 
   2637 string generateConstantDefinitions (int count)
   2638 {
   2639 	std::stringstream	r;
   2640 	for (int i = 0; i < count; i++)
   2641 		r << "%cf" << (i * 10 + 5) << " = OpConstant %f32 " <<(i * 10 + 5) << ".0\n";
   2642 	return r.str() + string("\n");
   2643 }
   2644 
   2645 string generateSwitchCases (int count)
   2646 {
   2647 	std::stringstream	r;
   2648 	for (int i = 0; i < count; i++)
   2649 		r << " " << i << " %case" << i;
   2650 	return r.str() + string("\n");
   2651 }
   2652 
   2653 string generateSwitchTargets (int count)
   2654 {
   2655 	std::stringstream	r;
   2656 	for (int i = 0; i < count; i++)
   2657 		r << "%case" << i << " = OpLabel\n            OpBranch %phi\n";
   2658 	return r.str() + string("\n");
   2659 }
   2660 
   2661 string generateOpPhiParams (int count)
   2662 {
   2663 	std::stringstream	r;
   2664 	for (int i = 0; i < count; i++)
   2665 		r << " %cf" << (i * 10 + 5) << " %case" << i;
   2666 	return r.str() + string("\n");
   2667 }
   2668 
   2669 string generateIntWidth (int value)
   2670 {
   2671 	std::stringstream	r;
   2672 	r << value;
   2673 	return r.str();
   2674 }
   2675 
   2676 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
   2677 {
   2678 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
   2679 	ComputeShaderSpec				spec1;
   2680 	ComputeShaderSpec				spec2;
   2681 	ComputeShaderSpec				spec3;
   2682 	ComputeShaderSpec				spec4;
   2683 	de::Random						rnd				(deStringHash(group->getName()));
   2684 	const int						numElements		= 100;
   2685 	vector<float>					inputFloats		(numElements, 0);
   2686 	vector<float>					outputFloats1	(numElements, 0);
   2687 	vector<float>					outputFloats2	(numElements, 0);
   2688 	vector<float>					outputFloats3	(numElements, 0);
   2689 	vector<float>					outputFloats4	(numElements, 0);
   2690 	const int						test4Width		= 1024;
   2691 
   2692 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
   2693 
   2694 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   2695 	floorAll(inputFloats);
   2696 
   2697 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   2698 	{
   2699 		switch (ndx % 3)
   2700 		{
   2701 			case 0:		outputFloats1[ndx] = inputFloats[ndx] + 5.5f;	break;
   2702 			case 1:		outputFloats1[ndx] = inputFloats[ndx] + 20.5f;	break;
   2703 			case 2:		outputFloats1[ndx] = inputFloats[ndx] + 1.75f;	break;
   2704 			default:	break;
   2705 		}
   2706 		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
   2707 		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
   2708 
   2709 		int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
   2710 		outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
   2711 	}
   2712 
   2713 	spec1.assembly =
   2714 		string(getComputeAsmShaderPreamble()) +
   2715 
   2716 		"OpSource GLSL 430\n"
   2717 		"OpName %main \"main\"\n"
   2718 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2719 
   2720 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2721 
   2722 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   2723 
   2724 		"%id = OpVariable %uvec3ptr Input\n"
   2725 		"%zero       = OpConstant %i32 0\n"
   2726 		"%three      = OpConstant %u32 3\n"
   2727 		"%constf5p5  = OpConstant %f32 5.5\n"
   2728 		"%constf20p5 = OpConstant %f32 20.5\n"
   2729 		"%constf1p75 = OpConstant %f32 1.75\n"
   2730 		"%constf8p5  = OpConstant %f32 8.5\n"
   2731 		"%constf6p5  = OpConstant %f32 6.5\n"
   2732 
   2733 		"%main     = OpFunction %void None %voidf\n"
   2734 		"%entry    = OpLabel\n"
   2735 		"%idval    = OpLoad %uvec3 %id\n"
   2736 		"%x        = OpCompositeExtract %u32 %idval 0\n"
   2737 		"%selector = OpUMod %u32 %x %three\n"
   2738 		"            OpSelectionMerge %phi None\n"
   2739 		"            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
   2740 
   2741 		// Case 1 before OpPhi.
   2742 		"%case1    = OpLabel\n"
   2743 		"            OpBranch %phi\n"
   2744 
   2745 		"%default  = OpLabel\n"
   2746 		"            OpUnreachable\n"
   2747 
   2748 		"%phi      = OpLabel\n"
   2749 		"%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
   2750 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
   2751 		"%inval    = OpLoad %f32 %inloc\n"
   2752 		"%add      = OpFAdd %f32 %inval %operand\n"
   2753 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
   2754 		"            OpStore %outloc %add\n"
   2755 		"            OpReturn\n"
   2756 
   2757 		// Case 0 after OpPhi.
   2758 		"%case0    = OpLabel\n"
   2759 		"            OpBranch %phi\n"
   2760 
   2761 
   2762 		// Case 2 after OpPhi.
   2763 		"%case2    = OpLabel\n"
   2764 		"            OpBranch %phi\n"
   2765 
   2766 		"            OpFunctionEnd\n";
   2767 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   2768 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
   2769 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
   2770 
   2771 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
   2772 
   2773 	spec2.assembly =
   2774 		string(getComputeAsmShaderPreamble()) +
   2775 
   2776 		"OpName %main \"main\"\n"
   2777 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2778 
   2779 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2780 
   2781 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   2782 
   2783 		"%id         = OpVariable %uvec3ptr Input\n"
   2784 		"%zero       = OpConstant %i32 0\n"
   2785 		"%one        = OpConstant %i32 1\n"
   2786 		"%three      = OpConstant %i32 3\n"
   2787 		"%constf6p5  = OpConstant %f32 6.5\n"
   2788 
   2789 		"%main       = OpFunction %void None %voidf\n"
   2790 		"%entry      = OpLabel\n"
   2791 		"%idval      = OpLoad %uvec3 %id\n"
   2792 		"%x          = OpCompositeExtract %u32 %idval 0\n"
   2793 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
   2794 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
   2795 		"%inval      = OpLoad %f32 %inloc\n"
   2796 		"              OpBranch %phi\n"
   2797 
   2798 		"%phi        = OpLabel\n"
   2799 		"%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
   2800 		"%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
   2801 		"%step_next  = OpIAdd %i32 %step %one\n"
   2802 		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
   2803 		"%still_loop = OpSLessThan %bool %step %three\n"
   2804 		"              OpLoopMerge %exit %phi None\n"
   2805 		"              OpBranchConditional %still_loop %phi %exit\n"
   2806 
   2807 		"%exit       = OpLabel\n"
   2808 		"              OpStore %outloc %accum\n"
   2809 		"              OpReturn\n"
   2810 		"              OpFunctionEnd\n";
   2811 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   2812 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
   2813 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
   2814 
   2815 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
   2816 
   2817 	spec3.assembly =
   2818 		string(getComputeAsmShaderPreamble()) +
   2819 
   2820 		"OpName %main \"main\"\n"
   2821 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2822 
   2823 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2824 
   2825 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   2826 
   2827 		"%f32ptr_f   = OpTypePointer Function %f32\n"
   2828 		"%id         = OpVariable %uvec3ptr Input\n"
   2829 		"%true       = OpConstantTrue %bool\n"
   2830 		"%false      = OpConstantFalse %bool\n"
   2831 		"%zero       = OpConstant %i32 0\n"
   2832 		"%constf8p5  = OpConstant %f32 8.5\n"
   2833 
   2834 		"%main       = OpFunction %void None %voidf\n"
   2835 		"%entry      = OpLabel\n"
   2836 		"%b          = OpVariable %f32ptr_f Function %constf8p5\n"
   2837 		"%idval      = OpLoad %uvec3 %id\n"
   2838 		"%x          = OpCompositeExtract %u32 %idval 0\n"
   2839 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
   2840 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
   2841 		"%a_init     = OpLoad %f32 %inloc\n"
   2842 		"%b_init     = OpLoad %f32 %b\n"
   2843 		"              OpBranch %phi\n"
   2844 
   2845 		"%phi        = OpLabel\n"
   2846 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
   2847 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
   2848 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
   2849 		"              OpLoopMerge %exit %phi None\n"
   2850 		"              OpBranchConditional %still_loop %phi %exit\n"
   2851 
   2852 		"%exit       = OpLabel\n"
   2853 		"%sub        = OpFSub %f32 %a_next %b_next\n"
   2854 		"              OpStore %outloc %sub\n"
   2855 		"              OpReturn\n"
   2856 		"              OpFunctionEnd\n";
   2857 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   2858 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
   2859 	spec3.numWorkGroups = IVec3(numElements, 1, 1);
   2860 
   2861 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
   2862 
   2863 	spec4.assembly =
   2864 		"OpCapability Shader\n"
   2865 		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
   2866 		"OpMemoryModel Logical GLSL450\n"
   2867 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   2868 		"OpExecutionMode %main LocalSize 1 1 1\n"
   2869 
   2870 		"OpSource GLSL 430\n"
   2871 		"OpName %main \"main\"\n"
   2872 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2873 
   2874 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2875 
   2876 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   2877 
   2878 		"%id       = OpVariable %uvec3ptr Input\n"
   2879 		"%zero     = OpConstant %i32 0\n"
   2880 		"%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
   2881 
   2882 		+ generateConstantDefinitions(test4Width) +
   2883 
   2884 		"%main     = OpFunction %void None %voidf\n"
   2885 		"%entry    = OpLabel\n"
   2886 		"%idval    = OpLoad %uvec3 %id\n"
   2887 		"%x        = OpCompositeExtract %u32 %idval 0\n"
   2888 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
   2889 		"%inval    = OpLoad %f32 %inloc\n"
   2890 		"%xf       = OpConvertUToF %f32 %x\n"
   2891 		"%xm       = OpFMul %f32 %xf %inval\n"
   2892 		"%xa       = OpExtInst %f32 %ext FAbs %xm\n"
   2893 		"%xi       = OpConvertFToU %u32 %xa\n"
   2894 		"%selector = OpUMod %u32 %xi %cimod\n"
   2895 		"            OpSelectionMerge %phi None\n"
   2896 		"            OpSwitch %selector %default "
   2897 
   2898 		+ generateSwitchCases(test4Width) +
   2899 
   2900 		"%default  = OpLabel\n"
   2901 		"            OpUnreachable\n"
   2902 
   2903 		+ generateSwitchTargets(test4Width) +
   2904 
   2905 		"%phi      = OpLabel\n"
   2906 		"%result   = OpPhi %f32"
   2907 
   2908 		+ generateOpPhiParams(test4Width) +
   2909 
   2910 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
   2911 		"            OpStore %outloc %result\n"
   2912 		"            OpReturn\n"
   2913 
   2914 		"            OpFunctionEnd\n";
   2915 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   2916 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
   2917 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
   2918 
   2919 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
   2920 
   2921 	return group.release();
   2922 }
   2923 
   2924 // Assembly code used for testing block order is based on GLSL source code:
   2925 //
   2926 // #version 430
   2927 //
   2928 // layout(std140, set = 0, binding = 0) readonly buffer Input {
   2929 //   float elements[];
   2930 // } input_data;
   2931 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
   2932 //   float elements[];
   2933 // } output_data;
   2934 //
   2935 // void main() {
   2936 //   uint x = gl_GlobalInvocationID.x;
   2937 //   output_data.elements[x] = input_data.elements[x];
   2938 //   if (x > uint(50)) {
   2939 //     switch (x % uint(3)) {
   2940 //       case 0: output_data.elements[x] += 1.5f; break;
   2941 //       case 1: output_data.elements[x] += 42.f; break;
   2942 //       case 2: output_data.elements[x] -= 27.f; break;
   2943 //       default: break;
   2944 //     }
   2945 //   } else {
   2946 //     output_data.elements[x] = -input_data.elements[x];
   2947 //   }
   2948 // }
   2949 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
   2950 {
   2951 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
   2952 	ComputeShaderSpec				spec;
   2953 	de::Random						rnd				(deStringHash(group->getName()));
   2954 	const int						numElements		= 100;
   2955 	vector<float>					inputFloats		(numElements, 0);
   2956 	vector<float>					outputFloats	(numElements, 0);
   2957 
   2958 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
   2959 
   2960 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   2961 	floorAll(inputFloats);
   2962 
   2963 	for (size_t ndx = 0; ndx <= 50; ++ndx)
   2964 		outputFloats[ndx] = -inputFloats[ndx];
   2965 
   2966 	for (size_t ndx = 51; ndx < numElements; ++ndx)
   2967 	{
   2968 		switch (ndx % 3)
   2969 		{
   2970 			case 0:		outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
   2971 			case 1:		outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
   2972 			case 2:		outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
   2973 			default:	break;
   2974 		}
   2975 	}
   2976 
   2977 	spec.assembly =
   2978 		string(getComputeAsmShaderPreamble()) +
   2979 
   2980 		"OpSource GLSL 430\n"
   2981 		"OpName %main \"main\"\n"
   2982 		"OpName %id \"gl_GlobalInvocationID\"\n"
   2983 
   2984 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   2985 
   2986 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   2987 
   2988 		"%u32ptr       = OpTypePointer Function %u32\n"
   2989 		"%u32ptr_input = OpTypePointer Input %u32\n"
   2990 
   2991 		+ string(getComputeAsmInputOutputBuffer()) +
   2992 
   2993 		"%id        = OpVariable %uvec3ptr Input\n"
   2994 		"%zero      = OpConstant %i32 0\n"
   2995 		"%const3    = OpConstant %u32 3\n"
   2996 		"%const50   = OpConstant %u32 50\n"
   2997 		"%constf1p5 = OpConstant %f32 1.5\n"
   2998 		"%constf27  = OpConstant %f32 27.0\n"
   2999 		"%constf42  = OpConstant %f32 42.0\n"
   3000 
   3001 		"%main = OpFunction %void None %voidf\n"
   3002 
   3003 		// entry block.
   3004 		"%entry    = OpLabel\n"
   3005 
   3006 		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
   3007 		"%xvar     = OpVariable %u32ptr Function\n"
   3008 		"%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
   3009 		"%x        = OpLoad %u32 %xptr\n"
   3010 		"            OpStore %xvar %x\n"
   3011 
   3012 		"%cmp      = OpUGreaterThan %bool %x %const50\n"
   3013 		"            OpSelectionMerge %if_merge None\n"
   3014 		"            OpBranchConditional %cmp %if_true %if_false\n"
   3015 
   3016 		// False branch for if-statement: placed in the middle of switch cases and before true branch.
   3017 		"%if_false = OpLabel\n"
   3018 		"%x_f      = OpLoad %u32 %xvar\n"
   3019 		"%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
   3020 		"%inval_f  = OpLoad %f32 %inloc_f\n"
   3021 		"%negate   = OpFNegate %f32 %inval_f\n"
   3022 		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
   3023 		"            OpStore %outloc_f %negate\n"
   3024 		"            OpBranch %if_merge\n"
   3025 
   3026 		// Merge block for if-statement: placed in the middle of true and false branch.
   3027 		"%if_merge = OpLabel\n"
   3028 		"            OpReturn\n"
   3029 
   3030 		// True branch for if-statement: placed in the middle of swtich cases and after the false branch.
   3031 		"%if_true  = OpLabel\n"
   3032 		"%xval_t   = OpLoad %u32 %xvar\n"
   3033 		"%mod      = OpUMod %u32 %xval_t %const3\n"
   3034 		"            OpSelectionMerge %switch_merge None\n"
   3035 		"            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
   3036 
   3037 		// Merge block for switch-statement: placed before the case
   3038                 // bodies.  But it must follow OpSwitch which dominates it.
   3039 		"%switch_merge = OpLabel\n"
   3040 		"                OpBranch %if_merge\n"
   3041 
   3042 		// Case 1 for switch-statement: placed before case 0.
   3043                 // It must follow the OpSwitch that dominates it.
   3044 		"%case1    = OpLabel\n"
   3045 		"%x_1      = OpLoad %u32 %xvar\n"
   3046 		"%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
   3047 		"%inval_1  = OpLoad %f32 %inloc_1\n"
   3048 		"%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
   3049 		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
   3050 		"            OpStore %outloc_1 %addf42\n"
   3051 		"            OpBranch %switch_merge\n"
   3052 
   3053 		// Case 2 for switch-statement.
   3054 		"%case2    = OpLabel\n"
   3055 		"%x_2      = OpLoad %u32 %xvar\n"
   3056 		"%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
   3057 		"%inval_2  = OpLoad %f32 %inloc_2\n"
   3058 		"%subf27   = OpFSub %f32 %inval_2 %constf27\n"
   3059 		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
   3060 		"            OpStore %outloc_2 %subf27\n"
   3061 		"            OpBranch %switch_merge\n"
   3062 
   3063 		// Default case for switch-statement: placed in the middle of normal cases.
   3064 		"%default = OpLabel\n"
   3065 		"           OpBranch %switch_merge\n"
   3066 
   3067 		// Case 0 for switch-statement: out of order.
   3068 		"%case0    = OpLabel\n"
   3069 		"%x_0      = OpLoad %u32 %xvar\n"
   3070 		"%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
   3071 		"%inval_0  = OpLoad %f32 %inloc_0\n"
   3072 		"%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
   3073 		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
   3074 		"            OpStore %outloc_0 %addf1p5\n"
   3075 		"            OpBranch %switch_merge\n"
   3076 
   3077 		"            OpFunctionEnd\n";
   3078 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   3079 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   3080 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   3081 
   3082 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
   3083 
   3084 	return group.release();
   3085 }
   3086 
   3087 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
   3088 {
   3089 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
   3090 	ComputeShaderSpec				spec1;
   3091 	ComputeShaderSpec				spec2;
   3092 	de::Random						rnd				(deStringHash(group->getName()));
   3093 	const int						numElements		= 100;
   3094 	vector<float>					inputFloats		(numElements, 0);
   3095 	vector<float>					outputFloats1	(numElements, 0);
   3096 	vector<float>					outputFloats2	(numElements, 0);
   3097 	fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
   3098 
   3099 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   3100 	{
   3101 		outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
   3102 		outputFloats2[ndx] = -inputFloats[ndx];
   3103 	}
   3104 
   3105 	const string assembly(
   3106 		"OpCapability Shader\n"
   3107 		"OpCapability ClipDistance\n"
   3108 		"OpMemoryModel Logical GLSL450\n"
   3109 		"OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
   3110 		"OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
   3111 		// A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
   3112 		"OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
   3113 		"OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
   3114 		"OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
   3115 
   3116 		"OpName %comp_main1              \"entrypoint1\"\n"
   3117 		"OpName %comp_main2              \"entrypoint2\"\n"
   3118 		"OpName %vert_main               \"entrypoint2\"\n"
   3119 		"OpName %id                      \"gl_GlobalInvocationID\"\n"
   3120 		"OpName %vert_builtin_st         \"gl_PerVertex\"\n"
   3121 		"OpName %vertexIndex             \"gl_VertexIndex\"\n"
   3122 		"OpName %instanceIndex           \"gl_InstanceIndex\"\n"
   3123 		"OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
   3124 		"OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
   3125 		"OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
   3126 
   3127 		"OpDecorate %id                      BuiltIn GlobalInvocationId\n"
   3128 		"OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
   3129 		"OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
   3130 		"OpDecorate %vert_builtin_st         Block\n"
   3131 		"OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
   3132 		"OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
   3133 		"OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
   3134 
   3135 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3136 
   3137 		"%zero       = OpConstant %i32 0\n"
   3138 		"%one        = OpConstant %u32 1\n"
   3139 		"%c_f32_1    = OpConstant %f32 1\n"
   3140 
   3141 		"%i32inputptr         = OpTypePointer Input %i32\n"
   3142 		"%vec4                = OpTypeVector %f32 4\n"
   3143 		"%vec4ptr             = OpTypePointer Output %vec4\n"
   3144 		"%f32arr1             = OpTypeArray %f32 %one\n"
   3145 		"%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
   3146 		"%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
   3147 		"%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
   3148 
   3149 		"%id         = OpVariable %uvec3ptr Input\n"
   3150 		"%vertexIndex = OpVariable %i32inputptr Input\n"
   3151 		"%instanceIndex = OpVariable %i32inputptr Input\n"
   3152 		"%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
   3153 
   3154 		// gl_Position = vec4(1.);
   3155 		"%vert_main  = OpFunction %void None %voidf\n"
   3156 		"%vert_entry = OpLabel\n"
   3157 		"%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
   3158 		"              OpStore %position %c_vec4_1\n"
   3159 		"              OpReturn\n"
   3160 		"              OpFunctionEnd\n"
   3161 
   3162 		// Double inputs.
   3163 		"%comp_main1  = OpFunction %void None %voidf\n"
   3164 		"%comp1_entry = OpLabel\n"
   3165 		"%idval1      = OpLoad %uvec3 %id\n"
   3166 		"%x1          = OpCompositeExtract %u32 %idval1 0\n"
   3167 		"%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
   3168 		"%inval1      = OpLoad %f32 %inloc1\n"
   3169 		"%add         = OpFAdd %f32 %inval1 %inval1\n"
   3170 		"%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
   3171 		"               OpStore %outloc1 %add\n"
   3172 		"               OpReturn\n"
   3173 		"               OpFunctionEnd\n"
   3174 
   3175 		// Negate inputs.
   3176 		"%comp_main2  = OpFunction %void None %voidf\n"
   3177 		"%comp2_entry = OpLabel\n"
   3178 		"%idval2      = OpLoad %uvec3 %id\n"
   3179 		"%x2          = OpCompositeExtract %u32 %idval2 0\n"
   3180 		"%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
   3181 		"%inval2      = OpLoad %f32 %inloc2\n"
   3182 		"%neg         = OpFNegate %f32 %inval2\n"
   3183 		"%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
   3184 		"               OpStore %outloc2 %neg\n"
   3185 		"               OpReturn\n"
   3186 		"               OpFunctionEnd\n");
   3187 
   3188 	spec1.assembly = assembly;
   3189 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   3190 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
   3191 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
   3192 	spec1.entryPoint = "entrypoint1";
   3193 
   3194 	spec2.assembly = assembly;
   3195 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   3196 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
   3197 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
   3198 	spec2.entryPoint = "entrypoint2";
   3199 
   3200 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
   3201 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
   3202 
   3203 	return group.release();
   3204 }
   3205 
   3206 inline std::string makeLongUTF8String (size_t num4ByteChars)
   3207 {
   3208 	// An example of a longest valid UTF-8 character.  Be explicit about the
   3209 	// character type because Microsoft compilers can otherwise interpret the
   3210 	// character string as being over wide (16-bit) characters. Ideally, we
   3211 	// would just use a C++11 UTF-8 string literal, but we want to support older
   3212 	// Microsoft compilers.
   3213 	const std::basic_string<char> earthAfrica("\xF0\x9F\x8C\x8D");
   3214 	std::string longString;
   3215 	longString.reserve(num4ByteChars * 4);
   3216 	for (size_t count = 0; count < num4ByteChars; count++)
   3217 	{
   3218 		longString += earthAfrica;
   3219 	}
   3220 	return longString;
   3221 }
   3222 
   3223 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
   3224 {
   3225 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
   3226 	vector<CaseParameter>			cases;
   3227 	de::Random						rnd				(deStringHash(group->getName()));
   3228 	const int						numElements		= 100;
   3229 	vector<float>					positiveFloats	(numElements, 0);
   3230 	vector<float>					negativeFloats	(numElements, 0);
   3231 	const StringTemplate			shaderTemplate	(
   3232 		"OpCapability Shader\n"
   3233 		"OpMemoryModel Logical GLSL450\n"
   3234 
   3235 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   3236 		"OpExecutionMode %main LocalSize 1 1 1\n"
   3237 
   3238 		"${SOURCE}\n"
   3239 
   3240 		"OpName %main           \"main\"\n"
   3241 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3242 
   3243 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3244 
   3245 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3246 
   3247 		"%id        = OpVariable %uvec3ptr Input\n"
   3248 		"%zero      = OpConstant %i32 0\n"
   3249 
   3250 		"%main      = OpFunction %void None %voidf\n"
   3251 		"%label     = OpLabel\n"
   3252 		"%idval     = OpLoad %uvec3 %id\n"
   3253 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3254 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   3255 		"%inval     = OpLoad %f32 %inloc\n"
   3256 		"%neg       = OpFNegate %f32 %inval\n"
   3257 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3258 		"             OpStore %outloc %neg\n"
   3259 		"             OpReturn\n"
   3260 		"             OpFunctionEnd\n");
   3261 
   3262 	cases.push_back(CaseParameter("unknown_source",							"OpSource Unknown 0"));
   3263 	cases.push_back(CaseParameter("wrong_source",							"OpSource OpenCL_C 210"));
   3264 	cases.push_back(CaseParameter("normal_filename",						"%fname = OpString \"filename\"\n"
   3265 																			"OpSource GLSL 430 %fname"));
   3266 	cases.push_back(CaseParameter("empty_filename",							"%fname = OpString \"\"\n"
   3267 																			"OpSource GLSL 430 %fname"));
   3268 	cases.push_back(CaseParameter("normal_source_code",						"%fname = OpString \"filename\"\n"
   3269 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
   3270 	cases.push_back(CaseParameter("empty_source_code",						"%fname = OpString \"filename\"\n"
   3271 																			"OpSource GLSL 430 %fname \"\""));
   3272 	cases.push_back(CaseParameter("long_source_code",						"%fname = OpString \"filename\"\n"
   3273 																			"OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
   3274 	cases.push_back(CaseParameter("utf8_source_code",						"%fname = OpString \"filename\"\n"
   3275 																			"OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
   3276 	cases.push_back(CaseParameter("normal_sourcecontinued",					"%fname = OpString \"filename\"\n"
   3277 																			"OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
   3278 																			"OpSourceContinued \"id main() {}\""));
   3279 	cases.push_back(CaseParameter("empty_sourcecontinued",					"%fname = OpString \"filename\"\n"
   3280 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
   3281 																			"OpSourceContinued \"\""));
   3282 	cases.push_back(CaseParameter("long_sourcecontinued",					"%fname = OpString \"filename\"\n"
   3283 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
   3284 																			"OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
   3285 	cases.push_back(CaseParameter("utf8_sourcecontinued",					"%fname = OpString \"filename\"\n"
   3286 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
   3287 																			"OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
   3288 	cases.push_back(CaseParameter("multi_sourcecontinued",					"%fname = OpString \"filename\"\n"
   3289 																			"OpSource GLSL 430 %fname \"#version 430\n\"\n"
   3290 																			"OpSourceContinued \"void\"\n"
   3291 																			"OpSourceContinued \"main()\"\n"
   3292 																			"OpSourceContinued \"{}\""));
   3293 	cases.push_back(CaseParameter("empty_source_before_sourcecontinued",	"%fname = OpString \"filename\"\n"
   3294 																			"OpSource GLSL 430 %fname \"\"\n"
   3295 																			"OpSourceContinued \"#version 430\nvoid main() {}\""));
   3296 
   3297 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   3298 
   3299 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   3300 		negativeFloats[ndx] = -positiveFloats[ndx];
   3301 
   3302 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   3303 	{
   3304 		map<string, string>		specializations;
   3305 		ComputeShaderSpec		spec;
   3306 
   3307 		specializations["SOURCE"] = cases[caseNdx].param;
   3308 		spec.assembly = shaderTemplate.specialize(specializations);
   3309 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   3310 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   3311 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3312 
   3313 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   3314 	}
   3315 
   3316 	return group.release();
   3317 }
   3318 
   3319 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
   3320 {
   3321 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
   3322 	vector<CaseParameter>			cases;
   3323 	de::Random						rnd				(deStringHash(group->getName()));
   3324 	const int						numElements		= 100;
   3325 	vector<float>					inputFloats		(numElements, 0);
   3326 	vector<float>					outputFloats	(numElements, 0);
   3327 	const StringTemplate			shaderTemplate	(
   3328 		string(getComputeAsmShaderPreamble()) +
   3329 
   3330 		"OpSourceExtension \"${EXTENSION}\"\n"
   3331 
   3332 		"OpName %main           \"main\"\n"
   3333 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3334 
   3335 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3336 
   3337 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3338 
   3339 		"%id        = OpVariable %uvec3ptr Input\n"
   3340 		"%zero      = OpConstant %i32 0\n"
   3341 
   3342 		"%main      = OpFunction %void None %voidf\n"
   3343 		"%label     = OpLabel\n"
   3344 		"%idval     = OpLoad %uvec3 %id\n"
   3345 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3346 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   3347 		"%inval     = OpLoad %f32 %inloc\n"
   3348 		"%neg       = OpFNegate %f32 %inval\n"
   3349 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3350 		"             OpStore %outloc %neg\n"
   3351 		"             OpReturn\n"
   3352 		"             OpFunctionEnd\n");
   3353 
   3354 	cases.push_back(CaseParameter("empty_extension",	""));
   3355 	cases.push_back(CaseParameter("real_extension",		"GL_ARB_texture_rectangle"));
   3356 	cases.push_back(CaseParameter("fake_extension",		"GL_ARB_im_the_ultimate_extension"));
   3357 	cases.push_back(CaseParameter("utf8_extension",		"GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
   3358 	cases.push_back(CaseParameter("long_extension",		makeLongUTF8String(65533) + "ccc")); // word count: 65535
   3359 
   3360 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
   3361 
   3362 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   3363 		outputFloats[ndx] = -inputFloats[ndx];
   3364 
   3365 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   3366 	{
   3367 		map<string, string>		specializations;
   3368 		ComputeShaderSpec		spec;
   3369 
   3370 		specializations["EXTENSION"] = cases[caseNdx].param;
   3371 		spec.assembly = shaderTemplate.specialize(specializations);
   3372 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   3373 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   3374 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3375 
   3376 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   3377 	}
   3378 
   3379 	return group.release();
   3380 }
   3381 
   3382 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
   3383 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
   3384 {
   3385 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
   3386 	vector<CaseParameter>			cases;
   3387 	de::Random						rnd				(deStringHash(group->getName()));
   3388 	const int						numElements		= 100;
   3389 	vector<float>					positiveFloats	(numElements, 0);
   3390 	vector<float>					negativeFloats	(numElements, 0);
   3391 	const StringTemplate			shaderTemplate	(
   3392 		string(getComputeAsmShaderPreamble()) +
   3393 
   3394 		"OpSource GLSL 430\n"
   3395 		"OpName %main           \"main\"\n"
   3396 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3397 
   3398 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3399 
   3400 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   3401 		"%uvec2     = OpTypeVector %u32 2\n"
   3402 		"%bvec3     = OpTypeVector %bool 3\n"
   3403 		"%fvec4     = OpTypeVector %f32 4\n"
   3404 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
   3405 		"%const100  = OpConstant %u32 100\n"
   3406 		"%uarr100   = OpTypeArray %i32 %const100\n"
   3407 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
   3408 		"%pointer   = OpTypePointer Function %i32\n"
   3409 		+ string(getComputeAsmInputOutputBuffer()) +
   3410 
   3411 		"%null      = OpConstantNull ${TYPE}\n"
   3412 
   3413 		"%id        = OpVariable %uvec3ptr Input\n"
   3414 		"%zero      = OpConstant %i32 0\n"
   3415 
   3416 		"%main      = OpFunction %void None %voidf\n"
   3417 		"%label     = OpLabel\n"
   3418 		"%idval     = OpLoad %uvec3 %id\n"
   3419 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3420 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   3421 		"%inval     = OpLoad %f32 %inloc\n"
   3422 		"%neg       = OpFNegate %f32 %inval\n"
   3423 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3424 		"             OpStore %outloc %neg\n"
   3425 		"             OpReturn\n"
   3426 		"             OpFunctionEnd\n");
   3427 
   3428 	cases.push_back(CaseParameter("bool",			"%bool"));
   3429 	cases.push_back(CaseParameter("sint32",			"%i32"));
   3430 	cases.push_back(CaseParameter("uint32",			"%u32"));
   3431 	cases.push_back(CaseParameter("float32",		"%f32"));
   3432 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
   3433 	cases.push_back(CaseParameter("vec3bool",		"%bvec3"));
   3434 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
   3435 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
   3436 	cases.push_back(CaseParameter("array",			"%uarr100"));
   3437 	cases.push_back(CaseParameter("struct",			"%struct"));
   3438 	cases.push_back(CaseParameter("pointer",		"%pointer"));
   3439 
   3440 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   3441 
   3442 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   3443 		negativeFloats[ndx] = -positiveFloats[ndx];
   3444 
   3445 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   3446 	{
   3447 		map<string, string>		specializations;
   3448 		ComputeShaderSpec		spec;
   3449 
   3450 		specializations["TYPE"] = cases[caseNdx].param;
   3451 		spec.assembly = shaderTemplate.specialize(specializations);
   3452 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   3453 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   3454 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3455 
   3456 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   3457 	}
   3458 
   3459 	return group.release();
   3460 }
   3461 
   3462 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
   3463 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
   3464 {
   3465 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
   3466 	vector<CaseParameter>			cases;
   3467 	de::Random						rnd				(deStringHash(group->getName()));
   3468 	const int						numElements		= 100;
   3469 	vector<float>					positiveFloats	(numElements, 0);
   3470 	vector<float>					negativeFloats	(numElements, 0);
   3471 	const StringTemplate			shaderTemplate	(
   3472 		string(getComputeAsmShaderPreamble()) +
   3473 
   3474 		"OpSource GLSL 430\n"
   3475 		"OpName %main           \"main\"\n"
   3476 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3477 
   3478 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3479 
   3480 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3481 
   3482 		"%id        = OpVariable %uvec3ptr Input\n"
   3483 		"%zero      = OpConstant %i32 0\n"
   3484 
   3485 		"${CONSTANT}\n"
   3486 
   3487 		"%main      = OpFunction %void None %voidf\n"
   3488 		"%label     = OpLabel\n"
   3489 		"%idval     = OpLoad %uvec3 %id\n"
   3490 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3491 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   3492 		"%inval     = OpLoad %f32 %inloc\n"
   3493 		"%neg       = OpFNegate %f32 %inval\n"
   3494 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3495 		"             OpStore %outloc %neg\n"
   3496 		"             OpReturn\n"
   3497 		"             OpFunctionEnd\n");
   3498 
   3499 	cases.push_back(CaseParameter("vector",			"%five = OpConstant %u32 5\n"
   3500 													"%const = OpConstantComposite %uvec3 %five %zero %five"));
   3501 	cases.push_back(CaseParameter("matrix",			"%m3fvec3 = OpTypeMatrix %fvec3 3\n"
   3502 													"%ten = OpConstant %f32 10.\n"
   3503 													"%fzero = OpConstant %f32 0.\n"
   3504 													"%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
   3505 													"%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
   3506 	cases.push_back(CaseParameter("struct",			"%m2vec3 = OpTypeMatrix %fvec3 2\n"
   3507 													"%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
   3508 													"%fzero = OpConstant %f32 0.\n"
   3509 													"%one = OpConstant %f32 1.\n"
   3510 													"%point5 = OpConstant %f32 0.5\n"
   3511 													"%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
   3512 													"%mat = OpConstantComposite %m2vec3 %vec %vec\n"
   3513 													"%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
   3514 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %u32 %f32\n"
   3515 													"%st2 = OpTypeStruct %i32 %i32\n"
   3516 													"%struct = OpTypeStruct %st1 %st2\n"
   3517 													"%point5 = OpConstant %f32 0.5\n"
   3518 													"%one = OpConstant %u32 1\n"
   3519 													"%ten = OpConstant %i32 10\n"
   3520 													"%st1val = OpConstantComposite %st1 %one %point5\n"
   3521 													"%st2val = OpConstantComposite %st2 %ten %ten\n"
   3522 													"%const = OpConstantComposite %struct %st1val %st2val"));
   3523 
   3524 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   3525 
   3526 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   3527 		negativeFloats[ndx] = -positiveFloats[ndx];
   3528 
   3529 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   3530 	{
   3531 		map<string, string>		specializations;
   3532 		ComputeShaderSpec		spec;
   3533 
   3534 		specializations["CONSTANT"] = cases[caseNdx].param;
   3535 		spec.assembly = shaderTemplate.specialize(specializations);
   3536 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   3537 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   3538 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3539 
   3540 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   3541 	}
   3542 
   3543 	return group.release();
   3544 }
   3545 
   3546 // Creates a floating point number with the given exponent, and significand
   3547 // bits set. It can only create normalized numbers. Only the least significant
   3548 // 24 bits of the significand will be examined. The final bit of the
   3549 // significand will also be ignored. This allows alignment to be written
   3550 // similarly to C99 hex-floats.
   3551 // For example if you wanted to write 0x1.7f34p-12 you would call
   3552 // constructNormalizedFloat(-12, 0x7f3400)
   3553 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
   3554 {
   3555 	float f = 1.0f;
   3556 
   3557 	for (deInt32 idx = 0; idx < 23; ++idx)
   3558 	{
   3559 		f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
   3560 		significand <<= 1;
   3561 	}
   3562 
   3563 	return std::ldexp(f, exponent);
   3564 }
   3565 
   3566 // Compare instruction for the OpQuantizeF16 compute exact case.
   3567 // Returns true if the output is what is expected from the test case.
   3568 bool compareOpQuantizeF16ComputeExactCase (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
   3569 {
   3570 	if (outputAllocs.size() != 1)
   3571 		return false;
   3572 
   3573 	// Only size is needed because we cannot compare Nans.
   3574 	size_t byteSize = expectedOutputs[0]->getByteSize();
   3575 
   3576 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
   3577 
   3578 	if (byteSize != 4*sizeof(float)) {
   3579 		return false;
   3580 	}
   3581 
   3582 	if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
   3583 		*outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
   3584 		return false;
   3585 	}
   3586 	outputAsFloat++;
   3587 
   3588 	if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
   3589 		*outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
   3590 		return false;
   3591 	}
   3592 	outputAsFloat++;
   3593 
   3594 	if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
   3595 		*outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
   3596 		return false;
   3597 	}
   3598 	outputAsFloat++;
   3599 
   3600 	if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
   3601 		*outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
   3602 		return false;
   3603 	}
   3604 
   3605 	return true;
   3606 }
   3607 
   3608 // Checks that every output from a test-case is a float NaN.
   3609 bool compareNan (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
   3610 {
   3611 	if (outputAllocs.size() != 1)
   3612 		return false;
   3613 
   3614 	// Only size is needed because we cannot compare Nans.
   3615 	size_t byteSize = expectedOutputs[0]->getByteSize();
   3616 
   3617 	const float* const	output_as_float	= static_cast<const float* const>(outputAllocs[0]->getHostPtr());
   3618 
   3619 	for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
   3620 	{
   3621 		if (!deFloatIsNaN(output_as_float[idx]))
   3622 		{
   3623 			return false;
   3624 		}
   3625 	}
   3626 
   3627 	return true;
   3628 }
   3629 
   3630 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
   3631 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
   3632 {
   3633 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
   3634 
   3635 	const std::string shader (
   3636 		string(getComputeAsmShaderPreamble()) +
   3637 
   3638 		"OpSource GLSL 430\n"
   3639 		"OpName %main           \"main\"\n"
   3640 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3641 
   3642 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3643 
   3644 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3645 
   3646 		"%id        = OpVariable %uvec3ptr Input\n"
   3647 		"%zero      = OpConstant %i32 0\n"
   3648 
   3649 		"%main      = OpFunction %void None %voidf\n"
   3650 		"%label     = OpLabel\n"
   3651 		"%idval     = OpLoad %uvec3 %id\n"
   3652 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3653 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   3654 		"%inval     = OpLoad %f32 %inloc\n"
   3655 		"%quant     = OpQuantizeToF16 %f32 %inval\n"
   3656 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3657 		"             OpStore %outloc %quant\n"
   3658 		"             OpReturn\n"
   3659 		"             OpFunctionEnd\n");
   3660 
   3661 	{
   3662 		ComputeShaderSpec	spec;
   3663 		const deUint32		numElements		= 100;
   3664 		vector<float>		infinities;
   3665 		vector<float>		results;
   3666 
   3667 		infinities.reserve(numElements);
   3668 		results.reserve(numElements);
   3669 
   3670 		for (size_t idx = 0; idx < numElements; ++idx)
   3671 		{
   3672 			switch(idx % 4)
   3673 			{
   3674 				case 0:
   3675 					infinities.push_back(std::numeric_limits<float>::infinity());
   3676 					results.push_back(std::numeric_limits<float>::infinity());
   3677 					break;
   3678 				case 1:
   3679 					infinities.push_back(-std::numeric_limits<float>::infinity());
   3680 					results.push_back(-std::numeric_limits<float>::infinity());
   3681 					break;
   3682 				case 2:
   3683 					infinities.push_back(std::ldexp(1.0f, 16));
   3684 					results.push_back(std::numeric_limits<float>::infinity());
   3685 					break;
   3686 				case 3:
   3687 					infinities.push_back(std::ldexp(-1.0f, 32));
   3688 					results.push_back(-std::numeric_limits<float>::infinity());
   3689 					break;
   3690 			}
   3691 		}
   3692 
   3693 		spec.assembly = shader;
   3694 		spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
   3695 		spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
   3696 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3697 
   3698 		group->addChild(new SpvAsmComputeShaderCase(
   3699 			testCtx, "infinities", "Check that infinities propagated and created", spec));
   3700 	}
   3701 
   3702 	{
   3703 		ComputeShaderSpec	spec;
   3704 		vector<float>		nans;
   3705 		const deUint32		numElements		= 100;
   3706 
   3707 		nans.reserve(numElements);
   3708 
   3709 		for (size_t idx = 0; idx < numElements; ++idx)
   3710 		{
   3711 			if (idx % 2 == 0)
   3712 			{
   3713 				nans.push_back(std::numeric_limits<float>::quiet_NaN());
   3714 			}
   3715 			else
   3716 			{
   3717 				nans.push_back(-std::numeric_limits<float>::quiet_NaN());
   3718 			}
   3719 		}
   3720 
   3721 		spec.assembly = shader;
   3722 		spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
   3723 		spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
   3724 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3725 		spec.verifyIO = &compareNan;
   3726 
   3727 		group->addChild(new SpvAsmComputeShaderCase(
   3728 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
   3729 	}
   3730 
   3731 	{
   3732 		ComputeShaderSpec	spec;
   3733 		vector<float>		small;
   3734 		vector<float>		zeros;
   3735 		const deUint32		numElements		= 100;
   3736 
   3737 		small.reserve(numElements);
   3738 		zeros.reserve(numElements);
   3739 
   3740 		for (size_t idx = 0; idx < numElements; ++idx)
   3741 		{
   3742 			switch(idx % 6)
   3743 			{
   3744 				case 0:
   3745 					small.push_back(0.f);
   3746 					zeros.push_back(0.f);
   3747 					break;
   3748 				case 1:
   3749 					small.push_back(-0.f);
   3750 					zeros.push_back(-0.f);
   3751 					break;
   3752 				case 2:
   3753 					small.push_back(std::ldexp(1.0f, -16));
   3754 					zeros.push_back(0.f);
   3755 					break;
   3756 				case 3:
   3757 					small.push_back(std::ldexp(-1.0f, -32));
   3758 					zeros.push_back(-0.f);
   3759 					break;
   3760 				case 4:
   3761 					small.push_back(std::ldexp(1.0f, -127));
   3762 					zeros.push_back(0.f);
   3763 					break;
   3764 				case 5:
   3765 					small.push_back(-std::ldexp(1.0f, -128));
   3766 					zeros.push_back(-0.f);
   3767 					break;
   3768 			}
   3769 		}
   3770 
   3771 		spec.assembly = shader;
   3772 		spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
   3773 		spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
   3774 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3775 
   3776 		group->addChild(new SpvAsmComputeShaderCase(
   3777 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
   3778 	}
   3779 
   3780 	{
   3781 		ComputeShaderSpec	spec;
   3782 		vector<float>		exact;
   3783 		const deUint32		numElements		= 200;
   3784 
   3785 		exact.reserve(numElements);
   3786 
   3787 		for (size_t idx = 0; idx < numElements; ++idx)
   3788 			exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
   3789 
   3790 		spec.assembly = shader;
   3791 		spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
   3792 		spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
   3793 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3794 
   3795 		group->addChild(new SpvAsmComputeShaderCase(
   3796 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
   3797 	}
   3798 
   3799 	{
   3800 		ComputeShaderSpec	spec;
   3801 		vector<float>		inputs;
   3802 		const deUint32		numElements		= 4;
   3803 
   3804 		inputs.push_back(constructNormalizedFloat(8,	0x300300));
   3805 		inputs.push_back(-constructNormalizedFloat(-7,	0x600800));
   3806 		inputs.push_back(constructNormalizedFloat(2,	0x01E000));
   3807 		inputs.push_back(constructNormalizedFloat(1,	0xFFE000));
   3808 
   3809 		spec.assembly = shader;
   3810 		spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
   3811 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3812 		spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3813 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   3814 
   3815 		group->addChild(new SpvAsmComputeShaderCase(
   3816 			testCtx, "rounded", "Check that are rounded when needed", spec));
   3817 	}
   3818 
   3819 	return group.release();
   3820 }
   3821 
   3822 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
   3823 {
   3824 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
   3825 
   3826 	const std::string shader (
   3827 		string(getComputeAsmShaderPreamble()) +
   3828 
   3829 		"OpName %main           \"main\"\n"
   3830 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   3831 
   3832 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   3833 
   3834 		"OpDecorate %sc_0  SpecId 0\n"
   3835 		"OpDecorate %sc_1  SpecId 1\n"
   3836 		"OpDecorate %sc_2  SpecId 2\n"
   3837 		"OpDecorate %sc_3  SpecId 3\n"
   3838 		"OpDecorate %sc_4  SpecId 4\n"
   3839 		"OpDecorate %sc_5  SpecId 5\n"
   3840 
   3841 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   3842 
   3843 		"%id        = OpVariable %uvec3ptr Input\n"
   3844 		"%zero      = OpConstant %i32 0\n"
   3845 		"%c_u32_6   = OpConstant %u32 6\n"
   3846 
   3847 		"%sc_0      = OpSpecConstant %f32 0.\n"
   3848 		"%sc_1      = OpSpecConstant %f32 0.\n"
   3849 		"%sc_2      = OpSpecConstant %f32 0.\n"
   3850 		"%sc_3      = OpSpecConstant %f32 0.\n"
   3851 		"%sc_4      = OpSpecConstant %f32 0.\n"
   3852 		"%sc_5      = OpSpecConstant %f32 0.\n"
   3853 
   3854 		"%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
   3855 		"%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
   3856 		"%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
   3857 		"%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
   3858 		"%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
   3859 		"%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
   3860 
   3861 		"%main      = OpFunction %void None %voidf\n"
   3862 		"%label     = OpLabel\n"
   3863 		"%idval     = OpLoad %uvec3 %id\n"
   3864 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   3865 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   3866 		"%selector  = OpUMod %u32 %x %c_u32_6\n"
   3867 		"            OpSelectionMerge %exit None\n"
   3868 		"            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
   3869 
   3870 		"%case0     = OpLabel\n"
   3871 		"             OpStore %outloc %sc_0_quant\n"
   3872 		"             OpBranch %exit\n"
   3873 
   3874 		"%case1     = OpLabel\n"
   3875 		"             OpStore %outloc %sc_1_quant\n"
   3876 		"             OpBranch %exit\n"
   3877 
   3878 		"%case2     = OpLabel\n"
   3879 		"             OpStore %outloc %sc_2_quant\n"
   3880 		"             OpBranch %exit\n"
   3881 
   3882 		"%case3     = OpLabel\n"
   3883 		"             OpStore %outloc %sc_3_quant\n"
   3884 		"             OpBranch %exit\n"
   3885 
   3886 		"%case4     = OpLabel\n"
   3887 		"             OpStore %outloc %sc_4_quant\n"
   3888 		"             OpBranch %exit\n"
   3889 
   3890 		"%case5     = OpLabel\n"
   3891 		"             OpStore %outloc %sc_5_quant\n"
   3892 		"             OpBranch %exit\n"
   3893 
   3894 		"%exit      = OpLabel\n"
   3895 		"             OpReturn\n"
   3896 
   3897 		"             OpFunctionEnd\n");
   3898 
   3899 	{
   3900 		ComputeShaderSpec	spec;
   3901 		const deUint8		numCases	= 4;
   3902 		vector<float>		inputs		(numCases, 0.f);
   3903 		vector<float>		outputs;
   3904 
   3905 		spec.assembly		= shader;
   3906 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
   3907 
   3908 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
   3909 		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
   3910 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
   3911 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
   3912 
   3913 		outputs.push_back(std::numeric_limits<float>::infinity());
   3914 		outputs.push_back(-std::numeric_limits<float>::infinity());
   3915 		outputs.push_back(std::numeric_limits<float>::infinity());
   3916 		outputs.push_back(-std::numeric_limits<float>::infinity());
   3917 
   3918 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3919 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
   3920 
   3921 		group->addChild(new SpvAsmComputeShaderCase(
   3922 			testCtx, "infinities", "Check that infinities propagated and created", spec));
   3923 	}
   3924 
   3925 	{
   3926 		ComputeShaderSpec	spec;
   3927 		const deUint8		numCases	= 2;
   3928 		vector<float>		inputs		(numCases, 0.f);
   3929 		vector<float>		outputs;
   3930 
   3931 		spec.assembly		= shader;
   3932 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
   3933 		spec.verifyIO		= &compareNan;
   3934 
   3935 		outputs.push_back(std::numeric_limits<float>::quiet_NaN());
   3936 		outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
   3937 
   3938 		for (deUint8 idx = 0; idx < numCases; ++idx)
   3939 			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
   3940 
   3941 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3942 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
   3943 
   3944 		group->addChild(new SpvAsmComputeShaderCase(
   3945 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
   3946 	}
   3947 
   3948 	{
   3949 		ComputeShaderSpec	spec;
   3950 		const deUint8		numCases	= 6;
   3951 		vector<float>		inputs		(numCases, 0.f);
   3952 		vector<float>		outputs;
   3953 
   3954 		spec.assembly		= shader;
   3955 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
   3956 
   3957 		spec.specConstants.push_back(bitwiseCast<deUint32>(0.f));
   3958 		spec.specConstants.push_back(bitwiseCast<deUint32>(-0.f));
   3959 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
   3960 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
   3961 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
   3962 		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
   3963 
   3964 		outputs.push_back(0.f);
   3965 		outputs.push_back(-0.f);
   3966 		outputs.push_back(0.f);
   3967 		outputs.push_back(-0.f);
   3968 		outputs.push_back(0.f);
   3969 		outputs.push_back(-0.f);
   3970 
   3971 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3972 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
   3973 
   3974 		group->addChild(new SpvAsmComputeShaderCase(
   3975 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
   3976 	}
   3977 
   3978 	{
   3979 		ComputeShaderSpec	spec;
   3980 		const deUint8		numCases	= 6;
   3981 		vector<float>		inputs		(numCases, 0.f);
   3982 		vector<float>		outputs;
   3983 
   3984 		spec.assembly		= shader;
   3985 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
   3986 
   3987 		for (deUint8 idx = 0; idx < 6; ++idx)
   3988 		{
   3989 			const float f = static_cast<float>(idx * 10 - 30) / 4.f;
   3990 			spec.specConstants.push_back(bitwiseCast<deUint32>(f));
   3991 			outputs.push_back(f);
   3992 		}
   3993 
   3994 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   3995 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
   3996 
   3997 		group->addChild(new SpvAsmComputeShaderCase(
   3998 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
   3999 	}
   4000 
   4001 	{
   4002 		ComputeShaderSpec	spec;
   4003 		const deUint8		numCases	= 4;
   4004 		vector<float>		inputs		(numCases, 0.f);
   4005 		vector<float>		outputs;
   4006 
   4007 		spec.assembly		= shader;
   4008 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
   4009 		spec.verifyIO		= &compareOpQuantizeF16ComputeExactCase;
   4010 
   4011 		outputs.push_back(constructNormalizedFloat(8, 0x300300));
   4012 		outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
   4013 		outputs.push_back(constructNormalizedFloat(2, 0x01E000));
   4014 		outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
   4015 
   4016 		for (deUint8 idx = 0; idx < numCases; ++idx)
   4017 			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
   4018 
   4019 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
   4020 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
   4021 
   4022 		group->addChild(new SpvAsmComputeShaderCase(
   4023 			testCtx, "rounded", "Check that are rounded when needed", spec));
   4024 	}
   4025 
   4026 	return group.release();
   4027 }
   4028 
   4029 // Checks that constant null/composite values can be used in computation.
   4030 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
   4031 {
   4032 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
   4033 	ComputeShaderSpec				spec;
   4034 	de::Random						rnd				(deStringHash(group->getName()));
   4035 	const int						numElements		= 100;
   4036 	vector<float>					positiveFloats	(numElements, 0);
   4037 	vector<float>					negativeFloats	(numElements, 0);
   4038 
   4039 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   4040 
   4041 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4042 		negativeFloats[ndx] = -positiveFloats[ndx];
   4043 
   4044 	spec.assembly =
   4045 		"OpCapability Shader\n"
   4046 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
   4047 		"OpMemoryModel Logical GLSL450\n"
   4048 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   4049 		"OpExecutionMode %main LocalSize 1 1 1\n"
   4050 
   4051 		"OpSource GLSL 430\n"
   4052 		"OpName %main           \"main\"\n"
   4053 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   4054 
   4055 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4056 
   4057 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   4058 
   4059 		"%fmat      = OpTypeMatrix %fvec3 3\n"
   4060 		"%ten       = OpConstant %u32 10\n"
   4061 		"%f32arr10  = OpTypeArray %f32 %ten\n"
   4062 		"%fst       = OpTypeStruct %f32 %f32\n"
   4063 
   4064 		+ string(getComputeAsmInputOutputBuffer()) +
   4065 
   4066 		"%id        = OpVariable %uvec3ptr Input\n"
   4067 		"%zero      = OpConstant %i32 0\n"
   4068 
   4069 		// Create a bunch of null values
   4070 		"%unull     = OpConstantNull %u32\n"
   4071 		"%fnull     = OpConstantNull %f32\n"
   4072 		"%vnull     = OpConstantNull %fvec3\n"
   4073 		"%mnull     = OpConstantNull %fmat\n"
   4074 		"%anull     = OpConstantNull %f32arr10\n"
   4075 		"%snull     = OpConstantComposite %fst %fnull %fnull\n"
   4076 
   4077 		"%main      = OpFunction %void None %voidf\n"
   4078 		"%label     = OpLabel\n"
   4079 		"%idval     = OpLoad %uvec3 %id\n"
   4080 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   4081 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   4082 		"%inval     = OpLoad %f32 %inloc\n"
   4083 		"%neg       = OpFNegate %f32 %inval\n"
   4084 
   4085 		// Get the abs() of (a certain element of) those null values
   4086 		"%unull_cov = OpConvertUToF %f32 %unull\n"
   4087 		"%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
   4088 		"%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
   4089 		"%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
   4090 		"%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
   4091 		"%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
   4092 		"%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
   4093 		"%anull_3   = OpCompositeExtract %f32 %anull 3\n"
   4094 		"%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
   4095 		"%snull_1   = OpCompositeExtract %f32 %snull 1\n"
   4096 		"%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
   4097 
   4098 		// Add them all
   4099 		"%add1      = OpFAdd %f32 %neg  %unull_abs\n"
   4100 		"%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
   4101 		"%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
   4102 		"%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
   4103 		"%add5      = OpFAdd %f32 %add4 %anull_abs\n"
   4104 		"%final     = OpFAdd %f32 %add5 %snull_abs\n"
   4105 
   4106 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   4107 		"             OpStore %outloc %final\n" // write to output
   4108 		"             OpReturn\n"
   4109 		"             OpFunctionEnd\n";
   4110 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   4111 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   4112 	spec.numWorkGroups = IVec3(numElements, 1, 1);
   4113 
   4114 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
   4115 
   4116 	return group.release();
   4117 }
   4118 
   4119 // Assembly code used for testing loop control is based on GLSL source code:
   4120 // #version 430
   4121 //
   4122 // layout(std140, set = 0, binding = 0) readonly buffer Input {
   4123 //   float elements[];
   4124 // } input_data;
   4125 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
   4126 //   float elements[];
   4127 // } output_data;
   4128 //
   4129 // void main() {
   4130 //   uint x = gl_GlobalInvocationID.x;
   4131 //   output_data.elements[x] = input_data.elements[x];
   4132 //   for (uint i = 0; i < 4; ++i)
   4133 //     output_data.elements[x] += 1.f;
   4134 // }
   4135 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
   4136 {
   4137 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
   4138 	vector<CaseParameter>			cases;
   4139 	de::Random						rnd				(deStringHash(group->getName()));
   4140 	const int						numElements		= 100;
   4141 	vector<float>					inputFloats		(numElements, 0);
   4142 	vector<float>					outputFloats	(numElements, 0);
   4143 	const StringTemplate			shaderTemplate	(
   4144 		string(getComputeAsmShaderPreamble()) +
   4145 
   4146 		"OpSource GLSL 430\n"
   4147 		"OpName %main \"main\"\n"
   4148 		"OpName %id \"gl_GlobalInvocationID\"\n"
   4149 
   4150 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4151 
   4152 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   4153 
   4154 		"%u32ptr      = OpTypePointer Function %u32\n"
   4155 
   4156 		"%id          = OpVariable %uvec3ptr Input\n"
   4157 		"%zero        = OpConstant %i32 0\n"
   4158 		"%uzero       = OpConstant %u32 0\n"
   4159 		"%one         = OpConstant %i32 1\n"
   4160 		"%constf1     = OpConstant %f32 1.0\n"
   4161 		"%four        = OpConstant %u32 4\n"
   4162 
   4163 		"%main        = OpFunction %void None %voidf\n"
   4164 		"%entry       = OpLabel\n"
   4165 		"%i           = OpVariable %u32ptr Function\n"
   4166 		"               OpStore %i %uzero\n"
   4167 
   4168 		"%idval       = OpLoad %uvec3 %id\n"
   4169 		"%x           = OpCompositeExtract %u32 %idval 0\n"
   4170 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
   4171 		"%inval       = OpLoad %f32 %inloc\n"
   4172 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
   4173 		"               OpStore %outloc %inval\n"
   4174 		"               OpBranch %loop_entry\n"
   4175 
   4176 		"%loop_entry  = OpLabel\n"
   4177 		"%i_val       = OpLoad %u32 %i\n"
   4178 		"%cmp_lt      = OpULessThan %bool %i_val %four\n"
   4179 		"               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
   4180 		"               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
   4181 		"%loop_body   = OpLabel\n"
   4182 		"%outval      = OpLoad %f32 %outloc\n"
   4183 		"%addf1       = OpFAdd %f32 %outval %constf1\n"
   4184 		"               OpStore %outloc %addf1\n"
   4185 		"%new_i       = OpIAdd %u32 %i_val %one\n"
   4186 		"               OpStore %i %new_i\n"
   4187 		"               OpBranch %loop_entry\n"
   4188 		"%loop_merge  = OpLabel\n"
   4189 		"               OpReturn\n"
   4190 		"               OpFunctionEnd\n");
   4191 
   4192 	cases.push_back(CaseParameter("none",				"None"));
   4193 	cases.push_back(CaseParameter("unroll",				"Unroll"));
   4194 	cases.push_back(CaseParameter("dont_unroll",		"DontUnroll"));
   4195 	cases.push_back(CaseParameter("unroll_dont_unroll",	"Unroll|DontUnroll"));
   4196 
   4197 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
   4198 
   4199 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4200 		outputFloats[ndx] = inputFloats[ndx] + 4.f;
   4201 
   4202 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   4203 	{
   4204 		map<string, string>		specializations;
   4205 		ComputeShaderSpec		spec;
   4206 
   4207 		specializations["CONTROL"] = cases[caseNdx].param;
   4208 		spec.assembly = shaderTemplate.specialize(specializations);
   4209 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   4210 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   4211 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   4212 
   4213 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   4214 	}
   4215 
   4216 	group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
   4217 	group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
   4218 
   4219 	return group.release();
   4220 }
   4221 
   4222 // Assembly code used for testing selection control is based on GLSL source code:
   4223 // #version 430
   4224 //
   4225 // layout(std140, set = 0, binding = 0) readonly buffer Input {
   4226 //   float elements[];
   4227 // } input_data;
   4228 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
   4229 //   float elements[];
   4230 // } output_data;
   4231 //
   4232 // void main() {
   4233 //   uint x = gl_GlobalInvocationID.x;
   4234 //   float val = input_data.elements[x];
   4235 //   if (val > 10.f)
   4236 //     output_data.elements[x] = val + 1.f;
   4237 //   else
   4238 //     output_data.elements[x] = val - 1.f;
   4239 // }
   4240 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
   4241 {
   4242 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
   4243 	vector<CaseParameter>			cases;
   4244 	de::Random						rnd				(deStringHash(group->getName()));
   4245 	const int						numElements		= 100;
   4246 	vector<float>					inputFloats		(numElements, 0);
   4247 	vector<float>					outputFloats	(numElements, 0);
   4248 	const StringTemplate			shaderTemplate	(
   4249 		string(getComputeAsmShaderPreamble()) +
   4250 
   4251 		"OpSource GLSL 430\n"
   4252 		"OpName %main \"main\"\n"
   4253 		"OpName %id \"gl_GlobalInvocationID\"\n"
   4254 
   4255 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4256 
   4257 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   4258 
   4259 		"%id       = OpVariable %uvec3ptr Input\n"
   4260 		"%zero     = OpConstant %i32 0\n"
   4261 		"%constf1  = OpConstant %f32 1.0\n"
   4262 		"%constf10 = OpConstant %f32 10.0\n"
   4263 
   4264 		"%main     = OpFunction %void None %voidf\n"
   4265 		"%entry    = OpLabel\n"
   4266 		"%idval    = OpLoad %uvec3 %id\n"
   4267 		"%x        = OpCompositeExtract %u32 %idval 0\n"
   4268 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
   4269 		"%inval    = OpLoad %f32 %inloc\n"
   4270 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
   4271 		"%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
   4272 
   4273 		"            OpSelectionMerge %if_end ${CONTROL}\n"
   4274 		"            OpBranchConditional %cmp_gt %if_true %if_false\n"
   4275 		"%if_true  = OpLabel\n"
   4276 		"%addf1    = OpFAdd %f32 %inval %constf1\n"
   4277 		"            OpStore %outloc %addf1\n"
   4278 		"            OpBranch %if_end\n"
   4279 		"%if_false = OpLabel\n"
   4280 		"%subf1    = OpFSub %f32 %inval %constf1\n"
   4281 		"            OpStore %outloc %subf1\n"
   4282 		"            OpBranch %if_end\n"
   4283 		"%if_end   = OpLabel\n"
   4284 		"            OpReturn\n"
   4285 		"            OpFunctionEnd\n");
   4286 
   4287 	cases.push_back(CaseParameter("none",					"None"));
   4288 	cases.push_back(CaseParameter("flatten",				"Flatten"));
   4289 	cases.push_back(CaseParameter("dont_flatten",			"DontFlatten"));
   4290 	cases.push_back(CaseParameter("flatten_dont_flatten",	"DontFlatten|Flatten"));
   4291 
   4292 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
   4293 
   4294 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   4295 	floorAll(inputFloats);
   4296 
   4297 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4298 		outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
   4299 
   4300 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   4301 	{
   4302 		map<string, string>		specializations;
   4303 		ComputeShaderSpec		spec;
   4304 
   4305 		specializations["CONTROL"] = cases[caseNdx].param;
   4306 		spec.assembly = shaderTemplate.specialize(specializations);
   4307 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   4308 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   4309 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   4310 
   4311 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   4312 	}
   4313 
   4314 	return group.release();
   4315 }
   4316 
   4317 // Assembly code used for testing function control is based on GLSL source code:
   4318 //
   4319 // #version 430
   4320 //
   4321 // layout(std140, set = 0, binding = 0) readonly buffer Input {
   4322 //   float elements[];
   4323 // } input_data;
   4324 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
   4325 //   float elements[];
   4326 // } output_data;
   4327 //
   4328 // float const10() { return 10.f; }
   4329 //
   4330 // void main() {
   4331 //   uint x = gl_GlobalInvocationID.x;
   4332 //   output_data.elements[x] = input_data.elements[x] + const10();
   4333 // }
   4334 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
   4335 {
   4336 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
   4337 	vector<CaseParameter>			cases;
   4338 	de::Random						rnd				(deStringHash(group->getName()));
   4339 	const int						numElements		= 100;
   4340 	vector<float>					inputFloats		(numElements, 0);
   4341 	vector<float>					outputFloats	(numElements, 0);
   4342 	const StringTemplate			shaderTemplate	(
   4343 		string(getComputeAsmShaderPreamble()) +
   4344 
   4345 		"OpSource GLSL 430\n"
   4346 		"OpName %main \"main\"\n"
   4347 		"OpName %func_const10 \"const10(\"\n"
   4348 		"OpName %id \"gl_GlobalInvocationID\"\n"
   4349 
   4350 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4351 
   4352 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   4353 
   4354 		"%f32f = OpTypeFunction %f32\n"
   4355 		"%id = OpVariable %uvec3ptr Input\n"
   4356 		"%zero = OpConstant %i32 0\n"
   4357 		"%constf10 = OpConstant %f32 10.0\n"
   4358 
   4359 		"%main         = OpFunction %void None %voidf\n"
   4360 		"%entry        = OpLabel\n"
   4361 		"%idval        = OpLoad %uvec3 %id\n"
   4362 		"%x            = OpCompositeExtract %u32 %idval 0\n"
   4363 		"%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
   4364 		"%inval        = OpLoad %f32 %inloc\n"
   4365 		"%ret_10       = OpFunctionCall %f32 %func_const10\n"
   4366 		"%fadd         = OpFAdd %f32 %inval %ret_10\n"
   4367 		"%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
   4368 		"                OpStore %outloc %fadd\n"
   4369 		"                OpReturn\n"
   4370 		"                OpFunctionEnd\n"
   4371 
   4372 		"%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
   4373 		"%label        = OpLabel\n"
   4374 		"                OpReturnValue %constf10\n"
   4375 		"                OpFunctionEnd\n");
   4376 
   4377 	cases.push_back(CaseParameter("none",						"None"));
   4378 	cases.push_back(CaseParameter("inline",						"Inline"));
   4379 	cases.push_back(CaseParameter("dont_inline",				"DontInline"));
   4380 	cases.push_back(CaseParameter("pure",						"Pure"));
   4381 	cases.push_back(CaseParameter("const",						"Const"));
   4382 	cases.push_back(CaseParameter("inline_pure",				"Inline|Pure"));
   4383 	cases.push_back(CaseParameter("const_dont_inline",			"Const|DontInline"));
   4384 	cases.push_back(CaseParameter("inline_dont_inline",			"Inline|DontInline"));
   4385 	cases.push_back(CaseParameter("pure_inline_dont_inline",	"Pure|Inline|DontInline"));
   4386 
   4387 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
   4388 
   4389 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
   4390 	floorAll(inputFloats);
   4391 
   4392 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4393 		outputFloats[ndx] = inputFloats[ndx] + 10.f;
   4394 
   4395 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   4396 	{
   4397 		map<string, string>		specializations;
   4398 		ComputeShaderSpec		spec;
   4399 
   4400 		specializations["CONTROL"] = cases[caseNdx].param;
   4401 		spec.assembly = shaderTemplate.specialize(specializations);
   4402 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   4403 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   4404 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   4405 
   4406 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   4407 	}
   4408 
   4409 	return group.release();
   4410 }
   4411 
   4412 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
   4413 {
   4414 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
   4415 	vector<CaseParameter>			cases;
   4416 	de::Random						rnd				(deStringHash(group->getName()));
   4417 	const int						numElements		= 100;
   4418 	vector<float>					inputFloats		(numElements, 0);
   4419 	vector<float>					outputFloats	(numElements, 0);
   4420 	const StringTemplate			shaderTemplate	(
   4421 		string(getComputeAsmShaderPreamble()) +
   4422 
   4423 		"OpSource GLSL 430\n"
   4424 		"OpName %main           \"main\"\n"
   4425 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   4426 
   4427 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4428 
   4429 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
   4430 
   4431 		"%f32ptr_f  = OpTypePointer Function %f32\n"
   4432 
   4433 		"%id        = OpVariable %uvec3ptr Input\n"
   4434 		"%zero      = OpConstant %i32 0\n"
   4435 		"%four      = OpConstant %i32 4\n"
   4436 
   4437 		"%main      = OpFunction %void None %voidf\n"
   4438 		"%label     = OpLabel\n"
   4439 		"%copy      = OpVariable %f32ptr_f Function\n"
   4440 		"%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
   4441 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   4442 		"%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
   4443 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   4444 		"             OpCopyMemory %copy %inloc ${ACCESS}\n"
   4445 		"%val1      = OpLoad %f32 %copy\n"
   4446 		"%val2      = OpLoad %f32 %inloc\n"
   4447 		"%add       = OpFAdd %f32 %val1 %val2\n"
   4448 		"             OpStore %outloc %add ${ACCESS}\n"
   4449 		"             OpReturn\n"
   4450 		"             OpFunctionEnd\n");
   4451 
   4452 	cases.push_back(CaseParameter("null",					""));
   4453 	cases.push_back(CaseParameter("none",					"None"));
   4454 	cases.push_back(CaseParameter("volatile",				"Volatile"));
   4455 	cases.push_back(CaseParameter("aligned",				"Aligned 4"));
   4456 	cases.push_back(CaseParameter("nontemporal",			"Nontemporal"));
   4457 	cases.push_back(CaseParameter("aligned_nontemporal",	"Aligned|Nontemporal 4"));
   4458 	cases.push_back(CaseParameter("aligned_volatile",		"Volatile|Aligned 4"));
   4459 
   4460 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
   4461 
   4462 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4463 		outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
   4464 
   4465 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   4466 	{
   4467 		map<string, string>		specializations;
   4468 		ComputeShaderSpec		spec;
   4469 
   4470 		specializations["ACCESS"] = cases[caseNdx].param;
   4471 		spec.assembly = shaderTemplate.specialize(specializations);
   4472 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
   4473 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
   4474 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   4475 
   4476 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   4477 	}
   4478 
   4479 	return group.release();
   4480 }
   4481 
   4482 // Checks that we can get undefined values for various types, without exercising a computation with it.
   4483 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
   4484 {
   4485 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
   4486 	vector<CaseParameter>			cases;
   4487 	de::Random						rnd				(deStringHash(group->getName()));
   4488 	const int						numElements		= 100;
   4489 	vector<float>					positiveFloats	(numElements, 0);
   4490 	vector<float>					negativeFloats	(numElements, 0);
   4491 	const StringTemplate			shaderTemplate	(
   4492 		string(getComputeAsmShaderPreamble()) +
   4493 
   4494 		"OpSource GLSL 430\n"
   4495 		"OpName %main           \"main\"\n"
   4496 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   4497 
   4498 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   4499 
   4500 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
   4501 		"%uvec2     = OpTypeVector %u32 2\n"
   4502 		"%fvec4     = OpTypeVector %f32 4\n"
   4503 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
   4504 		"%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
   4505 		"%sampler   = OpTypeSampler\n"
   4506 		"%simage    = OpTypeSampledImage %image\n"
   4507 		"%const100  = OpConstant %u32 100\n"
   4508 		"%uarr100   = OpTypeArray %i32 %const100\n"
   4509 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
   4510 		"%pointer   = OpTypePointer Function %i32\n"
   4511 		+ string(getComputeAsmInputOutputBuffer()) +
   4512 
   4513 		"%id        = OpVariable %uvec3ptr Input\n"
   4514 		"%zero      = OpConstant %i32 0\n"
   4515 
   4516 		"%main      = OpFunction %void None %voidf\n"
   4517 		"%label     = OpLabel\n"
   4518 
   4519 		"%undef     = OpUndef ${TYPE}\n"
   4520 
   4521 		"%idval     = OpLoad %uvec3 %id\n"
   4522 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   4523 
   4524 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
   4525 		"%inval     = OpLoad %f32 %inloc\n"
   4526 		"%neg       = OpFNegate %f32 %inval\n"
   4527 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
   4528 		"             OpStore %outloc %neg\n"
   4529 		"             OpReturn\n"
   4530 		"             OpFunctionEnd\n");
   4531 
   4532 	cases.push_back(CaseParameter("bool",			"%bool"));
   4533 	cases.push_back(CaseParameter("sint32",			"%i32"));
   4534 	cases.push_back(CaseParameter("uint32",			"%u32"));
   4535 	cases.push_back(CaseParameter("float32",		"%f32"));
   4536 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
   4537 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
   4538 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
   4539 	cases.push_back(CaseParameter("image",			"%image"));
   4540 	cases.push_back(CaseParameter("sampler",		"%sampler"));
   4541 	cases.push_back(CaseParameter("sampledimage",	"%simage"));
   4542 	cases.push_back(CaseParameter("array",			"%uarr100"));
   4543 	cases.push_back(CaseParameter("runtimearray",	"%f32arr"));
   4544 	cases.push_back(CaseParameter("struct",			"%struct"));
   4545 	cases.push_back(CaseParameter("pointer",		"%pointer"));
   4546 
   4547 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
   4548 
   4549 	for (size_t ndx = 0; ndx < numElements; ++ndx)
   4550 		negativeFloats[ndx] = -positiveFloats[ndx];
   4551 
   4552 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   4553 	{
   4554 		map<string, string>		specializations;
   4555 		ComputeShaderSpec		spec;
   4556 
   4557 		specializations["TYPE"] = cases[caseNdx].param;
   4558 		spec.assembly = shaderTemplate.specialize(specializations);
   4559 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
   4560 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
   4561 		spec.numWorkGroups = IVec3(numElements, 1, 1);
   4562 
   4563 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
   4564 	}
   4565 
   4566 		return group.release();
   4567 }
   4568 } // anonymous
   4569 
   4570 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
   4571 {
   4572 	struct NameCodePair { string name, code; };
   4573 	RGBA							defaultColors[4];
   4574 	de::MovePtr<tcu::TestCaseGroup> opSourceTests			(new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
   4575 	const std::string				opsourceGLSLWithFile	= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
   4576 	map<string, string>				fragments				= passthruFragments();
   4577 	const NameCodePair				tests[]					=
   4578 	{
   4579 		{"unknown", "OpSource Unknown 321"},
   4580 		{"essl", "OpSource ESSL 310"},
   4581 		{"glsl", "OpSource GLSL 450"},
   4582 		{"opencl_cpp", "OpSource OpenCL_CPP 120"},
   4583 		{"opencl_c", "OpSource OpenCL_C 120"},
   4584 		{"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
   4585 		{"file", opsourceGLSLWithFile},
   4586 		{"source", opsourceGLSLWithFile + "\"void main(){}\""},
   4587 		// Longest possible source string: SPIR-V limits instructions to 65535
   4588 		// words, of which the first 4 are opsourceGLSLWithFile; the rest will
   4589 		// contain 65530 UTF8 characters (one word each) plus one last word
   4590 		// containing 3 ASCII characters and \0.
   4591 		{"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
   4592 	};
   4593 
   4594 	getDefaultColors(defaultColors);
   4595 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
   4596 	{
   4597 		fragments["debug"] = tests[testNdx].code;
   4598 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
   4599 	}
   4600 
   4601 	return opSourceTests.release();
   4602 }
   4603 
   4604 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
   4605 {
   4606 	struct NameCodePair { string name, code; };
   4607 	RGBA								defaultColors[4];
   4608 	de::MovePtr<tcu::TestCaseGroup>		opSourceTests		(new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
   4609 	map<string, string>					fragments			= passthruFragments();
   4610 	const std::string					opsource			= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
   4611 	const NameCodePair					tests[]				=
   4612 	{
   4613 		{"empty", opsource + "OpSourceContinued \"\""},
   4614 		{"short", opsource + "OpSourceContinued \"abcde\""},
   4615 		{"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
   4616 		// Longest possible source string: SPIR-V limits instructions to 65535
   4617 		// words, of which the first one is OpSourceContinued/length; the rest
   4618 		// will contain 65533 UTF8 characters (one word each) plus one last word
   4619 		// containing 3 ASCII characters and \0.
   4620 		{"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
   4621 	};
   4622 
   4623 	getDefaultColors(defaultColors);
   4624 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
   4625 	{
   4626 		fragments["debug"] = tests[testNdx].code;
   4627 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
   4628 	}
   4629 
   4630 	return opSourceTests.release();
   4631 }
   4632 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
   4633 {
   4634 	RGBA								 defaultColors[4];
   4635 	de::MovePtr<tcu::TestCaseGroup>		 opLineTests		 (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
   4636 	map<string, string>					 fragments;
   4637 	getDefaultColors(defaultColors);
   4638 	fragments["debug"]			=
   4639 		"%name = OpString \"name\"\n";
   4640 
   4641 	fragments["pre_main"]	=
   4642 		"OpNoLine\n"
   4643 		"OpNoLine\n"
   4644 		"OpLine %name 1 1\n"
   4645 		"OpNoLine\n"
   4646 		"OpLine %name 1 1\n"
   4647 		"OpLine %name 1 1\n"
   4648 		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
   4649 		"OpNoLine\n"
   4650 		"OpLine %name 1 1\n"
   4651 		"OpNoLine\n"
   4652 		"OpLine %name 1 1\n"
   4653 		"OpLine %name 1 1\n"
   4654 		"%second_param1 = OpFunctionParameter %v4f32\n"
   4655 		"OpNoLine\n"
   4656 		"OpNoLine\n"
   4657 		"%label_secondfunction = OpLabel\n"
   4658 		"OpNoLine\n"
   4659 		"OpReturnValue %second_param1\n"
   4660 		"OpFunctionEnd\n"
   4661 		"OpNoLine\n"
   4662 		"OpNoLine\n";
   4663 
   4664 	fragments["testfun"]		=
   4665 		// A %test_code function that returns its argument unchanged.
   4666 		"OpNoLine\n"
   4667 		"OpNoLine\n"
   4668 		"OpLine %name 1 1\n"
   4669 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   4670 		"OpNoLine\n"
   4671 		"%param1 = OpFunctionParameter %v4f32\n"
   4672 		"OpNoLine\n"
   4673 		"OpNoLine\n"
   4674 		"%label_testfun = OpLabel\n"
   4675 		"OpNoLine\n"
   4676 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
   4677 		"OpReturnValue %val1\n"
   4678 		"OpFunctionEnd\n"
   4679 		"OpLine %name 1 1\n"
   4680 		"OpNoLine\n";
   4681 
   4682 	createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
   4683 
   4684 	return opLineTests.release();
   4685 }
   4686 
   4687 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
   4688 {
   4689 	RGBA								defaultColors[4];
   4690 	de::MovePtr<tcu::TestCaseGroup>		opModuleProcessedTests			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
   4691 	map<string, string>					fragments;
   4692 	std::vector<std::string>			noExtensions;
   4693 	GraphicsResources					resources;
   4694 
   4695 	getDefaultColors(defaultColors);
   4696 	resources.verifyBinary = veryfiBinaryShader;
   4697 	resources.spirvVersion = SPIRV_VERSION_1_3;
   4698 
   4699 	fragments["moduleprocessed"]							=
   4700 		"OpModuleProcessed \"VULKAN CTS\"\n"
   4701 		"OpModuleProcessed \"Negative values\"\n"
   4702 		"OpModuleProcessed \"Date: 2017/09/21\"\n";
   4703 
   4704 	fragments["pre_main"]	=
   4705 		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
   4706 		"%second_param1 = OpFunctionParameter %v4f32\n"
   4707 		"%label_secondfunction = OpLabel\n"
   4708 		"OpReturnValue %second_param1\n"
   4709 		"OpFunctionEnd\n";
   4710 
   4711 	fragments["testfun"]		=
   4712 		// A %test_code function that returns its argument unchanged.
   4713 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   4714 		"%param1 = OpFunctionParameter %v4f32\n"
   4715 		"%label_testfun = OpLabel\n"
   4716 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
   4717 		"OpReturnValue %val1\n"
   4718 		"OpFunctionEnd\n";
   4719 
   4720 	createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
   4721 
   4722 	return opModuleProcessedTests.release();
   4723 }
   4724 
   4725 
   4726 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
   4727 {
   4728 	RGBA													defaultColors[4];
   4729 	de::MovePtr<tcu::TestCaseGroup>							opLineTests			(new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
   4730 	map<string, string>										fragments;
   4731 	std::vector<std::pair<std::string, std::string> >		problemStrings;
   4732 
   4733 	problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
   4734 	problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
   4735 	problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
   4736 	getDefaultColors(defaultColors);
   4737 
   4738 	fragments["debug"]			=
   4739 		"%other_name = OpString \"other_name\"\n";
   4740 
   4741 	fragments["pre_main"]	=
   4742 		"OpLine %file_name 32 0\n"
   4743 		"OpLine %file_name 32 32\n"
   4744 		"OpLine %file_name 32 40\n"
   4745 		"OpLine %other_name 32 40\n"
   4746 		"OpLine %other_name 0 100\n"
   4747 		"OpLine %other_name 0 4294967295\n"
   4748 		"OpLine %other_name 4294967295 0\n"
   4749 		"OpLine %other_name 32 40\n"
   4750 		"OpLine %file_name 0 0\n"
   4751 		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
   4752 		"OpLine %file_name 1 0\n"
   4753 		"%second_param1 = OpFunctionParameter %v4f32\n"
   4754 		"OpLine %file_name 1 3\n"
   4755 		"OpLine %file_name 1 2\n"
   4756 		"%label_secondfunction = OpLabel\n"
   4757 		"OpLine %file_name 0 2\n"
   4758 		"OpReturnValue %second_param1\n"
   4759 		"OpFunctionEnd\n"
   4760 		"OpLine %file_name 0 2\n"
   4761 		"OpLine %file_name 0 2\n";
   4762 
   4763 	fragments["testfun"]		=
   4764 		// A %test_code function that returns its argument unchanged.
   4765 		"OpLine %file_name 1 0\n"
   4766 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   4767 		"OpLine %file_name 16 330\n"
   4768 		"%param1 = OpFunctionParameter %v4f32\n"
   4769 		"OpLine %file_name 14 442\n"
   4770 		"%label_testfun = OpLabel\n"
   4771 		"OpLine %file_name 11 1024\n"
   4772 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
   4773 		"OpLine %file_name 2 97\n"
   4774 		"OpReturnValue %val1\n"
   4775 		"OpFunctionEnd\n"
   4776 		"OpLine %file_name 5 32\n";
   4777 
   4778 	for (size_t i = 0; i < problemStrings.size(); ++i)
   4779 	{
   4780 		map<string, string> testFragments = fragments;
   4781 		testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
   4782 		createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
   4783 	}
   4784 
   4785 	return opLineTests.release();
   4786 }
   4787 
   4788 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
   4789 {
   4790 	de::MovePtr<tcu::TestCaseGroup> opConstantNullTests		(new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
   4791 	RGBA							colors[4];
   4792 
   4793 
   4794 	const char						functionStart[] =
   4795 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   4796 		"%param1 = OpFunctionParameter %v4f32\n"
   4797 		"%lbl    = OpLabel\n";
   4798 
   4799 	const char						functionEnd[]	=
   4800 		"OpReturnValue %transformed_param\n"
   4801 		"OpFunctionEnd\n";
   4802 
   4803 	struct NameConstantsCode
   4804 	{
   4805 		string name;
   4806 		string constants;
   4807 		string code;
   4808 	};
   4809 
   4810 	NameConstantsCode tests[] =
   4811 	{
   4812 		{
   4813 			"vec4",
   4814 			"%cnull = OpConstantNull %v4f32\n",
   4815 			"%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
   4816 		},
   4817 		{
   4818 			"float",
   4819 			"%cnull = OpConstantNull %f32\n",
   4820 			"%vp = OpVariable %fp_v4f32 Function\n"
   4821 			"%v  = OpLoad %v4f32 %vp\n"
   4822 			"%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
   4823 			"%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
   4824 			"%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
   4825 			"%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
   4826 			"%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
   4827 		},
   4828 		{
   4829 			"bool",
   4830 			"%cnull             = OpConstantNull %bool\n",
   4831 			"%v                 = OpVariable %fp_v4f32 Function\n"
   4832 			"                     OpStore %v %param1\n"
   4833 			"                     OpSelectionMerge %false_label None\n"
   4834 			"                     OpBranchConditional %cnull %true_label %false_label\n"
   4835 			"%true_label        = OpLabel\n"
   4836 			"                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
   4837 			"                     OpBranch %false_label\n"
   4838 			"%false_label       = OpLabel\n"
   4839 			"%transformed_param = OpLoad %v4f32 %v\n"
   4840 		},
   4841 		{
   4842 			"i32",
   4843 			"%cnull             = OpConstantNull %i32\n",
   4844 			"%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
   4845 			"%b                 = OpIEqual %bool %cnull %c_i32_0\n"
   4846 			"                     OpSelectionMerge %false_label None\n"
   4847 			"                     OpBranchConditional %b %true_label %false_label\n"
   4848 			"%true_label        = OpLabel\n"
   4849 			"                     OpStore %v %param1\n"
   4850 			"                     OpBranch %false_label\n"
   4851 			"%false_label       = OpLabel\n"
   4852 			"%transformed_param = OpLoad %v4f32 %v\n"
   4853 		},
   4854 		{
   4855 			"struct",
   4856 			"%stype             = OpTypeStruct %f32 %v4f32\n"
   4857 			"%fp_stype          = OpTypePointer Function %stype\n"
   4858 			"%cnull             = OpConstantNull %stype\n",
   4859 			"%v                 = OpVariable %fp_stype Function %cnull\n"
   4860 			"%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
   4861 			"%f_val             = OpLoad %v4f32 %f\n"
   4862 			"%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
   4863 		},
   4864 		{
   4865 			"array",
   4866 			"%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
   4867 			"%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
   4868 			"%cnull             = OpConstantNull %a4_v4f32\n",
   4869 			"%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
   4870 			"%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
   4871 			"%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
   4872 			"%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
   4873 			"%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
   4874 			"%f_val             = OpLoad %v4f32 %f\n"
   4875 			"%f1_val            = OpLoad %v4f32 %f1\n"
   4876 			"%f2_val            = OpLoad %v4f32 %f2\n"
   4877 			"%f3_val            = OpLoad %v4f32 %f3\n"
   4878 			"%t0                = OpFAdd %v4f32 %param1 %f_val\n"
   4879 			"%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
   4880 			"%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
   4881 			"%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
   4882 		},
   4883 		{
   4884 			"matrix",
   4885 			"%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
   4886 			"%cnull             = OpConstantNull %mat4x4_f32\n",
   4887 			// Our null matrix * any vector should result in a zero vector.
   4888 			"%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
   4889 			"%transformed_param = OpFAdd %v4f32 %param1 %v\n"
   4890 		}
   4891 	};
   4892 
   4893 	getHalfColorsFullAlpha(colors);
   4894 
   4895 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
   4896 	{
   4897 		map<string, string> fragments;
   4898 		fragments["pre_main"] = tests[testNdx].constants;
   4899 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
   4900 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
   4901 	}
   4902 	return opConstantNullTests.release();
   4903 }
   4904 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
   4905 {
   4906 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
   4907 	RGBA							inputColors[4];
   4908 	RGBA							outputColors[4];
   4909 
   4910 
   4911 	const char						functionStart[]	 =
   4912 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   4913 		"%param1 = OpFunctionParameter %v4f32\n"
   4914 		"%lbl    = OpLabel\n";
   4915 
   4916 	const char						functionEnd[]		=
   4917 		"OpReturnValue %transformed_param\n"
   4918 		"OpFunctionEnd\n";
   4919 
   4920 	struct NameConstantsCode
   4921 	{
   4922 		string name;
   4923 		string constants;
   4924 		string code;
   4925 	};
   4926 
   4927 	NameConstantsCode tests[] =
   4928 	{
   4929 		{
   4930 			"vec4",
   4931 
   4932 			"%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
   4933 			"%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
   4934 		},
   4935 		{
   4936 			"struct",
   4937 
   4938 			"%stype             = OpTypeStruct %v4f32 %f32\n"
   4939 			"%fp_stype          = OpTypePointer Function %stype\n"
   4940 			"%f32_n_1           = OpConstant %f32 -1.0\n"
   4941 			"%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
   4942 			"%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
   4943 			"%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
   4944 
   4945 			"%v                 = OpVariable %fp_stype Function %cval\n"
   4946 			"%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
   4947 			"%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
   4948 			"%vec_val           = OpLoad %v4f32 %vec_ptr\n"
   4949 			"%f32_val           = OpLoad %f32 %f32_ptr\n"
   4950 			"%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
   4951 			"%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
   4952 			"%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
   4953 		},
   4954 		{
   4955 			// [1|0|0|0.5] [x] = x + 0.5
   4956 			// [0|1|0|0.5] [y] = y + 0.5
   4957 			// [0|0|1|0.5] [z] = z + 0.5
   4958 			// [0|0|0|1  ] [1] = 1
   4959 			"matrix",
   4960 
   4961 			"%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
   4962 		    "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
   4963 		    "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
   4964 		    "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
   4965 		    "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
   4966 			"%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
   4967 
   4968 			"%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
   4969 		},
   4970 		{
   4971 			"array",
   4972 
   4973 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
   4974 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
   4975 			"%f32_n_1             = OpConstant %f32 -1.0\n"
   4976 			"%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
   4977 			"%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
   4978 
   4979 			"%v                   = OpVariable %fp_a4f32 Function %carr\n"
   4980 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
   4981 			"%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
   4982 			"%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
   4983 			"%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
   4984 			"%f_val               = OpLoad %f32 %f\n"
   4985 			"%f1_val              = OpLoad %f32 %f1\n"
   4986 			"%f2_val              = OpLoad %f32 %f2\n"
   4987 			"%f3_val              = OpLoad %f32 %f3\n"
   4988 			"%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
   4989 			"%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
   4990 			"%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
   4991 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
   4992 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
   4993 		},
   4994 		{
   4995 			//
   4996 			// [
   4997 			//   {
   4998 			//      0.0,
   4999 			//      [ 1.0, 1.0, 1.0, 1.0]
   5000 			//   },
   5001 			//   {
   5002 			//      1.0,
   5003 			//      [ 0.0, 0.5, 0.0, 0.0]
   5004 			//   }, //     ^^^
   5005 			//   {
   5006 			//      0.0,
   5007 			//      [ 1.0, 1.0, 1.0, 1.0]
   5008 			//   }
   5009 			// ]
   5010 			"array_of_struct_of_array",
   5011 
   5012 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
   5013 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
   5014 			"%stype               = OpTypeStruct %f32 %a4f32\n"
   5015 			"%a3stype             = OpTypeArray %stype %c_u32_3\n"
   5016 			"%fp_a3stype          = OpTypePointer Function %a3stype\n"
   5017 			"%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
   5018 			"%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
   5019 			"%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
   5020 			"%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
   5021 			"%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
   5022 
   5023 			"%v                   = OpVariable %fp_a3stype Function %carr\n"
   5024 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
   5025 			"%f_l                 = OpLoad %f32 %f\n"
   5026 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
   5027 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
   5028 		}
   5029 	};
   5030 
   5031 	getHalfColorsFullAlpha(inputColors);
   5032 	outputColors[0] = RGBA(255, 255, 255, 255);
   5033 	outputColors[1] = RGBA(255, 127, 127, 255);
   5034 	outputColors[2] = RGBA(127, 255, 127, 255);
   5035 	outputColors[3] = RGBA(127, 127, 255, 255);
   5036 
   5037 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
   5038 	{
   5039 		map<string, string> fragments;
   5040 		fragments["pre_main"] = tests[testNdx].constants;
   5041 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
   5042 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
   5043 	}
   5044 	return opConstantCompositeTests.release();
   5045 }
   5046 
   5047 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
   5048 {
   5049 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
   5050 	RGBA							inputColors[4];
   5051 	RGBA							outputColors[4];
   5052 	map<string, string>				fragments;
   5053 
   5054 	// vec4 test_code(vec4 param) {
   5055 	//   vec4 result = param;
   5056 	//   for (int i = 0; i < 4; ++i) {
   5057 	//     if (i == 0) result[i] = 0.;
   5058 	//     else        result[i] = 1. - result[i];
   5059 	//   }
   5060 	//   return result;
   5061 	// }
   5062 	const char						function[]			=
   5063 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5064 		"%param1    = OpFunctionParameter %v4f32\n"
   5065 		"%lbl       = OpLabel\n"
   5066 		"%iptr      = OpVariable %fp_i32 Function\n"
   5067 		"%result    = OpVariable %fp_v4f32 Function\n"
   5068 		"             OpStore %iptr %c_i32_0\n"
   5069 		"             OpStore %result %param1\n"
   5070 		"             OpBranch %loop\n"
   5071 
   5072 		// Loop entry block.
   5073 		"%loop      = OpLabel\n"
   5074 		"%ival      = OpLoad %i32 %iptr\n"
   5075 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
   5076 		"             OpLoopMerge %exit %if_entry None\n"
   5077 		"             OpBranchConditional %lt_4 %if_entry %exit\n"
   5078 
   5079 		// Merge block for loop.
   5080 		"%exit      = OpLabel\n"
   5081 		"%ret       = OpLoad %v4f32 %result\n"
   5082 		"             OpReturnValue %ret\n"
   5083 
   5084 		// If-statement entry block.
   5085 		"%if_entry  = OpLabel\n"
   5086 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
   5087 		"%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
   5088 		"             OpSelectionMerge %if_exit None\n"
   5089 		"             OpBranchConditional %eq_0 %if_true %if_false\n"
   5090 
   5091 		// False branch for if-statement.
   5092 		"%if_false  = OpLabel\n"
   5093 		"%val       = OpLoad %f32 %loc\n"
   5094 		"%sub       = OpFSub %f32 %c_f32_1 %val\n"
   5095 		"             OpStore %loc %sub\n"
   5096 		"             OpBranch %if_exit\n"
   5097 
   5098 		// Merge block for if-statement.
   5099 		"%if_exit   = OpLabel\n"
   5100 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
   5101 		"             OpStore %iptr %ival_next\n"
   5102 		"             OpBranch %loop\n"
   5103 
   5104 		// True branch for if-statement.
   5105 		"%if_true   = OpLabel\n"
   5106 		"             OpStore %loc %c_f32_0\n"
   5107 		"             OpBranch %if_exit\n"
   5108 
   5109 		"             OpFunctionEnd\n";
   5110 
   5111 	fragments["testfun"]	= function;
   5112 
   5113 	inputColors[0]			= RGBA(127, 127, 127, 0);
   5114 	inputColors[1]			= RGBA(127, 0,   0,   0);
   5115 	inputColors[2]			= RGBA(0,   127, 0,   0);
   5116 	inputColors[3]			= RGBA(0,   0,   127, 0);
   5117 
   5118 	outputColors[0]			= RGBA(0, 128, 128, 255);
   5119 	outputColors[1]			= RGBA(0, 255, 255, 255);
   5120 	outputColors[2]			= RGBA(0, 128, 255, 255);
   5121 	outputColors[3]			= RGBA(0, 255, 128, 255);
   5122 
   5123 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
   5124 
   5125 	return group.release();
   5126 }
   5127 
   5128 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
   5129 {
   5130 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
   5131 	RGBA							inputColors[4];
   5132 	RGBA							outputColors[4];
   5133 	map<string, string>				fragments;
   5134 
   5135 	const char						typesAndConstants[]	=
   5136 		"%c_f32_p2  = OpConstant %f32 0.2\n"
   5137 		"%c_f32_p4  = OpConstant %f32 0.4\n"
   5138 		"%c_f32_p6  = OpConstant %f32 0.6\n"
   5139 		"%c_f32_p8  = OpConstant %f32 0.8\n";
   5140 
   5141 	// vec4 test_code(vec4 param) {
   5142 	//   vec4 result = param;
   5143 	//   for (int i = 0; i < 4; ++i) {
   5144 	//     switch (i) {
   5145 	//       case 0: result[i] += .2; break;
   5146 	//       case 1: result[i] += .6; break;
   5147 	//       case 2: result[i] += .4; break;
   5148 	//       case 3: result[i] += .8; break;
   5149 	//       default: break; // unreachable
   5150 	//     }
   5151 	//   }
   5152 	//   return result;
   5153 	// }
   5154 	const char						function[]			=
   5155 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5156 		"%param1    = OpFunctionParameter %v4f32\n"
   5157 		"%lbl       = OpLabel\n"
   5158 		"%iptr      = OpVariable %fp_i32 Function\n"
   5159 		"%result    = OpVariable %fp_v4f32 Function\n"
   5160 		"             OpStore %iptr %c_i32_0\n"
   5161 		"             OpStore %result %param1\n"
   5162 		"             OpBranch %loop\n"
   5163 
   5164 		// Loop entry block.
   5165 		"%loop      = OpLabel\n"
   5166 		"%ival      = OpLoad %i32 %iptr\n"
   5167 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
   5168 		"             OpLoopMerge %exit %switch_exit None\n"
   5169 		"             OpBranchConditional %lt_4 %switch_entry %exit\n"
   5170 
   5171 		// Merge block for loop.
   5172 		"%exit      = OpLabel\n"
   5173 		"%ret       = OpLoad %v4f32 %result\n"
   5174 		"             OpReturnValue %ret\n"
   5175 
   5176 		// Switch-statement entry block.
   5177 		"%switch_entry   = OpLabel\n"
   5178 		"%loc            = OpAccessChain %fp_f32 %result %ival\n"
   5179 		"%val            = OpLoad %f32 %loc\n"
   5180 		"                  OpSelectionMerge %switch_exit None\n"
   5181 		"                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
   5182 
   5183 		"%case2          = OpLabel\n"
   5184 		"%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
   5185 		"                  OpStore %loc %addp4\n"
   5186 		"                  OpBranch %switch_exit\n"
   5187 
   5188 		"%switch_default = OpLabel\n"
   5189 		"                  OpUnreachable\n"
   5190 
   5191 		"%case3          = OpLabel\n"
   5192 		"%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
   5193 		"                  OpStore %loc %addp8\n"
   5194 		"                  OpBranch %switch_exit\n"
   5195 
   5196 		"%case0          = OpLabel\n"
   5197 		"%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
   5198 		"                  OpStore %loc %addp2\n"
   5199 		"                  OpBranch %switch_exit\n"
   5200 
   5201 		// Merge block for switch-statement.
   5202 		"%switch_exit    = OpLabel\n"
   5203 		"%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
   5204 		"                  OpStore %iptr %ival_next\n"
   5205 		"                  OpBranch %loop\n"
   5206 
   5207 		"%case1          = OpLabel\n"
   5208 		"%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
   5209 		"                  OpStore %loc %addp6\n"
   5210 		"                  OpBranch %switch_exit\n"
   5211 
   5212 		"                  OpFunctionEnd\n";
   5213 
   5214 	fragments["pre_main"]	= typesAndConstants;
   5215 	fragments["testfun"]	= function;
   5216 
   5217 	inputColors[0]			= RGBA(127, 27,  127, 51);
   5218 	inputColors[1]			= RGBA(127, 0,   0,   51);
   5219 	inputColors[2]			= RGBA(0,   27,  0,   51);
   5220 	inputColors[3]			= RGBA(0,   0,   127, 51);
   5221 
   5222 	outputColors[0]			= RGBA(178, 180, 229, 255);
   5223 	outputColors[1]			= RGBA(178, 153, 102, 255);
   5224 	outputColors[2]			= RGBA(51,  180, 102, 255);
   5225 	outputColors[3]			= RGBA(51,  153, 229, 255);
   5226 
   5227 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
   5228 
   5229 	return group.release();
   5230 }
   5231 
   5232 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
   5233 {
   5234 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
   5235 	RGBA							inputColors[4];
   5236 	RGBA							outputColors[4];
   5237 	map<string, string>				fragments;
   5238 
   5239 	const char						decorations[]		=
   5240 		"OpDecorate %array_group         ArrayStride 4\n"
   5241 		"OpDecorate %struct_member_group Offset 0\n"
   5242 		"%array_group         = OpDecorationGroup\n"
   5243 		"%struct_member_group = OpDecorationGroup\n"
   5244 
   5245 		"OpDecorate %group1 RelaxedPrecision\n"
   5246 		"OpDecorate %group3 RelaxedPrecision\n"
   5247 		"OpDecorate %group3 Invariant\n"
   5248 		"OpDecorate %group3 Restrict\n"
   5249 		"%group0 = OpDecorationGroup\n"
   5250 		"%group1 = OpDecorationGroup\n"
   5251 		"%group3 = OpDecorationGroup\n";
   5252 
   5253 	const char						typesAndConstants[]	=
   5254 		"%a3f32     = OpTypeArray %f32 %c_u32_3\n"
   5255 		"%struct1   = OpTypeStruct %a3f32\n"
   5256 		"%struct2   = OpTypeStruct %a3f32\n"
   5257 		"%fp_struct1 = OpTypePointer Function %struct1\n"
   5258 		"%fp_struct2 = OpTypePointer Function %struct2\n"
   5259 		"%c_f32_2    = OpConstant %f32 2.\n"
   5260 		"%c_f32_n2   = OpConstant %f32 -2.\n"
   5261 
   5262 		"%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
   5263 		"%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
   5264 		"%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
   5265 		"%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
   5266 
   5267 	const char						function[]			=
   5268 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5269 		"%param     = OpFunctionParameter %v4f32\n"
   5270 		"%entry     = OpLabel\n"
   5271 		"%result    = OpVariable %fp_v4f32 Function\n"
   5272 		"%v_struct1 = OpVariable %fp_struct1 Function\n"
   5273 		"%v_struct2 = OpVariable %fp_struct2 Function\n"
   5274 		"             OpStore %result %param\n"
   5275 		"             OpStore %v_struct1 %c_struct1\n"
   5276 		"             OpStore %v_struct2 %c_struct2\n"
   5277 		"%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
   5278 		"%val1      = OpLoad %f32 %ptr1\n"
   5279 		"%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
   5280 		"%val2      = OpLoad %f32 %ptr2\n"
   5281 		"%addvalues = OpFAdd %f32 %val1 %val2\n"
   5282 		"%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
   5283 		"%val       = OpLoad %f32 %ptr\n"
   5284 		"%addresult = OpFAdd %f32 %addvalues %val\n"
   5285 		"             OpStore %ptr %addresult\n"
   5286 		"%ret       = OpLoad %v4f32 %result\n"
   5287 		"             OpReturnValue %ret\n"
   5288 		"             OpFunctionEnd\n";
   5289 
   5290 	struct CaseNameDecoration
   5291 	{
   5292 		string name;
   5293 		string decoration;
   5294 	};
   5295 
   5296 	CaseNameDecoration tests[] =
   5297 	{
   5298 		{
   5299 			"same_decoration_group_on_multiple_types",
   5300 			"OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
   5301 		},
   5302 		{
   5303 			"empty_decoration_group",
   5304 			"OpGroupDecorate %group0      %a3f32\n"
   5305 			"OpGroupDecorate %group0      %result\n"
   5306 		},
   5307 		{
   5308 			"one_element_decoration_group",
   5309 			"OpGroupDecorate %array_group %a3f32\n"
   5310 		},
   5311 		{
   5312 			"multiple_elements_decoration_group",
   5313 			"OpGroupDecorate %group3      %v_struct1\n"
   5314 		},
   5315 		{
   5316 			"multiple_decoration_groups_on_same_variable",
   5317 			"OpGroupDecorate %group0      %v_struct2\n"
   5318 			"OpGroupDecorate %group1      %v_struct2\n"
   5319 			"OpGroupDecorate %group3      %v_struct2\n"
   5320 		},
   5321 		{
   5322 			"same_decoration_group_multiple_times",
   5323 			"OpGroupDecorate %group1      %addvalues\n"
   5324 			"OpGroupDecorate %group1      %addvalues\n"
   5325 			"OpGroupDecorate %group1      %addvalues\n"
   5326 		},
   5327 
   5328 	};
   5329 
   5330 	getHalfColorsFullAlpha(inputColors);
   5331 	getHalfColorsFullAlpha(outputColors);
   5332 
   5333 	for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
   5334 	{
   5335 		fragments["decoration"]	= decorations + tests[idx].decoration;
   5336 		fragments["pre_main"]	= typesAndConstants;
   5337 		fragments["testfun"]	= function;
   5338 
   5339 		createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
   5340 	}
   5341 
   5342 	return group.release();
   5343 }
   5344 
   5345 struct SpecConstantTwoIntGraphicsCase
   5346 {
   5347 	const char*		caseName;
   5348 	const char*		scDefinition0;
   5349 	const char*		scDefinition1;
   5350 	const char*		scResultType;
   5351 	const char*		scOperation;
   5352 	deInt32			scActualValue0;
   5353 	deInt32			scActualValue1;
   5354 	const char*		resultOperation;
   5355 	RGBA			expectedColors[4];
   5356 
   5357 					SpecConstantTwoIntGraphicsCase (const char* name,
   5358 											const char* definition0,
   5359 											const char* definition1,
   5360 											const char* resultType,
   5361 											const char* operation,
   5362 											deInt32		value0,
   5363 											deInt32		value1,
   5364 											const char* resultOp,
   5365 											const RGBA	(&output)[4])
   5366 						: caseName			(name)
   5367 						, scDefinition0		(definition0)
   5368 						, scDefinition1		(definition1)
   5369 						, scResultType		(resultType)
   5370 						, scOperation		(operation)
   5371 						, scActualValue0	(value0)
   5372 						, scActualValue1	(value1)
   5373 						, resultOperation	(resultOp)
   5374 	{
   5375 		expectedColors[0] = output[0];
   5376 		expectedColors[1] = output[1];
   5377 		expectedColors[2] = output[2];
   5378 		expectedColors[3] = output[3];
   5379 	}
   5380 };
   5381 
   5382 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
   5383 {
   5384 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
   5385 	vector<SpecConstantTwoIntGraphicsCase>	cases;
   5386 	RGBA							inputColors[4];
   5387 	RGBA							outputColors0[4];
   5388 	RGBA							outputColors1[4];
   5389 	RGBA							outputColors2[4];
   5390 
   5391 	const char	decorations1[]			=
   5392 		"OpDecorate %sc_0  SpecId 0\n"
   5393 		"OpDecorate %sc_1  SpecId 1\n";
   5394 
   5395 	const char	typesAndConstants1[]	=
   5396 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
   5397 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
   5398 		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
   5399 
   5400 	const char	function1[]				=
   5401 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5402 		"%param     = OpFunctionParameter %v4f32\n"
   5403 		"%label     = OpLabel\n"
   5404 		"%result    = OpVariable %fp_v4f32 Function\n"
   5405 		"             OpStore %result %param\n"
   5406 		"%gen       = ${GEN_RESULT}\n"
   5407 		"%index     = OpIAdd %i32 %gen %c_i32_1\n"
   5408 		"%loc       = OpAccessChain %fp_f32 %result %index\n"
   5409 		"%val       = OpLoad %f32 %loc\n"
   5410 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
   5411 		"             OpStore %loc %add\n"
   5412 		"%ret       = OpLoad %v4f32 %result\n"
   5413 		"             OpReturnValue %ret\n"
   5414 		"             OpFunctionEnd\n";
   5415 
   5416 	inputColors[0] = RGBA(127, 127, 127, 255);
   5417 	inputColors[1] = RGBA(127, 0,   0,   255);
   5418 	inputColors[2] = RGBA(0,   127, 0,   255);
   5419 	inputColors[3] = RGBA(0,   0,   127, 255);
   5420 
   5421 	// Derived from inputColors[x] by adding 128 to inputColors[x][0].
   5422 	outputColors0[0] = RGBA(255, 127, 127, 255);
   5423 	outputColors0[1] = RGBA(255, 0,   0,   255);
   5424 	outputColors0[2] = RGBA(128, 127, 0,   255);
   5425 	outputColors0[3] = RGBA(128, 0,   127, 255);
   5426 
   5427 	// Derived from inputColors[x] by adding 128 to inputColors[x][1].
   5428 	outputColors1[0] = RGBA(127, 255, 127, 255);
   5429 	outputColors1[1] = RGBA(127, 128, 0,   255);
   5430 	outputColors1[2] = RGBA(0,   255, 0,   255);
   5431 	outputColors1[3] = RGBA(0,   128, 127, 255);
   5432 
   5433 	// Derived from inputColors[x] by adding 128 to inputColors[x][2].
   5434 	outputColors2[0] = RGBA(127, 127, 255, 255);
   5435 	outputColors2[1] = RGBA(127, 0,   128, 255);
   5436 	outputColors2[2] = RGBA(0,   127, 128, 255);
   5437 	outputColors2[3] = RGBA(0,   0,   255, 255);
   5438 
   5439 	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
   5440 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
   5441 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
   5442 
   5443 	cases.push_back(SpecConstantTwoIntGraphicsCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",				19,		-20,	addZeroToSc,		outputColors0));
   5444 	cases.push_back(SpecConstantTwoIntGraphicsCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",				19,		20,		addZeroToSc,		outputColors0));
   5445 	cases.push_back(SpecConstantTwoIntGraphicsCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",				-1,		-1,		addZeroToSc,		outputColors2));
   5446 	cases.push_back(SpecConstantTwoIntGraphicsCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",				-126,	126,	addZeroToSc,		outputColors0));
   5447 	cases.push_back(SpecConstantTwoIntGraphicsCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",				126,	126,	addZeroToSc,		outputColors2));
   5448 	cases.push_back(SpecConstantTwoIntGraphicsCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
   5449 	cases.push_back(SpecConstantTwoIntGraphicsCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
   5450 	cases.push_back(SpecConstantTwoIntGraphicsCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",				1001,	500,	addZeroToSc,		outputColors2));
   5451 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",				0x33,	0x0d,	addZeroToSc,		outputColors2));
   5452 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",				0,		1,		addZeroToSc,		outputColors2));
   5453 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",				0x2e,	0x2f,	addZeroToSc,		outputColors2));
   5454 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,		1,		addZeroToSc,		outputColors2));
   5455 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,		2,		addZeroToSc,		outputColors0));
   5456 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,		0,		addZeroToSc,		outputColors2));
   5457 	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",				-20,	-10,	selectTrueUsingSc,	outputColors2));
   5458 	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",				10,		20,		selectTrueUsingSc,	outputColors2));
   5459 	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
   5460 	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",				10,		5,		selectTrueUsingSc,	outputColors2));
   5461 	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",				-10,	-10,	selectTrueUsingSc,	outputColors2));
   5462 	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",				50,		100,	selectTrueUsingSc,	outputColors2));
   5463 	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
   5464 	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",				10,		10,		selectTrueUsingSc,	outputColors2));
   5465 	cases.push_back(SpecConstantTwoIntGraphicsCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",				42,		24,		selectFalseUsingSc,	outputColors2));
   5466 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
   5467 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
   5468 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
   5469 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
   5470 	cases.push_back(SpecConstantTwoIntGraphicsCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",					-1,		0,		addZeroToSc,		outputColors2));
   5471 	cases.push_back(SpecConstantTwoIntGraphicsCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,		0,		addZeroToSc,		outputColors2));
   5472 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,		0,		selectFalseUsingSc,	outputColors2));
   5473 	cases.push_back(SpecConstantTwoIntGraphicsCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,		1,		addZeroToSc,		outputColors2));
   5474 	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
   5475 	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
   5476 
   5477 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
   5478 	{
   5479 		map<string, string>	specializations;
   5480 		map<string, string>	fragments;
   5481 		vector<deInt32>		specConstants;
   5482 
   5483 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
   5484 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
   5485 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
   5486 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
   5487 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
   5488 
   5489 		fragments["decoration"]				= tcu::StringTemplate(decorations1).specialize(specializations);
   5490 		fragments["pre_main"]				= tcu::StringTemplate(typesAndConstants1).specialize(specializations);
   5491 		fragments["testfun"]				= tcu::StringTemplate(function1).specialize(specializations);
   5492 
   5493 		specConstants.push_back(cases[caseNdx].scActualValue0);
   5494 		specConstants.push_back(cases[caseNdx].scActualValue1);
   5495 
   5496 		createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
   5497 	}
   5498 
   5499 	const char	decorations2[]			=
   5500 		"OpDecorate %sc_0  SpecId 0\n"
   5501 		"OpDecorate %sc_1  SpecId 1\n"
   5502 		"OpDecorate %sc_2  SpecId 2\n";
   5503 
   5504 	const char	typesAndConstants2[]	=
   5505 		"%v3i32       = OpTypeVector %i32 3\n"
   5506 		"%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
   5507 		"%vec3_undef  = OpUndef %v3i32\n"
   5508 
   5509 		"%sc_0        = OpSpecConstant %i32 0\n"
   5510 		"%sc_1        = OpSpecConstant %i32 0\n"
   5511 		"%sc_2        = OpSpecConstant %i32 0\n"
   5512 		"%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"							// (sc_0, 0,    0)
   5513 		"%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"							// (0,    sc_1, 0)
   5514 		"%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"							// (0,    0,    sc_2)
   5515 		"%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
   5516 		"%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
   5517 		"%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
   5518 		"%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (0,    sc_0, sc_1)
   5519 		"%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
   5520 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
   5521 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
   5522 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
   5523 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
   5524 		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";								// (sc_2 - sc_0) * sc_1
   5525 
   5526 	const char	function2[]				=
   5527 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5528 		"%param     = OpFunctionParameter %v4f32\n"
   5529 		"%label     = OpLabel\n"
   5530 		"%result    = OpVariable %fp_v4f32 Function\n"
   5531 		"             OpStore %result %param\n"
   5532 		"%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
   5533 		"%val       = OpLoad %f32 %loc\n"
   5534 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
   5535 		"             OpStore %loc %add\n"
   5536 		"%ret       = OpLoad %v4f32 %result\n"
   5537 		"             OpReturnValue %ret\n"
   5538 		"             OpFunctionEnd\n";
   5539 
   5540 	map<string, string>	fragments;
   5541 	vector<deInt32>		specConstants;
   5542 
   5543 	fragments["decoration"]	= decorations2;
   5544 	fragments["pre_main"]	= typesAndConstants2;
   5545 	fragments["testfun"]	= function2;
   5546 
   5547 	specConstants.push_back(56789);
   5548 	specConstants.push_back(-2);
   5549 	specConstants.push_back(56788);
   5550 
   5551 	createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
   5552 
   5553 	return group.release();
   5554 }
   5555 
   5556 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
   5557 {
   5558 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
   5559 	RGBA							inputColors[4];
   5560 	RGBA							outputColors1[4];
   5561 	RGBA							outputColors2[4];
   5562 	RGBA							outputColors3[4];
   5563 	map<string, string>				fragments1;
   5564 	map<string, string>				fragments2;
   5565 	map<string, string>				fragments3;
   5566 
   5567 	const char	typesAndConstants1[]	=
   5568 		"%c_f32_p2  = OpConstant %f32 0.2\n"
   5569 		"%c_f32_p4  = OpConstant %f32 0.4\n"
   5570 		"%c_f32_p5  = OpConstant %f32 0.5\n"
   5571 		"%c_f32_p8  = OpConstant %f32 0.8\n";
   5572 
   5573 	// vec4 test_code(vec4 param) {
   5574 	//   vec4 result = param;
   5575 	//   for (int i = 0; i < 4; ++i) {
   5576 	//     float operand;
   5577 	//     switch (i) {
   5578 	//       case 0: operand = .2; break;
   5579 	//       case 1: operand = .5; break;
   5580 	//       case 2: operand = .4; break;
   5581 	//       case 3: operand = .0; break;
   5582 	//       default: break; // unreachable
   5583 	//     }
   5584 	//     result[i] += operand;
   5585 	//   }
   5586 	//   return result;
   5587 	// }
   5588 	const char	function1[]				=
   5589 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5590 		"%param1    = OpFunctionParameter %v4f32\n"
   5591 		"%lbl       = OpLabel\n"
   5592 		"%iptr      = OpVariable %fp_i32 Function\n"
   5593 		"%result    = OpVariable %fp_v4f32 Function\n"
   5594 		"             OpStore %iptr %c_i32_0\n"
   5595 		"             OpStore %result %param1\n"
   5596 		"             OpBranch %loop\n"
   5597 
   5598 		"%loop      = OpLabel\n"
   5599 		"%ival      = OpLoad %i32 %iptr\n"
   5600 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
   5601 		"             OpLoopMerge %exit %phi None\n"
   5602 		"             OpBranchConditional %lt_4 %entry %exit\n"
   5603 
   5604 		"%entry     = OpLabel\n"
   5605 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
   5606 		"%val       = OpLoad %f32 %loc\n"
   5607 		"             OpSelectionMerge %phi None\n"
   5608 		"             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
   5609 
   5610 		"%case0     = OpLabel\n"
   5611 		"             OpBranch %phi\n"
   5612 		"%case1     = OpLabel\n"
   5613 		"             OpBranch %phi\n"
   5614 		"%case2     = OpLabel\n"
   5615 		"             OpBranch %phi\n"
   5616 		"%case3     = OpLabel\n"
   5617 		"             OpBranch %phi\n"
   5618 
   5619 		"%default   = OpLabel\n"
   5620 		"             OpUnreachable\n"
   5621 
   5622 		"%phi       = OpLabel\n"
   5623 		"%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
   5624 		"%add       = OpFAdd %f32 %val %operand\n"
   5625 		"             OpStore %loc %add\n"
   5626 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
   5627 		"             OpStore %iptr %ival_next\n"
   5628 		"             OpBranch %loop\n"
   5629 
   5630 		"%exit      = OpLabel\n"
   5631 		"%ret       = OpLoad %v4f32 %result\n"
   5632 		"             OpReturnValue %ret\n"
   5633 
   5634 		"             OpFunctionEnd\n";
   5635 
   5636 	fragments1["pre_main"]	= typesAndConstants1;
   5637 	fragments1["testfun"]	= function1;
   5638 
   5639 	getHalfColorsFullAlpha(inputColors);
   5640 
   5641 	outputColors1[0]		= RGBA(178, 255, 229, 255);
   5642 	outputColors1[1]		= RGBA(178, 127, 102, 255);
   5643 	outputColors1[2]		= RGBA(51,  255, 102, 255);
   5644 	outputColors1[3]		= RGBA(51,  127, 229, 255);
   5645 
   5646 	createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
   5647 
   5648 	const char	typesAndConstants2[]	=
   5649 		"%c_f32_p2  = OpConstant %f32 0.2\n";
   5650 
   5651 	// Add .4 to the second element of the given parameter.
   5652 	const char	function2[]				=
   5653 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5654 		"%param     = OpFunctionParameter %v4f32\n"
   5655 		"%entry     = OpLabel\n"
   5656 		"%result    = OpVariable %fp_v4f32 Function\n"
   5657 		"             OpStore %result %param\n"
   5658 		"%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
   5659 		"%val       = OpLoad %f32 %loc\n"
   5660 		"             OpBranch %phi\n"
   5661 
   5662 		"%phi        = OpLabel\n"
   5663 		"%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
   5664 		"%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
   5665 		"%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
   5666 		"%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
   5667 		"%still_loop = OpSLessThan %bool %step %c_i32_2\n"
   5668 		"              OpLoopMerge %exit %phi None\n"
   5669 		"              OpBranchConditional %still_loop %phi %exit\n"
   5670 
   5671 		"%exit       = OpLabel\n"
   5672 		"              OpStore %loc %accum\n"
   5673 		"%ret        = OpLoad %v4f32 %result\n"
   5674 		"              OpReturnValue %ret\n"
   5675 
   5676 		"              OpFunctionEnd\n";
   5677 
   5678 	fragments2["pre_main"]	= typesAndConstants2;
   5679 	fragments2["testfun"]	= function2;
   5680 
   5681 	outputColors2[0]			= RGBA(127, 229, 127, 255);
   5682 	outputColors2[1]			= RGBA(127, 102, 0,   255);
   5683 	outputColors2[2]			= RGBA(0,   229, 0,   255);
   5684 	outputColors2[3]			= RGBA(0,   102, 127, 255);
   5685 
   5686 	createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
   5687 
   5688 	const char	typesAndConstants3[]	=
   5689 		"%true      = OpConstantTrue %bool\n"
   5690 		"%false     = OpConstantFalse %bool\n"
   5691 		"%c_f32_p2  = OpConstant %f32 0.2\n";
   5692 
   5693 	// Swap the second and the third element of the given parameter.
   5694 	const char	function3[]				=
   5695 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5696 		"%param     = OpFunctionParameter %v4f32\n"
   5697 		"%entry     = OpLabel\n"
   5698 		"%result    = OpVariable %fp_v4f32 Function\n"
   5699 		"             OpStore %result %param\n"
   5700 		"%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
   5701 		"%a_init    = OpLoad %f32 %a_loc\n"
   5702 		"%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
   5703 		"%b_init    = OpLoad %f32 %b_loc\n"
   5704 		"             OpBranch %phi\n"
   5705 
   5706 		"%phi        = OpLabel\n"
   5707 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
   5708 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
   5709 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
   5710 		"              OpLoopMerge %exit %phi None\n"
   5711 		"              OpBranchConditional %still_loop %phi %exit\n"
   5712 
   5713 		"%exit       = OpLabel\n"
   5714 		"              OpStore %a_loc %a_next\n"
   5715 		"              OpStore %b_loc %b_next\n"
   5716 		"%ret        = OpLoad %v4f32 %result\n"
   5717 		"              OpReturnValue %ret\n"
   5718 
   5719 		"              OpFunctionEnd\n";
   5720 
   5721 	fragments3["pre_main"]	= typesAndConstants3;
   5722 	fragments3["testfun"]	= function3;
   5723 
   5724 	outputColors3[0]			= RGBA(127, 127, 127, 255);
   5725 	outputColors3[1]			= RGBA(127, 0,   0,   255);
   5726 	outputColors3[2]			= RGBA(0,   0,   127, 255);
   5727 	outputColors3[3]			= RGBA(0,   127, 0,   255);
   5728 
   5729 	createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
   5730 
   5731 	return group.release();
   5732 }
   5733 
   5734 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
   5735 {
   5736 	de::MovePtr<tcu::TestCaseGroup> group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
   5737 	RGBA							inputColors[4];
   5738 	RGBA							outputColors[4];
   5739 
   5740 	// With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
   5741 	// For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
   5742 	// only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
   5743 	// On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
   5744 	const char						constantsAndTypes[]	 =
   5745 		"%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
   5746 		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
   5747 		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
   5748 		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
   5749 		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"
   5750 		;
   5751 
   5752 	const char						function[]	 =
   5753 		"%test_code      = OpFunction %v4f32 None %v4f32_function\n"
   5754 		"%param          = OpFunctionParameter %v4f32\n"
   5755 		"%label          = OpLabel\n"
   5756 		"%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
   5757 		"%var2           = OpVariable %fp_f32 Function\n"
   5758 		"%red            = OpCompositeExtract %f32 %param 0\n"
   5759 		"%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
   5760 		"                  OpStore %var2 %plus_red\n"
   5761 		"%val1           = OpLoad %f32 %var1\n"
   5762 		"%val2           = OpLoad %f32 %var2\n"
   5763 		"%mul            = OpFMul %f32 %val1 %val2\n"
   5764 		"%add            = OpFAdd %f32 %mul %c_f32_n1\n"
   5765 		"%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
   5766 		"%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
   5767 		"%success        = OpLogicalOr %bool %is0 %isn1n24\n"
   5768 		"%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
   5769 		"%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
   5770 		"                  OpReturnValue %ret\n"
   5771 		"                  OpFunctionEnd\n";
   5772 
   5773 	struct CaseNameDecoration
   5774 	{
   5775 		string name;
   5776 		string decoration;
   5777 	};
   5778 
   5779 
   5780 	CaseNameDecoration tests[] = {
   5781 		{"multiplication",	"OpDecorate %mul NoContraction"},
   5782 		{"addition",		"OpDecorate %add NoContraction"},
   5783 		{"both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
   5784 	};
   5785 
   5786 	getHalfColorsFullAlpha(inputColors);
   5787 
   5788 	for (deUint8 idx = 0; idx < 4; ++idx)
   5789 	{
   5790 		inputColors[idx].setRed(0);
   5791 		outputColors[idx] = RGBA(0, 0, 0, 255);
   5792 	}
   5793 
   5794 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
   5795 	{
   5796 		map<string, string> fragments;
   5797 
   5798 		fragments["decoration"] = tests[testNdx].decoration;
   5799 		fragments["pre_main"] = constantsAndTypes;
   5800 		fragments["testfun"] = function;
   5801 
   5802 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
   5803 	}
   5804 
   5805 	return group.release();
   5806 }
   5807 
   5808 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
   5809 {
   5810 	de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
   5811 	RGBA							colors[4];
   5812 
   5813 	const char						constantsAndTypes[]	 =
   5814 		"%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
   5815 		"%fp_a2f32          = OpTypePointer Function %a2f32\n"
   5816 		"%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
   5817 		"%fp_stype          = OpTypePointer Function %stype\n";
   5818 
   5819 	const char						function[]	 =
   5820 		"%test_code         = OpFunction %v4f32 None %v4f32_function\n"
   5821 		"%param1            = OpFunctionParameter %v4f32\n"
   5822 		"%lbl               = OpLabel\n"
   5823 		"%v1                = OpVariable %fp_v4f32 Function\n"
   5824 		"%v2                = OpVariable %fp_a2f32 Function\n"
   5825 		"%v3                = OpVariable %fp_f32 Function\n"
   5826 		"%v                 = OpVariable %fp_stype Function\n"
   5827 		"%vv                = OpVariable %fp_stype Function\n"
   5828 		"%vvv               = OpVariable %fp_f32 Function\n"
   5829 
   5830 		"                     OpStore %v1 %c_v4f32_1_1_1_1\n"
   5831 		"                     OpStore %v2 %c_a2f32_1\n"
   5832 		"                     OpStore %v3 %c_f32_1\n"
   5833 
   5834 		"%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
   5835 		"%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
   5836 		"%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
   5837 		"%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
   5838 		"%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
   5839 		"%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
   5840 
   5841 		"                    OpStore %p_v4f32 %v1_v ${access_type}\n"
   5842 		"                    OpStore %p_a2f32 %v2_v ${access_type}\n"
   5843 		"                    OpStore %p_f32 %v3_v ${access_type}\n"
   5844 
   5845 		"                    OpCopyMemory %vv %v ${access_type}\n"
   5846 		"                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
   5847 
   5848 		"%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
   5849 		"%v_f32_2          = OpLoad %f32 %p_f32_2\n"
   5850 		"%v_f32_3          = OpLoad %f32 %vvv\n"
   5851 
   5852 		"%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
   5853 		"%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
   5854 		"                    OpReturnValue %ret2\n"
   5855 		"                    OpFunctionEnd\n";
   5856 
   5857 	struct NameMemoryAccess
   5858 	{
   5859 		string name;
   5860 		string accessType;
   5861 	};
   5862 
   5863 
   5864 	NameMemoryAccess tests[] =
   5865 	{
   5866 		{ "none", "" },
   5867 		{ "volatile", "Volatile" },
   5868 		{ "aligned",  "Aligned 1" },
   5869 		{ "volatile_aligned",  "Volatile|Aligned 1" },
   5870 		{ "nontemporal_aligned",  "Nontemporal|Aligned 1" },
   5871 		{ "volatile_nontemporal",  "Volatile|Nontemporal" },
   5872 		{ "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
   5873 	};
   5874 
   5875 	getHalfColorsFullAlpha(colors);
   5876 
   5877 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
   5878 	{
   5879 		map<string, string> fragments;
   5880 		map<string, string> memoryAccess;
   5881 		memoryAccess["access_type"] = tests[testNdx].accessType;
   5882 
   5883 		fragments["pre_main"] = constantsAndTypes;
   5884 		fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
   5885 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
   5886 	}
   5887 	return memoryAccessTests.release();
   5888 }
   5889 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
   5890 {
   5891 	de::MovePtr<tcu::TestCaseGroup>		opUndefTests		 (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
   5892 	RGBA								defaultColors[4];
   5893 	map<string, string>					fragments;
   5894 	getDefaultColors(defaultColors);
   5895 
   5896 	// First, simple cases that don't do anything with the OpUndef result.
   5897 	struct NameCodePair { string name, decl, type; };
   5898 	const NameCodePair tests[] =
   5899 	{
   5900 		{"bool", "", "%bool"},
   5901 		{"vec2uint32", "%type = OpTypeVector %u32 2", "%type"},
   5902 		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
   5903 		{"sampler", "%type = OpTypeSampler", "%type"},
   5904 		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
   5905 		{"pointer", "", "%fp_i32"},
   5906 		{"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
   5907 		{"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
   5908 		{"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
   5909 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
   5910 	{
   5911 		fragments["undef_type"] = tests[testNdx].type;
   5912 		fragments["testfun"] = StringTemplate(
   5913 			"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5914 			"%param1 = OpFunctionParameter %v4f32\n"
   5915 			"%label_testfun = OpLabel\n"
   5916 			"%undef = OpUndef ${undef_type}\n"
   5917 			"OpReturnValue %param1\n"
   5918 			"OpFunctionEnd\n").specialize(fragments);
   5919 		fragments["pre_main"] = tests[testNdx].decl;
   5920 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
   5921 	}
   5922 	fragments.clear();
   5923 
   5924 	fragments["testfun"] =
   5925 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5926 		"%param1 = OpFunctionParameter %v4f32\n"
   5927 		"%label_testfun = OpLabel\n"
   5928 		"%undef = OpUndef %f32\n"
   5929 		"%zero = OpFMul %f32 %undef %c_f32_0\n"
   5930 		"%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
   5931 		"%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
   5932 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   5933 		"%b = OpFAdd %f32 %a %actually_zero\n"
   5934 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
   5935 		"OpReturnValue %ret\n"
   5936 		"OpFunctionEnd\n"
   5937 		;
   5938 	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
   5939 
   5940 	fragments["testfun"] =
   5941 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5942 		"%param1 = OpFunctionParameter %v4f32\n"
   5943 		"%label_testfun = OpLabel\n"
   5944 		"%undef = OpUndef %i32\n"
   5945 		"%zero = OpIMul %i32 %undef %c_i32_0\n"
   5946 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
   5947 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
   5948 		"OpReturnValue %ret\n"
   5949 		"OpFunctionEnd\n"
   5950 		;
   5951 	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
   5952 
   5953 	fragments["testfun"] =
   5954 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5955 		"%param1 = OpFunctionParameter %v4f32\n"
   5956 		"%label_testfun = OpLabel\n"
   5957 		"%undef = OpUndef %u32\n"
   5958 		"%zero = OpIMul %u32 %undef %c_i32_0\n"
   5959 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
   5960 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
   5961 		"OpReturnValue %ret\n"
   5962 		"OpFunctionEnd\n"
   5963 		;
   5964 	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
   5965 
   5966 	fragments["testfun"] =
   5967 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   5968 		"%param1 = OpFunctionParameter %v4f32\n"
   5969 		"%label_testfun = OpLabel\n"
   5970 		"%undef = OpUndef %v4f32\n"
   5971 		"%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
   5972 		"%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
   5973 		"%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
   5974 		"%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
   5975 		"%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
   5976 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
   5977 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
   5978 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
   5979 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
   5980 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
   5981 		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
   5982 		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
   5983 		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
   5984 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   5985 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
   5986 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
   5987 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
   5988 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
   5989 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
   5990 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
   5991 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
   5992 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
   5993 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
   5994 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
   5995 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
   5996 		"OpReturnValue %ret\n"
   5997 		"OpFunctionEnd\n"
   5998 		;
   5999 	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
   6000 
   6001 	fragments["pre_main"] =
   6002 		"%m2x2f32 = OpTypeMatrix %v2f32 2\n";
   6003 	fragments["testfun"] =
   6004 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6005 		"%param1 = OpFunctionParameter %v4f32\n"
   6006 		"%label_testfun = OpLabel\n"
   6007 		"%undef = OpUndef %m2x2f32\n"
   6008 		"%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
   6009 		"%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
   6010 		"%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
   6011 		"%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
   6012 		"%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
   6013 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
   6014 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
   6015 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
   6016 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
   6017 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
   6018 		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
   6019 		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
   6020 		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
   6021 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6022 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
   6023 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
   6024 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
   6025 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
   6026 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
   6027 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
   6028 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
   6029 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
   6030 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
   6031 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
   6032 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
   6033 		"OpReturnValue %ret\n"
   6034 		"OpFunctionEnd\n"
   6035 		;
   6036 	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
   6037 
   6038 	return opUndefTests.release();
   6039 }
   6040 
   6041 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
   6042 {
   6043 	const RGBA		inputColors[4]		=
   6044 	{
   6045 		RGBA(0,		0,		0,		255),
   6046 		RGBA(0,		0,		255,	255),
   6047 		RGBA(0,		255,	0,		255),
   6048 		RGBA(0,		255,	255,	255)
   6049 	};
   6050 
   6051 	const RGBA		expectedColors[4]	=
   6052 	{
   6053 		RGBA(255,	 0,		 0,		 255),
   6054 		RGBA(255,	 0,		 0,		 255),
   6055 		RGBA(255,	 0,		 0,		 255),
   6056 		RGBA(255,	 0,		 0,		 255)
   6057 	};
   6058 
   6059 	const struct SingleFP16Possibility
   6060 	{
   6061 		const char* name;
   6062 		const char* constant;  // Value to assign to %test_constant.
   6063 		float		valueAsFloat;
   6064 		const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
   6065 	}				tests[]				=
   6066 	{
   6067 		{
   6068 			"negative",
   6069 			"-0x1.3p1\n",
   6070 			-constructNormalizedFloat(1, 0x300000),
   6071 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
   6072 		}, // -19
   6073 		{
   6074 			"positive",
   6075 			"0x1.0p7\n",
   6076 			constructNormalizedFloat(7, 0x000000),
   6077 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
   6078 		},  // +128
   6079 		// SPIR-V requires that OpQuantizeToF16 flushes
   6080 		// any numbers that would end up denormalized in F16 to zero.
   6081 		{
   6082 			"denorm",
   6083 			"0x0.0006p-126\n",
   6084 			std::ldexp(1.5f, -140),
   6085 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
   6086 		},  // denorm
   6087 		{
   6088 			"negative_denorm",
   6089 			"-0x0.0006p-126\n",
   6090 			-std::ldexp(1.5f, -140),
   6091 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
   6092 		}, // -denorm
   6093 		{
   6094 			"too_small",
   6095 			"0x1.0p-16\n",
   6096 			std::ldexp(1.0f, -16),
   6097 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
   6098 		},     // too small positive
   6099 		{
   6100 			"negative_too_small",
   6101 			"-0x1.0p-32\n",
   6102 			-std::ldexp(1.0f, -32),
   6103 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
   6104 		},      // too small negative
   6105 		{
   6106 			"negative_inf",
   6107 			"-0x1.0p128\n",
   6108 			-std::ldexp(1.0f, 128),
   6109 
   6110 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
   6111 			"%inf = OpIsInf %bool %c\n"
   6112 			"%cond = OpLogicalAnd %bool %gz %inf\n"
   6113 		},     // -inf to -inf
   6114 		{
   6115 			"inf",
   6116 			"0x1.0p128\n",
   6117 			std::ldexp(1.0f, 128),
   6118 
   6119 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
   6120 			"%inf = OpIsInf %bool %c\n"
   6121 			"%cond = OpLogicalAnd %bool %gz %inf\n"
   6122 		},     // +inf to +inf
   6123 		{
   6124 			"round_to_negative_inf",
   6125 			"-0x1.0p32\n",
   6126 			-std::ldexp(1.0f, 32),
   6127 
   6128 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
   6129 			"%inf = OpIsInf %bool %c\n"
   6130 			"%cond = OpLogicalAnd %bool %gz %inf\n"
   6131 		},     // round to -inf
   6132 		{
   6133 			"round_to_inf",
   6134 			"0x1.0p16\n",
   6135 			std::ldexp(1.0f, 16),
   6136 
   6137 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
   6138 			"%inf = OpIsInf %bool %c\n"
   6139 			"%cond = OpLogicalAnd %bool %gz %inf\n"
   6140 		},     // round to +inf
   6141 		{
   6142 			"nan",
   6143 			"0x1.1p128\n",
   6144 			std::numeric_limits<float>::quiet_NaN(),
   6145 
   6146 			// Test for any NaN value, as NaNs are not preserved
   6147 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
   6148 			"%cond = OpIsNan %bool %direct_quant\n"
   6149 		}, // nan
   6150 		{
   6151 			"negative_nan",
   6152 			"-0x1.0001p128\n",
   6153 			std::numeric_limits<float>::quiet_NaN(),
   6154 
   6155 			// Test for any NaN value, as NaNs are not preserved
   6156 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
   6157 			"%cond = OpIsNan %bool %direct_quant\n"
   6158 		} // -nan
   6159 	};
   6160 	const char*		constants			=
   6161 		"%test_constant = OpConstant %f32 ";  // The value will be test.constant.
   6162 
   6163 	StringTemplate	function			(
   6164 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
   6165 		"%param1        = OpFunctionParameter %v4f32\n"
   6166 		"%label_testfun = OpLabel\n"
   6167 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6168 		"%b             = OpFAdd %f32 %test_constant %a\n"
   6169 		"%c             = OpQuantizeToF16 %f32 %b\n"
   6170 		"${condition}\n"
   6171 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
   6172 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
   6173 		"                 OpReturnValue %retval\n"
   6174 		"OpFunctionEnd\n"
   6175 	);
   6176 
   6177 	const char*		specDecorations		= "OpDecorate %test_constant SpecId 0\n";
   6178 	const char*		specConstants		=
   6179 			"%test_constant = OpSpecConstant %f32 0.\n"
   6180 			"%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
   6181 
   6182 	StringTemplate	specConstantFunction(
   6183 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
   6184 		"%param1        = OpFunctionParameter %v4f32\n"
   6185 		"%label_testfun = OpLabel\n"
   6186 		"${condition}\n"
   6187 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
   6188 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
   6189 		"                 OpReturnValue %retval\n"
   6190 		"OpFunctionEnd\n"
   6191 	);
   6192 
   6193 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
   6194 	{
   6195 		map<string, string>								codeSpecialization;
   6196 		map<string, string>								fragments;
   6197 		codeSpecialization["condition"]					= tests[idx].condition;
   6198 		fragments["testfun"]							= function.specialize(codeSpecialization);
   6199 		fragments["pre_main"]							= string(constants) + tests[idx].constant + "\n";
   6200 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
   6201 	}
   6202 
   6203 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
   6204 	{
   6205 		map<string, string>								codeSpecialization;
   6206 		map<string, string>								fragments;
   6207 		vector<deInt32>									passConstants;
   6208 		deInt32											specConstant;
   6209 
   6210 		codeSpecialization["condition"]					= tests[idx].condition;
   6211 		fragments["testfun"]							= specConstantFunction.specialize(codeSpecialization);
   6212 		fragments["decoration"]							= specDecorations;
   6213 		fragments["pre_main"]							= specConstants;
   6214 
   6215 		memcpy(&specConstant, &tests[idx].valueAsFloat, sizeof(float));
   6216 		passConstants.push_back(specConstant);
   6217 
   6218 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
   6219 	}
   6220 }
   6221 
   6222 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
   6223 {
   6224 	RGBA inputColors[4] =  {
   6225 		RGBA(0,		0,		0,		255),
   6226 		RGBA(0,		0,		255,	255),
   6227 		RGBA(0,		255,	0,		255),
   6228 		RGBA(0,		255,	255,	255)
   6229 	};
   6230 
   6231 	RGBA expectedColors[4] =
   6232 	{
   6233 		RGBA(255,	 0,		 0,		 255),
   6234 		RGBA(255,	 0,		 0,		 255),
   6235 		RGBA(255,	 0,		 0,		 255),
   6236 		RGBA(255,	 0,		 0,		 255)
   6237 	};
   6238 
   6239 	struct DualFP16Possibility
   6240 	{
   6241 		const char* name;
   6242 		const char* input;
   6243 		float		inputAsFloat;
   6244 		const char* possibleOutput1;
   6245 		const char* possibleOutput2;
   6246 	} tests[] = {
   6247 		{
   6248 			"positive_round_up_or_round_down",
   6249 			"0x1.3003p8",
   6250 			constructNormalizedFloat(8, 0x300300),
   6251 			"0x1.304p8",
   6252 			"0x1.3p8"
   6253 		},
   6254 		{
   6255 			"negative_round_up_or_round_down",
   6256 			"-0x1.6008p-7",
   6257 			-constructNormalizedFloat(-7, 0x600800),
   6258 			"-0x1.6p-7",
   6259 			"-0x1.604p-7"
   6260 		},
   6261 		{
   6262 			"carry_bit",
   6263 			"0x1.01ep2",
   6264 			constructNormalizedFloat(2, 0x01e000),
   6265 			"0x1.01cp2",
   6266 			"0x1.02p2"
   6267 		},
   6268 		{
   6269 			"carry_to_exponent",
   6270 			"0x1.ffep1",
   6271 			constructNormalizedFloat(1, 0xffe000),
   6272 			"0x1.ffcp1",
   6273 			"0x1.0p2"
   6274 		},
   6275 	};
   6276 	StringTemplate constants (
   6277 		"%input_const = OpConstant %f32 ${input}\n"
   6278 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
   6279 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
   6280 		);
   6281 
   6282 	StringTemplate specConstants (
   6283 		"%input_const = OpSpecConstant %f32 0.\n"
   6284 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
   6285 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
   6286 	);
   6287 
   6288 	const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
   6289 
   6290 	const char* function  =
   6291 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
   6292 		"%param1        = OpFunctionParameter %v4f32\n"
   6293 		"%label_testfun = OpLabel\n"
   6294 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6295 		// For the purposes of this test we assume that 0.f will always get
   6296 		// faithfully passed through the pipeline stages.
   6297 		"%b             = OpFAdd %f32 %input_const %a\n"
   6298 		"%c             = OpQuantizeToF16 %f32 %b\n"
   6299 		"%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
   6300 		"%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
   6301 		"%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
   6302 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
   6303 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
   6304 		"                 OpReturnValue %retval\n"
   6305 		"OpFunctionEnd\n";
   6306 
   6307 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
   6308 		map<string, string>									fragments;
   6309 		map<string, string>									constantSpecialization;
   6310 
   6311 		constantSpecialization["input"]						= tests[idx].input;
   6312 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
   6313 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
   6314 		fragments["testfun"]								= function;
   6315 		fragments["pre_main"]								= constants.specialize(constantSpecialization);
   6316 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
   6317 	}
   6318 
   6319 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
   6320 		map<string, string>									fragments;
   6321 		map<string, string>									constantSpecialization;
   6322 		vector<deInt32>										passConstants;
   6323 		deInt32												specConstant;
   6324 
   6325 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
   6326 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
   6327 		fragments["testfun"]								= function;
   6328 		fragments["decoration"]								= specDecorations;
   6329 		fragments["pre_main"]								= specConstants.specialize(constantSpecialization);
   6330 
   6331 		memcpy(&specConstant, &tests[idx].inputAsFloat, sizeof(float));
   6332 		passConstants.push_back(specConstant);
   6333 
   6334 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
   6335 	}
   6336 }
   6337 
   6338 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
   6339 {
   6340 	de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
   6341 	createOpQuantizeSingleOptionTests(opQuantizeTests.get());
   6342 	createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
   6343 	return opQuantizeTests.release();
   6344 }
   6345 
   6346 struct ShaderPermutation
   6347 {
   6348 	deUint8 vertexPermutation;
   6349 	deUint8 geometryPermutation;
   6350 	deUint8 tesscPermutation;
   6351 	deUint8 tessePermutation;
   6352 	deUint8 fragmentPermutation;
   6353 };
   6354 
   6355 ShaderPermutation getShaderPermutation(deUint8 inputValue)
   6356 {
   6357 	ShaderPermutation	permutation =
   6358 	{
   6359 		static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
   6360 		static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
   6361 		static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
   6362 		static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
   6363 		static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
   6364 	};
   6365 	return permutation;
   6366 }
   6367 
   6368 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
   6369 {
   6370 	RGBA								defaultColors[4];
   6371 	RGBA								invertedColors[4];
   6372 	de::MovePtr<tcu::TestCaseGroup>		moduleTests			(new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
   6373 
   6374 	const ShaderElement					combinedPipeline[]	=
   6375 	{
   6376 		ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
   6377 		ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
   6378 		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
   6379 		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
   6380 		ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
   6381 	};
   6382 
   6383 	getDefaultColors(defaultColors);
   6384 	getInvertedDefaultColors(invertedColors);
   6385 	addFunctionCaseWithPrograms<InstanceContext>(
   6386 			moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
   6387 			createInstanceContext(combinedPipeline, map<string, string>()));
   6388 
   6389 	const char* numbers[] =
   6390 	{
   6391 		"1", "2"
   6392 	};
   6393 
   6394 	for (deInt8 idx = 0; idx < 32; ++idx)
   6395 	{
   6396 		ShaderPermutation			permutation		= getShaderPermutation(idx);
   6397 		string						name			= string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
   6398 		const ShaderElement			pipeline[]		=
   6399 		{
   6400 			ShaderElement("vert",	string("vert") +	numbers[permutation.vertexPermutation],		VK_SHADER_STAGE_VERTEX_BIT),
   6401 			ShaderElement("geom",	string("geom") +	numbers[permutation.geometryPermutation],	VK_SHADER_STAGE_GEOMETRY_BIT),
   6402 			ShaderElement("tessc",	string("tessc") +	numbers[permutation.tesscPermutation],		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
   6403 			ShaderElement("tesse",	string("tesse") +	numbers[permutation.tessePermutation],		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
   6404 			ShaderElement("frag",	string("frag") +	numbers[permutation.fragmentPermutation],	VK_SHADER_STAGE_FRAGMENT_BIT)
   6405 		};
   6406 
   6407 		// If there are an even number of swaps, then it should be no-op.
   6408 		// If there are an odd number, the color should be flipped.
   6409 		if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
   6410 		{
   6411 			addFunctionCaseWithPrograms<InstanceContext>(
   6412 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
   6413 					createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
   6414 		}
   6415 		else
   6416 		{
   6417 			addFunctionCaseWithPrograms<InstanceContext>(
   6418 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
   6419 					createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
   6420 		}
   6421 	}
   6422 	return moduleTests.release();
   6423 }
   6424 
   6425 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
   6426 {
   6427 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
   6428 	RGBA defaultColors[4];
   6429 	getDefaultColors(defaultColors);
   6430 	map<string, string> fragments;
   6431 	fragments["pre_main"] =
   6432 		"%c_f32_5 = OpConstant %f32 5.\n";
   6433 
   6434 	// A loop with a single block. The Continue Target is the loop block
   6435 	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
   6436 	// -- the "continue construct" forms the entire loop.
   6437 	fragments["testfun"] =
   6438 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6439 		"%param1 = OpFunctionParameter %v4f32\n"
   6440 
   6441 		"%entry = OpLabel\n"
   6442 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6443 		"OpBranch %loop\n"
   6444 
   6445 		";adds and subtracts 1.0 to %val in alternate iterations\n"
   6446 		"%loop = OpLabel\n"
   6447 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
   6448 		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
   6449 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
   6450 		"%val = OpFAdd %f32 %val1 %delta\n"
   6451 		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
   6452 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6453 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6454 		"OpLoopMerge %exit %loop None\n"
   6455 		"OpBranchConditional %again %loop %exit\n"
   6456 
   6457 		"%exit = OpLabel\n"
   6458 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
   6459 		"OpReturnValue %result\n"
   6460 
   6461 		"OpFunctionEnd\n"
   6462 		;
   6463 	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
   6464 
   6465 	// Body comprised of multiple basic blocks.
   6466 	const StringTemplate multiBlock(
   6467 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6468 		"%param1 = OpFunctionParameter %v4f32\n"
   6469 
   6470 		"%entry = OpLabel\n"
   6471 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6472 		"OpBranch %loop\n"
   6473 
   6474 		";adds and subtracts 1.0 to %val in alternate iterations\n"
   6475 		"%loop = OpLabel\n"
   6476 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %gather\n"
   6477 		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %gather\n"
   6478 		"%val1 = OpPhi %f32 %val0 %entry %val %gather\n"
   6479 		// There are several possibilities for the Continue Target below.  Each
   6480 		// will be specialized into a separate test case.
   6481 		"OpLoopMerge %exit ${continue_target} None\n"
   6482 		"OpBranch %if\n"
   6483 
   6484 		"%if = OpLabel\n"
   6485 		";delta_next = (delta > 0) ? -1 : 1;\n"
   6486 		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
   6487 		"OpSelectionMerge %gather DontFlatten\n"
   6488 		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
   6489 
   6490 		"%odd = OpLabel\n"
   6491 		"OpBranch %gather\n"
   6492 
   6493 		"%even = OpLabel\n"
   6494 		"OpBranch %gather\n"
   6495 
   6496 		"%gather = OpLabel\n"
   6497 		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
   6498 		"%val = OpFAdd %f32 %val1 %delta\n"
   6499 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6500 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6501 		"OpBranchConditional %again %loop %exit\n"
   6502 
   6503 		"%exit = OpLabel\n"
   6504 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
   6505 		"OpReturnValue %result\n"
   6506 
   6507 		"OpFunctionEnd\n");
   6508 
   6509 	map<string, string> continue_target;
   6510 
   6511 	// The Continue Target is the loop block itself.
   6512 	continue_target["continue_target"] = "%loop";
   6513 	fragments["testfun"] = multiBlock.specialize(continue_target);
   6514 	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
   6515 
   6516 	// The Continue Target is at the end of the loop.
   6517 	continue_target["continue_target"] = "%gather";
   6518 	fragments["testfun"] = multiBlock.specialize(continue_target);
   6519 	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
   6520 
   6521 	// A loop with continue statement.
   6522 	fragments["testfun"] =
   6523 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6524 		"%param1 = OpFunctionParameter %v4f32\n"
   6525 
   6526 		"%entry = OpLabel\n"
   6527 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6528 		"OpBranch %loop\n"
   6529 
   6530 		";adds 4, 3, and 1 to %val0 (skips 2)\n"
   6531 		"%loop = OpLabel\n"
   6532 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
   6533 		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
   6534 		"OpLoopMerge %exit %continue None\n"
   6535 		"OpBranch %if\n"
   6536 
   6537 		"%if = OpLabel\n"
   6538 		";skip if %count==2\n"
   6539 		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
   6540 		"OpSelectionMerge %continue DontFlatten\n"
   6541 		"OpBranchConditional %eq2 %continue %body\n"
   6542 
   6543 		"%body = OpLabel\n"
   6544 		"%fcount = OpConvertSToF %f32 %count\n"
   6545 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
   6546 		"OpBranch %continue\n"
   6547 
   6548 		"%continue = OpLabel\n"
   6549 		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
   6550 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6551 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6552 		"OpBranchConditional %again %loop %exit\n"
   6553 
   6554 		"%exit = OpLabel\n"
   6555 		"%same = OpFSub %f32 %val %c_f32_8\n"
   6556 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
   6557 		"OpReturnValue %result\n"
   6558 		"OpFunctionEnd\n";
   6559 	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
   6560 
   6561 	// A loop with break.
   6562 	fragments["testfun"] =
   6563 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6564 		"%param1 = OpFunctionParameter %v4f32\n"
   6565 
   6566 		"%entry = OpLabel\n"
   6567 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
   6568 		"%dot = OpDot %f32 %param1 %param1\n"
   6569 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
   6570 		"%zero = OpConvertFToU %u32 %div\n"
   6571 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
   6572 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6573 		"OpBranch %loop\n"
   6574 
   6575 		";adds 4 and 3 to %val0 (exits early)\n"
   6576 		"%loop = OpLabel\n"
   6577 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
   6578 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
   6579 		"OpLoopMerge %exit %continue None\n"
   6580 		"OpBranch %if\n"
   6581 
   6582 		"%if = OpLabel\n"
   6583 		";end loop if %count==%two\n"
   6584 		"%above2 = OpSGreaterThan %bool %count %two\n"
   6585 		"OpSelectionMerge %continue DontFlatten\n"
   6586 		"OpBranchConditional %above2 %body %exit\n"
   6587 
   6588 		"%body = OpLabel\n"
   6589 		"%fcount = OpConvertSToF %f32 %count\n"
   6590 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
   6591 		"OpBranch %continue\n"
   6592 
   6593 		"%continue = OpLabel\n"
   6594 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6595 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6596 		"OpBranchConditional %again %loop %exit\n"
   6597 
   6598 		"%exit = OpLabel\n"
   6599 		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
   6600 		"%same = OpFSub %f32 %val_post %c_f32_7\n"
   6601 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
   6602 		"OpReturnValue %result\n"
   6603 		"OpFunctionEnd\n";
   6604 	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
   6605 
   6606 	// A loop with return.
   6607 	fragments["testfun"] =
   6608 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6609 		"%param1 = OpFunctionParameter %v4f32\n"
   6610 
   6611 		"%entry = OpLabel\n"
   6612 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
   6613 		"%dot = OpDot %f32 %param1 %param1\n"
   6614 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
   6615 		"%zero = OpConvertFToU %u32 %div\n"
   6616 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
   6617 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6618 		"OpBranch %loop\n"
   6619 
   6620 		";returns early without modifying %param1\n"
   6621 		"%loop = OpLabel\n"
   6622 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
   6623 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
   6624 		"OpLoopMerge %exit %continue None\n"
   6625 		"OpBranch %if\n"
   6626 
   6627 		"%if = OpLabel\n"
   6628 		";return if %count==%two\n"
   6629 		"%above2 = OpSGreaterThan %bool %count %two\n"
   6630 		"OpSelectionMerge %continue DontFlatten\n"
   6631 		"OpBranchConditional %above2 %body %early_exit\n"
   6632 
   6633 		"%early_exit = OpLabel\n"
   6634 		"OpReturnValue %param1\n"
   6635 
   6636 		"%body = OpLabel\n"
   6637 		"%fcount = OpConvertSToF %f32 %count\n"
   6638 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
   6639 		"OpBranch %continue\n"
   6640 
   6641 		"%continue = OpLabel\n"
   6642 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6643 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6644 		"OpBranchConditional %again %loop %exit\n"
   6645 
   6646 		"%exit = OpLabel\n"
   6647 		";should never get here, so return an incorrect result\n"
   6648 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
   6649 		"OpReturnValue %result\n"
   6650 		"OpFunctionEnd\n";
   6651 	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
   6652 
   6653 	return testGroup.release();
   6654 }
   6655 
   6656 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
   6657 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
   6658 {
   6659 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
   6660 	map<string, string> fragments;
   6661 
   6662 	// A barrier inside a function body.
   6663 	fragments["pre_main"] =
   6664 		"%Workgroup = OpConstant %i32 2\n"
   6665 		"%SequentiallyConsistent = OpConstant %i32 0x10\n";
   6666 	fragments["testfun"] =
   6667 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6668 		"%param1 = OpFunctionParameter %v4f32\n"
   6669 		"%label_testfun = OpLabel\n"
   6670 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6671 		"OpReturnValue %param1\n"
   6672 		"OpFunctionEnd\n";
   6673 	addTessCtrlTest(testGroup.get(), "in_function", fragments);
   6674 
   6675 	// Common setup code for the following tests.
   6676 	fragments["pre_main"] =
   6677 		"%Workgroup = OpConstant %i32 2\n"
   6678 		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
   6679 		"%c_f32_5 = OpConstant %f32 5.\n";
   6680 	const string setupPercentZero =	 // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
   6681 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6682 		"%param1 = OpFunctionParameter %v4f32\n"
   6683 		"%entry = OpLabel\n"
   6684 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
   6685 		"%dot = OpDot %f32 %param1 %param1\n"
   6686 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
   6687 		"%zero = OpConvertFToU %u32 %div\n";
   6688 
   6689 	// Barriers inside OpSwitch branches.
   6690 	fragments["testfun"] =
   6691 		setupPercentZero +
   6692 		"OpSelectionMerge %switch_exit None\n"
   6693 		"OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
   6694 
   6695 		"%case1 = OpLabel\n"
   6696 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
   6697 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6698 		"%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
   6699 		"OpBranch %switch_exit\n"
   6700 
   6701 		"%switch_default = OpLabel\n"
   6702 		"%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
   6703 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
   6704 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6705 		"OpBranch %switch_exit\n"
   6706 
   6707 		"%case0 = OpLabel\n"
   6708 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6709 		"OpBranch %switch_exit\n"
   6710 
   6711 		"%switch_exit = OpLabel\n"
   6712 		"%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
   6713 		"OpReturnValue %ret\n"
   6714 		"OpFunctionEnd\n";
   6715 	addTessCtrlTest(testGroup.get(), "in_switch", fragments);
   6716 
   6717 	// Barriers inside if-then-else.
   6718 	fragments["testfun"] =
   6719 		setupPercentZero +
   6720 		"%eq0 = OpIEqual %bool %zero %c_u32_0\n"
   6721 		"OpSelectionMerge %exit DontFlatten\n"
   6722 		"OpBranchConditional %eq0 %then %else\n"
   6723 
   6724 		"%else = OpLabel\n"
   6725 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
   6726 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6727 		"%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
   6728 		"OpBranch %exit\n"
   6729 
   6730 		"%then = OpLabel\n"
   6731 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6732 		"OpBranch %exit\n"
   6733 
   6734 		"%exit = OpLabel\n"
   6735 		"%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
   6736 		"OpReturnValue %ret\n"
   6737 		"OpFunctionEnd\n";
   6738 	addTessCtrlTest(testGroup.get(), "in_if", fragments);
   6739 
   6740 	// A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
   6741 	// http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
   6742 	fragments["testfun"] =
   6743 		setupPercentZero +
   6744 		"%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
   6745 		"%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
   6746 		"OpSelectionMerge %exit DontFlatten\n"
   6747 		"OpBranchConditional %thread0 %then %else\n"
   6748 
   6749 		"%else = OpLabel\n"
   6750 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6751 		"OpBranch %exit\n"
   6752 
   6753 		"%then = OpLabel\n"
   6754 		"%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
   6755 		"OpBranch %exit\n"
   6756 
   6757 		"%exit = OpLabel\n"
   6758 		"%val = OpPhi %f32 %val0 %else %val1 %then\n"
   6759 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6760 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
   6761 		"OpReturnValue %ret\n"
   6762 		"OpFunctionEnd\n";
   6763 	addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
   6764 
   6765 	// A barrier inside a loop.
   6766 	fragments["pre_main"] =
   6767 		"%Workgroup = OpConstant %i32 2\n"
   6768 		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
   6769 		"%c_f32_10 = OpConstant %f32 10.\n";
   6770 	fragments["testfun"] =
   6771 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6772 		"%param1 = OpFunctionParameter %v4f32\n"
   6773 		"%entry = OpLabel\n"
   6774 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   6775 		"OpBranch %loop\n"
   6776 
   6777 		";adds 4, 3, 2, and 1 to %val0\n"
   6778 		"%loop = OpLabel\n"
   6779 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
   6780 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
   6781 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
   6782 		"%fcount = OpConvertSToF %f32 %count\n"
   6783 		"%val = OpFAdd %f32 %val1 %fcount\n"
   6784 		"%count__ = OpISub %i32 %count %c_i32_1\n"
   6785 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
   6786 		"OpLoopMerge %exit %loop None\n"
   6787 		"OpBranchConditional %again %loop %exit\n"
   6788 
   6789 		"%exit = OpLabel\n"
   6790 		"%same = OpFSub %f32 %val %c_f32_10\n"
   6791 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
   6792 		"OpReturnValue %ret\n"
   6793 		"OpFunctionEnd\n";
   6794 	addTessCtrlTest(testGroup.get(), "in_loop", fragments);
   6795 
   6796 	return testGroup.release();
   6797 }
   6798 
   6799 // Test for the OpFRem instruction.
   6800 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
   6801 {
   6802 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
   6803 	map<string, string>					fragments;
   6804 	RGBA								inputColors[4];
   6805 	RGBA								outputColors[4];
   6806 
   6807 	fragments["pre_main"]				 =
   6808 		"%c_f32_3 = OpConstant %f32 3.0\n"
   6809 		"%c_f32_n3 = OpConstant %f32 -3.0\n"
   6810 		"%c_f32_4 = OpConstant %f32 4.0\n"
   6811 		"%c_f32_p75 = OpConstant %f32 0.75\n"
   6812 		"%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
   6813 		"%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
   6814 		"%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
   6815 
   6816 	// The test does the following.
   6817 	// vec4 result = (param1 * 8.0) - 4.0;
   6818 	// return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
   6819 	fragments["testfun"]				 =
   6820 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6821 		"%param1 = OpFunctionParameter %v4f32\n"
   6822 		"%label_testfun = OpLabel\n"
   6823 		"%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
   6824 		"%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
   6825 		"%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
   6826 		"%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
   6827 		"%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
   6828 		"%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
   6829 		"OpReturnValue %xy_0_1\n"
   6830 		"OpFunctionEnd\n";
   6831 
   6832 
   6833 	inputColors[0]		= RGBA(16,	16,		0, 255);
   6834 	inputColors[1]		= RGBA(232, 232,	0, 255);
   6835 	inputColors[2]		= RGBA(232, 16,		0, 255);
   6836 	inputColors[3]		= RGBA(16,	232,	0, 255);
   6837 
   6838 	outputColors[0]		= RGBA(64,	64,		0, 255);
   6839 	outputColors[1]		= RGBA(255, 255,	0, 255);
   6840 	outputColors[2]		= RGBA(255, 64,		0, 255);
   6841 	outputColors[3]		= RGBA(64,	255,	0, 255);
   6842 
   6843 	createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
   6844 	return testGroup.release();
   6845 }
   6846 
   6847 // Test for the OpSRem instruction.
   6848 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
   6849 {
   6850 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
   6851 	map<string, string>					fragments;
   6852 
   6853 	fragments["pre_main"]				 =
   6854 		"%c_f32_255 = OpConstant %f32 255.0\n"
   6855 		"%c_i32_128 = OpConstant %i32 128\n"
   6856 		"%c_i32_255 = OpConstant %i32 255\n"
   6857 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
   6858 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
   6859 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
   6860 
   6861 	// The test does the following.
   6862 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
   6863 	// ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
   6864 	// return float(result + 128) / 255.0;
   6865 	fragments["testfun"]				 =
   6866 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6867 		"%param1 = OpFunctionParameter %v4f32\n"
   6868 		"%label_testfun = OpLabel\n"
   6869 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
   6870 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
   6871 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
   6872 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
   6873 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
   6874 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
   6875 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
   6876 		"%x_out = OpSRem %i32 %x_in %y_in\n"
   6877 		"%y_out = OpSRem %i32 %y_in %z_in\n"
   6878 		"%z_out = OpSRem %i32 %z_in %x_in\n"
   6879 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
   6880 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
   6881 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
   6882 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
   6883 		"OpReturnValue %float_out\n"
   6884 		"OpFunctionEnd\n";
   6885 
   6886 	const struct CaseParams
   6887 	{
   6888 		const char*		name;
   6889 		const char*		failMessageTemplate;	// customized status message
   6890 		qpTestResult	failResult;				// override status on failure
   6891 		int				operands[4][3];			// four (x, y, z) vectors of operands
   6892 		int				results[4][3];			// four (x, y, z) vectors of results
   6893 	} cases[] =
   6894 	{
   6895 		{
   6896 			"positive",
   6897 			"${reason}",
   6898 			QP_TEST_RESULT_FAIL,
   6899 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },			// operands
   6900 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },			// results
   6901 		},
   6902 		{
   6903 			"all",
   6904 			"Inconsistent results, but within specification: ${reason}",
   6905 			negFailResult,															// negative operands, not required by the spec
   6906 			{ { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },	// operands
   6907 			{ { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },	// results
   6908 		},
   6909 	};
   6910 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   6911 
   6912 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   6913 	{
   6914 		const CaseParams&	params			= cases[caseNdx];
   6915 		RGBA				inputColors[4];
   6916 		RGBA				outputColors[4];
   6917 
   6918 		for (int i = 0; i < 4; ++i)
   6919 		{
   6920 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
   6921 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
   6922 		}
   6923 
   6924 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
   6925 	}
   6926 
   6927 	return testGroup.release();
   6928 }
   6929 
   6930 // Test for the OpSMod instruction.
   6931 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
   6932 {
   6933 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
   6934 	map<string, string>					fragments;
   6935 
   6936 	fragments["pre_main"]				 =
   6937 		"%c_f32_255 = OpConstant %f32 255.0\n"
   6938 		"%c_i32_128 = OpConstant %i32 128\n"
   6939 		"%c_i32_255 = OpConstant %i32 255\n"
   6940 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
   6941 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
   6942 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
   6943 
   6944 	// The test does the following.
   6945 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
   6946 	// ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
   6947 	// return float(result + 128) / 255.0;
   6948 	fragments["testfun"]				 =
   6949 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   6950 		"%param1 = OpFunctionParameter %v4f32\n"
   6951 		"%label_testfun = OpLabel\n"
   6952 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
   6953 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
   6954 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
   6955 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
   6956 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
   6957 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
   6958 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
   6959 		"%x_out = OpSMod %i32 %x_in %y_in\n"
   6960 		"%y_out = OpSMod %i32 %y_in %z_in\n"
   6961 		"%z_out = OpSMod %i32 %z_in %x_in\n"
   6962 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
   6963 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
   6964 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
   6965 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
   6966 		"OpReturnValue %float_out\n"
   6967 		"OpFunctionEnd\n";
   6968 
   6969 	const struct CaseParams
   6970 	{
   6971 		const char*		name;
   6972 		const char*		failMessageTemplate;	// customized status message
   6973 		qpTestResult	failResult;				// override status on failure
   6974 		int				operands[4][3];			// four (x, y, z) vectors of operands
   6975 		int				results[4][3];			// four (x, y, z) vectors of results
   6976 	} cases[] =
   6977 	{
   6978 		{
   6979 			"positive",
   6980 			"${reason}",
   6981 			QP_TEST_RESULT_FAIL,
   6982 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },				// operands
   6983 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },				// results
   6984 		},
   6985 		{
   6986 			"all",
   6987 			"Inconsistent results, but within specification: ${reason}",
   6988 			negFailResult,																// negative operands, not required by the spec
   6989 			{ { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },	// operands
   6990 			{ { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },	// results
   6991 		},
   6992 	};
   6993 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
   6994 
   6995 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
   6996 	{
   6997 		const CaseParams&	params			= cases[caseNdx];
   6998 		RGBA				inputColors[4];
   6999 		RGBA				outputColors[4];
   7000 
   7001 		for (int i = 0; i < 4; ++i)
   7002 		{
   7003 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
   7004 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
   7005 		}
   7006 
   7007 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
   7008 	}
   7009 	return testGroup.release();
   7010 }
   7011 
   7012 
   7013 enum IntegerType
   7014 {
   7015 	INTEGER_TYPE_SIGNED_16,
   7016 	INTEGER_TYPE_SIGNED_32,
   7017 	INTEGER_TYPE_SIGNED_64,
   7018 
   7019 	INTEGER_TYPE_UNSIGNED_16,
   7020 	INTEGER_TYPE_UNSIGNED_32,
   7021 	INTEGER_TYPE_UNSIGNED_64,
   7022 };
   7023 
   7024 const string getBitWidthStr (IntegerType type)
   7025 {
   7026 	switch (type)
   7027 	{
   7028 		case INTEGER_TYPE_SIGNED_16:
   7029 		case INTEGER_TYPE_UNSIGNED_16:	return "16";
   7030 
   7031 		case INTEGER_TYPE_SIGNED_32:
   7032 		case INTEGER_TYPE_UNSIGNED_32:	return "32";
   7033 
   7034 		case INTEGER_TYPE_SIGNED_64:
   7035 		case INTEGER_TYPE_UNSIGNED_64:	return "64";
   7036 
   7037 		default:						DE_ASSERT(false);
   7038 										return "";
   7039 	}
   7040 }
   7041 
   7042 const string getByteWidthStr (IntegerType type)
   7043 {
   7044 	switch (type)
   7045 	{
   7046 		case INTEGER_TYPE_SIGNED_16:
   7047 		case INTEGER_TYPE_UNSIGNED_16:	return "2";
   7048 
   7049 		case INTEGER_TYPE_SIGNED_32:
   7050 		case INTEGER_TYPE_UNSIGNED_32:	return "4";
   7051 
   7052 		case INTEGER_TYPE_SIGNED_64:
   7053 		case INTEGER_TYPE_UNSIGNED_64:	return "8";
   7054 
   7055 		default:						DE_ASSERT(false);
   7056 										return "";
   7057 	}
   7058 }
   7059 
   7060 bool isSigned (IntegerType type)
   7061 {
   7062 	return (type <= INTEGER_TYPE_SIGNED_64);
   7063 }
   7064 
   7065 const string getTypeName (IntegerType type)
   7066 {
   7067 	string prefix = isSigned(type) ? "" : "u";
   7068 	return prefix + "int" + getBitWidthStr(type);
   7069 }
   7070 
   7071 const string getTestName (IntegerType from, IntegerType to)
   7072 {
   7073 	return getTypeName(from) + "_to_" + getTypeName(to);
   7074 }
   7075 
   7076 const string getAsmTypeDeclaration (IntegerType type)
   7077 {
   7078 	string sign = isSigned(type) ? " 1" : " 0";
   7079 	return "OpTypeInt " + getBitWidthStr(type) + sign;
   7080 }
   7081 
   7082 template<typename T>
   7083 BufferSp getSpecializedBuffer (deInt64 number)
   7084 {
   7085 	return BufferSp(new Buffer<T>(vector<T>(1, (T)number)));
   7086 }
   7087 
   7088 BufferSp getBuffer (IntegerType type, deInt64 number)
   7089 {
   7090 	switch (type)
   7091 	{
   7092 		case INTEGER_TYPE_SIGNED_16:	return getSpecializedBuffer<deInt16>(number);
   7093 		case INTEGER_TYPE_SIGNED_32:	return getSpecializedBuffer<deInt32>(number);
   7094 		case INTEGER_TYPE_SIGNED_64:	return getSpecializedBuffer<deInt64>(number);
   7095 
   7096 		case INTEGER_TYPE_UNSIGNED_16:	return getSpecializedBuffer<deUint16>(number);
   7097 		case INTEGER_TYPE_UNSIGNED_32:	return getSpecializedBuffer<deUint32>(number);
   7098 		case INTEGER_TYPE_UNSIGNED_64:	return getSpecializedBuffer<deUint64>(number);
   7099 
   7100 		default:						DE_ASSERT(false);
   7101 										return BufferSp(new Buffer<deInt32>(vector<deInt32>(1, 0)));
   7102 	}
   7103 }
   7104 
   7105 bool usesInt16 (IntegerType from, IntegerType to)
   7106 {
   7107 	return (from == INTEGER_TYPE_SIGNED_16 || from == INTEGER_TYPE_UNSIGNED_16
   7108 			|| to == INTEGER_TYPE_SIGNED_16 || to == INTEGER_TYPE_UNSIGNED_16);
   7109 }
   7110 
   7111 bool usesInt64 (IntegerType from, IntegerType to)
   7112 {
   7113 	return (from == INTEGER_TYPE_SIGNED_64 || from == INTEGER_TYPE_UNSIGNED_64
   7114 			|| to == INTEGER_TYPE_SIGNED_64 || to == INTEGER_TYPE_UNSIGNED_64);
   7115 }
   7116 
   7117 ComputeTestFeatures getConversionUsedFeatures (IntegerType from, IntegerType to)
   7118 {
   7119 	if (usesInt16(from, to))
   7120 	{
   7121 		if (usesInt64(from, to))
   7122 		{
   7123 			return COMPUTE_TEST_USES_INT16_INT64;
   7124 		}
   7125 		else
   7126 		{
   7127 			return COMPUTE_TEST_USES_INT16;
   7128 		}
   7129 	}
   7130 	else
   7131 	{
   7132 		return COMPUTE_TEST_USES_INT64;
   7133 	}
   7134 }
   7135 
   7136 struct ConvertCase
   7137 {
   7138 	ConvertCase (IntegerType from, IntegerType to, deInt64 number)
   7139 	: m_fromType		(from)
   7140 	, m_toType			(to)
   7141 	, m_features		(getConversionUsedFeatures(from, to))
   7142 	, m_name			(getTestName(from, to))
   7143 	, m_inputBuffer		(getBuffer(from, number))
   7144 	, m_outputBuffer	(getBuffer(to, number))
   7145 	{
   7146 		m_asmTypes["inputType"]		= getAsmTypeDeclaration(from);
   7147 		m_asmTypes["outputType"]	= getAsmTypeDeclaration(to);
   7148 
   7149 		if (m_features == COMPUTE_TEST_USES_INT16)
   7150 		{
   7151 			m_asmTypes["int_capabilities"] = "OpCapability Int16\n"
   7152 											 "OpCapability StorageUniformBufferBlock16\n";
   7153 			m_asmTypes["int_extensions"]   = "OpExtension \"SPV_KHR_16bit_storage\"\n";
   7154 		}
   7155 		else if (m_features == COMPUTE_TEST_USES_INT64)
   7156 		{
   7157 			m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
   7158 			m_asmTypes["int_extensions"]   = "";
   7159 		}
   7160 		else if (m_features == COMPUTE_TEST_USES_INT16_INT64)
   7161 		{
   7162 			m_asmTypes["int_capabilities"] = "OpCapability Int16\n"
   7163 											 "OpCapability StorageUniformBufferBlock16\n"
   7164 											 "OpCapability Int64\n";
   7165 			m_asmTypes["int_extensions"] = "OpExtension \"SPV_KHR_16bit_storage\"\n";
   7166 		}
   7167 		else
   7168 		{
   7169 			DE_ASSERT(false);
   7170 		}
   7171 	}
   7172 
   7173 	IntegerType				m_fromType;
   7174 	IntegerType				m_toType;
   7175 	ComputeTestFeatures		m_features;
   7176 	string					m_name;
   7177 	map<string, string>		m_asmTypes;
   7178 	BufferSp				m_inputBuffer;
   7179 	BufferSp				m_outputBuffer;
   7180 };
   7181 
   7182 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase)
   7183 {
   7184 	map<string, string> params = convertCase.m_asmTypes;
   7185 
   7186 	params["instruction"] = instruction;
   7187 
   7188 	params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
   7189 	params["outDecorator"] = getByteWidthStr(convertCase.m_toType);
   7190 
   7191 	const StringTemplate shader (
   7192 		"OpCapability Shader\n"
   7193 		"${int_capabilities}"
   7194 		"${int_extensions}"
   7195 		"OpMemoryModel Logical GLSL450\n"
   7196 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   7197 		"OpExecutionMode %main LocalSize 1 1 1\n"
   7198 		"OpSource GLSL 430\n"
   7199 		"OpName %main           \"main\"\n"
   7200 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   7201 		// Decorators
   7202 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   7203 		"OpDecorate %indata DescriptorSet 0\n"
   7204 		"OpDecorate %indata Binding 0\n"
   7205 		"OpDecorate %outdata DescriptorSet 0\n"
   7206 		"OpDecorate %outdata Binding 1\n"
   7207 		"OpDecorate %in_arr ArrayStride ${inDecorator}\n"
   7208 		"OpDecorate %out_arr ArrayStride ${outDecorator}\n"
   7209 		"OpDecorate %in_buf BufferBlock\n"
   7210 		"OpDecorate %out_buf BufferBlock\n"
   7211 		"OpMemberDecorate %in_buf 0 Offset 0\n"
   7212 		"OpMemberDecorate %out_buf 0 Offset 0\n"
   7213 		// Base types
   7214 		"%void       = OpTypeVoid\n"
   7215 		"%voidf      = OpTypeFunction %void\n"
   7216 		"%u32        = OpTypeInt 32 0\n"
   7217 		"%i32        = OpTypeInt 32 1\n"
   7218 		"%uvec3      = OpTypeVector %u32 3\n"
   7219 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
   7220 		// Custom types
   7221 		"%in_type    = ${inputType}\n"
   7222 		"%out_type   = ${outputType}\n"
   7223 		// Derived types
   7224 		"%in_ptr     = OpTypePointer Uniform %in_type\n"
   7225 		"%out_ptr    = OpTypePointer Uniform %out_type\n"
   7226 		"%in_arr     = OpTypeRuntimeArray %in_type\n"
   7227 		"%out_arr    = OpTypeRuntimeArray %out_type\n"
   7228 		"%in_buf     = OpTypeStruct %in_arr\n"
   7229 		"%out_buf    = OpTypeStruct %out_arr\n"
   7230 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
   7231 		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
   7232 		"%indata     = OpVariable %in_bufptr Uniform\n"
   7233 		"%outdata    = OpVariable %out_bufptr Uniform\n"
   7234 		"%inputptr   = OpTypePointer Input %in_type\n"
   7235 		"%id         = OpVariable %uvec3ptr Input\n"
   7236 		// Constants
   7237 		"%zero       = OpConstant %i32 0\n"
   7238 		// Main function
   7239 		"%main       = OpFunction %void None %voidf\n"
   7240 		"%label      = OpLabel\n"
   7241 		"%idval      = OpLoad %uvec3 %id\n"
   7242 		"%x          = OpCompositeExtract %u32 %idval 0\n"
   7243 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
   7244 		"%outloc     = OpAccessChain %out_ptr %outdata %zero %x\n"
   7245 		"%inval      = OpLoad %in_type %inloc\n"
   7246 		"%conv       = ${instruction} %out_type %inval\n"
   7247 		"              OpStore %outloc %conv\n"
   7248 		"              OpReturn\n"
   7249 		"              OpFunctionEnd\n"
   7250 	);
   7251 
   7252 	return shader.specialize(params);
   7253 }
   7254 
   7255 void createSConvertCases (vector<ConvertCase>& testCases)
   7256 {
   7257 	// Convert int to int
   7258 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_SIGNED_32,		14669));
   7259 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_SIGNED_64,		3341));
   7260 
   7261 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_32,	INTEGER_TYPE_SIGNED_64,		973610259));
   7262 
   7263 	// Convert int to unsigned int
   7264 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_UNSIGNED_32,	9288));
   7265 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_UNSIGNED_64,	15460));
   7266 
   7267 	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_32,	INTEGER_TYPE_UNSIGNED_64,	346213461));
   7268 }
   7269 
   7270 //  Test for the OpSConvert instruction.
   7271 tcu::TestCaseGroup* createSConvertTests (tcu::TestContext& testCtx)
   7272 {
   7273 	const string instruction				("OpSConvert");
   7274 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "sconvert", "OpSConvert"));
   7275 	vector<ConvertCase>				testCases;
   7276 	createSConvertCases(testCases);
   7277 
   7278 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
   7279 	{
   7280 		ComputeShaderSpec	spec;
   7281 
   7282 		spec.assembly = getConvertCaseShaderStr(instruction, *test);
   7283 		spec.inputs.push_back(test->m_inputBuffer);
   7284 		spec.outputs.push_back(test->m_outputBuffer);
   7285 		spec.numWorkGroups = IVec3(1, 1, 1);
   7286 
   7287 		if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
   7288 		{
   7289 			spec.extensions.push_back("VK_KHR_16bit_storage");
   7290 			spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
   7291 		}
   7292 
   7293 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpSConvert.", spec, test->m_features));
   7294 	}
   7295 
   7296 	return group.release();
   7297 }
   7298 
   7299 void createUConvertCases (vector<ConvertCase>& testCases)
   7300 {
   7301 	// Convert unsigned int to unsigned int
   7302 	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_UNSIGNED_32,	60653));
   7303 	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_UNSIGNED_64,	17991));
   7304 
   7305 	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_32,	INTEGER_TYPE_UNSIGNED_64,	904256275));
   7306 }
   7307 
   7308 //  Test for the OpUConvert instruction.
   7309 tcu::TestCaseGroup* createUConvertTests (tcu::TestContext& testCtx)
   7310 {
   7311 	const string instruction				("OpUConvert");
   7312 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "uconvert", "OpUConvert"));
   7313 	vector<ConvertCase>				testCases;
   7314 	createUConvertCases(testCases);
   7315 
   7316 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
   7317 	{
   7318 		ComputeShaderSpec	spec;
   7319 
   7320 		spec.assembly = getConvertCaseShaderStr(instruction, *test);
   7321 		spec.inputs.push_back(test->m_inputBuffer);
   7322 		spec.outputs.push_back(test->m_outputBuffer);
   7323 		spec.numWorkGroups = IVec3(1, 1, 1);
   7324 
   7325 		if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
   7326 		{
   7327 			spec.extensions.push_back("VK_KHR_16bit_storage");
   7328 			spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
   7329 		}
   7330 
   7331 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpUConvert.", spec, test->m_features));
   7332 	}
   7333 	return group.release();
   7334 }
   7335 
   7336 const string getNumberTypeName (const NumberType type)
   7337 {
   7338 	if (type == NUMBERTYPE_INT32)
   7339 	{
   7340 		return "int";
   7341 	}
   7342 	else if (type == NUMBERTYPE_UINT32)
   7343 	{
   7344 		return "uint";
   7345 	}
   7346 	else if (type == NUMBERTYPE_FLOAT32)
   7347 	{
   7348 		return "float";
   7349 	}
   7350 	else
   7351 	{
   7352 		DE_ASSERT(false);
   7353 		return "";
   7354 	}
   7355 }
   7356 
   7357 deInt32 getInt(de::Random& rnd)
   7358 {
   7359 	return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
   7360 }
   7361 
   7362 const string repeatString (const string& str, int times)
   7363 {
   7364 	string filler;
   7365 	for (int i = 0; i < times; ++i)
   7366 	{
   7367 		filler += str;
   7368 	}
   7369 	return filler;
   7370 }
   7371 
   7372 const string getRandomConstantString (const NumberType type, de::Random& rnd)
   7373 {
   7374 	if (type == NUMBERTYPE_INT32)
   7375 	{
   7376 		return numberToString<deInt32>(getInt(rnd));
   7377 	}
   7378 	else if (type == NUMBERTYPE_UINT32)
   7379 	{
   7380 		return numberToString<deUint32>(rnd.getUint32());
   7381 	}
   7382 	else if (type == NUMBERTYPE_FLOAT32)
   7383 	{
   7384 		return numberToString<float>(rnd.getFloat());
   7385 	}
   7386 	else
   7387 	{
   7388 		DE_ASSERT(false);
   7389 		return "";
   7390 	}
   7391 }
   7392 
   7393 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
   7394 {
   7395 	map<string, string> params;
   7396 
   7397 	// Vec2 to Vec4
   7398 	for (int width = 2; width <= 4; ++width)
   7399 	{
   7400 		string randomConst = numberToString(getInt(rnd));
   7401 		string widthStr = numberToString(width);
   7402 		int index = rnd.getInt(0, width-1);
   7403 
   7404 		params["type"]					= "vec";
   7405 		params["name"]					= params["type"] + "_" + widthStr;
   7406 		params["compositeType"]			= "%composite = OpTypeVector %custom " + widthStr +"\n";
   7407 		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
   7408 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
   7409 		params["indexes"]				= numberToString(index);
   7410 		testCases.push_back(params);
   7411 	}
   7412 }
   7413 
   7414 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
   7415 {
   7416 	const int limit = 10;
   7417 	map<string, string> params;
   7418 
   7419 	for (int width = 2; width <= limit; ++width)
   7420 	{
   7421 		string randomConst = numberToString(getInt(rnd));
   7422 		string widthStr = numberToString(width);
   7423 		int index = rnd.getInt(0, width-1);
   7424 
   7425 		params["type"]					= "array";
   7426 		params["name"]					= params["type"] + "_" + widthStr;
   7427 		params["compositeType"]			= string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
   7428 											+	 "%composite = OpTypeArray %custom %arraywidth\n";
   7429 
   7430 		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
   7431 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
   7432 		params["indexes"]				= numberToString(index);
   7433 		testCases.push_back(params);
   7434 	}
   7435 }
   7436 
   7437 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
   7438 {
   7439 	const int limit = 10;
   7440 	map<string, string> params;
   7441 
   7442 	for (int width = 2; width <= limit; ++width)
   7443 	{
   7444 		string randomConst = numberToString(getInt(rnd));
   7445 		int index = rnd.getInt(0, width-1);
   7446 
   7447 		params["type"]					= "struct";
   7448 		params["name"]					= params["type"] + "_" + numberToString(width);
   7449 		params["compositeType"]			= "%composite = OpTypeStruct" + repeatString(" %custom", width) + "\n";
   7450 		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
   7451 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
   7452 		params["indexes"]				= numberToString(index);
   7453 		testCases.push_back(params);
   7454 	}
   7455 }
   7456 
   7457 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
   7458 {
   7459 	map<string, string> params;
   7460 
   7461 	// Vec2 to Vec4
   7462 	for (int width = 2; width <= 4; ++width)
   7463 	{
   7464 		string widthStr = numberToString(width);
   7465 
   7466 		for (int column = 2 ; column <= 4; ++column)
   7467 		{
   7468 			int index_0 = rnd.getInt(0, column-1);
   7469 			int index_1 = rnd.getInt(0, width-1);
   7470 			string columnStr = numberToString(column);
   7471 
   7472 			params["type"]					= "matrix";
   7473 			params["name"]					= params["type"] + "_" + widthStr + "x" + columnStr;
   7474 			params["compositeType"]			= string("%vectype   = OpTypeVector %custom " + widthStr + "\n")
   7475 												+	 "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
   7476 
   7477 			params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n"
   7478 												+	 "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
   7479 
   7480 			params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
   7481 			params["indexes"]				= numberToString(index_0) + " " + numberToString(index_1);
   7482 			testCases.push_back(params);
   7483 		}
   7484 	}
   7485 }
   7486 
   7487 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
   7488 {
   7489 	createVectorCompositeCases(testCases, rnd, type);
   7490 	createArrayCompositeCases(testCases, rnd, type);
   7491 	createStructCompositeCases(testCases, rnd, type);
   7492 	// Matrix only supports float types
   7493 	if (type == NUMBERTYPE_FLOAT32)
   7494 	{
   7495 		createMatrixCompositeCases(testCases, rnd, type);
   7496 	}
   7497 }
   7498 
   7499 const string getAssemblyTypeDeclaration (const NumberType type)
   7500 {
   7501 	switch (type)
   7502 	{
   7503 		case NUMBERTYPE_INT32:		return "OpTypeInt 32 1";
   7504 		case NUMBERTYPE_UINT32:		return "OpTypeInt 32 0";
   7505 		case NUMBERTYPE_FLOAT32:	return "OpTypeFloat 32";
   7506 		default:			DE_ASSERT(false); return "";
   7507 	}
   7508 }
   7509 
   7510 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
   7511 {
   7512 	map<string, string>	parameters(params);
   7513 
   7514 	parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
   7515 
   7516 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
   7517 
   7518 	return StringTemplate (
   7519 		"OpCapability Shader\n"
   7520 		"OpCapability Matrix\n"
   7521 		"OpMemoryModel Logical GLSL450\n"
   7522 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   7523 		"OpExecutionMode %main LocalSize 1 1 1\n"
   7524 
   7525 		"OpSource GLSL 430\n"
   7526 		"OpName %main           \"main\"\n"
   7527 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   7528 
   7529 		// Decorators
   7530 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   7531 		"OpDecorate %buf BufferBlock\n"
   7532 		"OpDecorate %indata DescriptorSet 0\n"
   7533 		"OpDecorate %indata Binding 0\n"
   7534 		"OpDecorate %outdata DescriptorSet 0\n"
   7535 		"OpDecorate %outdata Binding 1\n"
   7536 		"OpDecorate %customarr ArrayStride 4\n"
   7537 		"${compositeDecorator}"
   7538 		"OpMemberDecorate %buf 0 Offset 0\n"
   7539 
   7540 		// General types
   7541 		"%void      = OpTypeVoid\n"
   7542 		"%voidf     = OpTypeFunction %void\n"
   7543 		"%u32       = OpTypeInt 32 0\n"
   7544 		"%i32       = OpTypeInt 32 1\n"
   7545 		"%uvec3     = OpTypeVector %u32 3\n"
   7546 		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
   7547 
   7548 		// Custom type
   7549 		"%custom    = ${typeDeclaration}\n"
   7550 		"${compositeType}"
   7551 
   7552 		// Constants
   7553 		"${filler}"
   7554 
   7555 		// Inherited from custom
   7556 		"%customptr = OpTypePointer Uniform %custom\n"
   7557 		"%customarr = OpTypeRuntimeArray %custom\n"
   7558 		"%buf       = OpTypeStruct %customarr\n"
   7559 		"%bufptr    = OpTypePointer Uniform %buf\n"
   7560 
   7561 		"%indata    = OpVariable %bufptr Uniform\n"
   7562 		"%outdata   = OpVariable %bufptr Uniform\n"
   7563 
   7564 		"%id        = OpVariable %uvec3ptr Input\n"
   7565 		"%zero      = OpConstant %i32 0\n"
   7566 
   7567 		"%main      = OpFunction %void None %voidf\n"
   7568 		"%label     = OpLabel\n"
   7569 		"%idval     = OpLoad %uvec3 %id\n"
   7570 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   7571 
   7572 		"%inloc     = OpAccessChain %customptr %indata %zero %x\n"
   7573 		"%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
   7574 		// Read the input value
   7575 		"%inval     = OpLoad %custom %inloc\n"
   7576 		// Create the composite and fill it
   7577 		"${compositeConstruct}"
   7578 		// Insert the input value to a place
   7579 		"%instance2 = OpCompositeInsert %composite %inval %instance ${indexes}\n"
   7580 		// Read back the value from the position
   7581 		"%out_val   = OpCompositeExtract %custom %instance2 ${indexes}\n"
   7582 		// Store it in the output position
   7583 		"             OpStore %outloc %out_val\n"
   7584 		"             OpReturn\n"
   7585 		"             OpFunctionEnd\n"
   7586 	).specialize(parameters);
   7587 }
   7588 
   7589 template<typename T>
   7590 BufferSp createCompositeBuffer(T number)
   7591 {
   7592 	return BufferSp(new Buffer<T>(vector<T>(1, number)));
   7593 }
   7594 
   7595 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
   7596 {
   7597 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
   7598 	de::Random						rnd		(deStringHash(group->getName()));
   7599 
   7600 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
   7601 	{
   7602 		NumberType						numberType		= NumberType(type);
   7603 		const string					typeName		= getNumberTypeName(numberType);
   7604 		const string					description		= "Test the OpCompositeInsert instruction with " + typeName + "s";
   7605 		de::MovePtr<tcu::TestCaseGroup>	subGroup		(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
   7606 		vector<map<string, string> >	testCases;
   7607 
   7608 		createCompositeCases(testCases, rnd, numberType);
   7609 
   7610 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
   7611 		{
   7612 			ComputeShaderSpec	spec;
   7613 
   7614 			spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
   7615 
   7616 			switch (numberType)
   7617 			{
   7618 				case NUMBERTYPE_INT32:
   7619 				{
   7620 					deInt32 number = getInt(rnd);
   7621 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
   7622 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
   7623 					break;
   7624 				}
   7625 				case NUMBERTYPE_UINT32:
   7626 				{
   7627 					deUint32 number = rnd.getUint32();
   7628 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
   7629 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
   7630 					break;
   7631 				}
   7632 				case NUMBERTYPE_FLOAT32:
   7633 				{
   7634 					float number = rnd.getFloat();
   7635 					spec.inputs.push_back(createCompositeBuffer<float>(number));
   7636 					spec.outputs.push_back(createCompositeBuffer<float>(number));
   7637 					break;
   7638 				}
   7639 				default:
   7640 					DE_ASSERT(false);
   7641 			}
   7642 
   7643 			spec.numWorkGroups = IVec3(1, 1, 1);
   7644 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
   7645 		}
   7646 		group->addChild(subGroup.release());
   7647 	}
   7648 	return group.release();
   7649 }
   7650 
   7651 struct AssemblyStructInfo
   7652 {
   7653 	AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
   7654 	: components	(comp)
   7655 	, index			(idx)
   7656 	{}
   7657 
   7658 	deUint32 components;
   7659 	deUint32 index;
   7660 };
   7661 
   7662 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
   7663 {
   7664 	// Create the full index string
   7665 	string				fullIndex	= numberToString(structInfo.index) + " " + params.at("indexes");
   7666 	// Convert it to list of indexes
   7667 	vector<string>		indexes		= de::splitString(fullIndex, ' ');
   7668 
   7669 	map<string, string>	parameters	(params);
   7670 	parameters["typeDeclaration"]	= getAssemblyTypeDeclaration(type);
   7671 	parameters["structType"]		= repeatString(" %composite", structInfo.components);
   7672 	parameters["structConstruct"]	= repeatString(" %instance", structInfo.components);
   7673 	parameters["insertIndexes"]		= fullIndex;
   7674 
   7675 	// In matrix cases the last two index is the CompositeExtract indexes
   7676 	const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
   7677 
   7678 	// Construct the extractIndex
   7679 	for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
   7680 	{
   7681 		parameters["extractIndexes"] += " " + *index;
   7682 	}
   7683 
   7684 	// Remove the last 1 or 2 element depends on matrix case or not
   7685 	indexes.erase(indexes.end() - extractIndexes, indexes.end());
   7686 
   7687 	deUint32 id = 0;
   7688 	// Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
   7689 	for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
   7690 	{
   7691 		string indexId = "%index_" + numberToString(id++);
   7692 		parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
   7693 		parameters["accessChainIndexes"] += " " + indexId;
   7694 	}
   7695 
   7696 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
   7697 
   7698 	return StringTemplate (
   7699 		"OpCapability Shader\n"
   7700 		"OpCapability Matrix\n"
   7701 		"OpMemoryModel Logical GLSL450\n"
   7702 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   7703 		"OpExecutionMode %main LocalSize 1 1 1\n"
   7704 
   7705 		"OpSource GLSL 430\n"
   7706 		"OpName %main           \"main\"\n"
   7707 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   7708 		// Decorators
   7709 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   7710 		"OpDecorate %buf BufferBlock\n"
   7711 		"OpDecorate %indata DescriptorSet 0\n"
   7712 		"OpDecorate %indata Binding 0\n"
   7713 		"OpDecorate %outdata DescriptorSet 0\n"
   7714 		"OpDecorate %outdata Binding 1\n"
   7715 		"OpDecorate %customarr ArrayStride 4\n"
   7716 		"${compositeDecorator}"
   7717 		"OpMemberDecorate %buf 0 Offset 0\n"
   7718 		// General types
   7719 		"%void      = OpTypeVoid\n"
   7720 		"%voidf     = OpTypeFunction %void\n"
   7721 		"%u32       = OpTypeInt 32 0\n"
   7722 		"%uvec3     = OpTypeVector %u32 3\n"
   7723 		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
   7724 		// Custom type
   7725 		"%custom    = ${typeDeclaration}\n"
   7726 		// Custom types
   7727 		"${compositeType}"
   7728 		// Inherited from composite
   7729 		"%composite_p = OpTypePointer Function %composite\n"
   7730 		"%struct_t  = OpTypeStruct${structType}\n"
   7731 		"%struct_p  = OpTypePointer Function %struct_t\n"
   7732 		// Constants
   7733 		"${filler}"
   7734 		"${accessChainConstDeclaration}"
   7735 		// Inherited from custom
   7736 		"%customptr = OpTypePointer Uniform %custom\n"
   7737 		"%customarr = OpTypeRuntimeArray %custom\n"
   7738 		"%buf       = OpTypeStruct %customarr\n"
   7739 		"%bufptr    = OpTypePointer Uniform %buf\n"
   7740 		"%indata    = OpVariable %bufptr Uniform\n"
   7741 		"%outdata   = OpVariable %bufptr Uniform\n"
   7742 
   7743 		"%id        = OpVariable %uvec3ptr Input\n"
   7744 		"%zero      = OpConstant %u32 0\n"
   7745 		"%main      = OpFunction %void None %voidf\n"
   7746 		"%label     = OpLabel\n"
   7747 		"%struct_v  = OpVariable %struct_p Function\n"
   7748 		"%idval     = OpLoad %uvec3 %id\n"
   7749 		"%x         = OpCompositeExtract %u32 %idval 0\n"
   7750 		// Create the input/output type
   7751 		"%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
   7752 		"%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
   7753 		// Read the input value
   7754 		"%inval     = OpLoad %custom %inloc\n"
   7755 		// Create the composite and fill it
   7756 		"${compositeConstruct}"
   7757 		// Create the struct and fill it with the composite
   7758 		"%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
   7759 		// Insert the value
   7760 		"%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
   7761 		// Store the object
   7762 		"             OpStore %struct_v %comp_obj\n"
   7763 		// Get deepest possible composite pointer
   7764 		"%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
   7765 		"%read_obj  = OpLoad %composite %inner_ptr\n"
   7766 		// Read back the stored value
   7767 		"%read_val  = OpCompositeExtract %custom %read_obj${extractIndexes}\n"
   7768 		"             OpStore %outloc %read_val\n"
   7769 		"             OpReturn\n"
   7770 		"             OpFunctionEnd\n").specialize(parameters);
   7771 }
   7772 
   7773 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
   7774 {
   7775 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
   7776 	de::Random						rnd				(deStringHash(group->getName()));
   7777 
   7778 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
   7779 	{
   7780 		NumberType						numberType	= NumberType(type);
   7781 		const string					typeName	= getNumberTypeName(numberType);
   7782 		const string					description	= "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
   7783 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
   7784 
   7785 		vector<map<string, string> >	testCases;
   7786 		createCompositeCases(testCases, rnd, numberType);
   7787 
   7788 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
   7789 		{
   7790 			ComputeShaderSpec	spec;
   7791 
   7792 			// Number of components inside of a struct
   7793 			deUint32 structComponents = rnd.getInt(2, 8);
   7794 			// Component index value
   7795 			deUint32 structIndex = rnd.getInt(0, structComponents - 1);
   7796 			AssemblyStructInfo structInfo(structComponents, structIndex);
   7797 
   7798 			spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
   7799 
   7800 			switch (numberType)
   7801 			{
   7802 				case NUMBERTYPE_INT32:
   7803 				{
   7804 					deInt32 number = getInt(rnd);
   7805 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
   7806 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
   7807 					break;
   7808 				}
   7809 				case NUMBERTYPE_UINT32:
   7810 				{
   7811 					deUint32 number = rnd.getUint32();
   7812 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
   7813 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
   7814 					break;
   7815 				}
   7816 				case NUMBERTYPE_FLOAT32:
   7817 				{
   7818 					float number = rnd.getFloat();
   7819 					spec.inputs.push_back(createCompositeBuffer<float>(number));
   7820 					spec.outputs.push_back(createCompositeBuffer<float>(number));
   7821 					break;
   7822 				}
   7823 				default:
   7824 					DE_ASSERT(false);
   7825 			}
   7826 			spec.numWorkGroups = IVec3(1, 1, 1);
   7827 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
   7828 		}
   7829 		group->addChild(subGroup.release());
   7830 	}
   7831 	return group.release();
   7832 }
   7833 
   7834 // If the params missing, uninitialized case
   7835 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
   7836 {
   7837 	map<string, string> parameters(params);
   7838 
   7839 	parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
   7840 
   7841 	// Declare the const value, and use it in the initializer
   7842 	if (params.find("constValue") != params.end())
   7843 	{
   7844 		parameters["constDeclaration"]		= "%const      = OpConstant %in_type " + params.at("constValue") + "\n";
   7845 		parameters["variableInitializer"]	= "%const";
   7846 	}
   7847 	// Uninitialized case
   7848 	else
   7849 	{
   7850 		parameters["constDeclaration"]		= "";
   7851 		parameters["variableInitializer"]	= "";
   7852 	}
   7853 
   7854 	return StringTemplate(
   7855 		"OpCapability Shader\n"
   7856 		"OpMemoryModel Logical GLSL450\n"
   7857 		"OpEntryPoint GLCompute %main \"main\" %id\n"
   7858 		"OpExecutionMode %main LocalSize 1 1 1\n"
   7859 		"OpSource GLSL 430\n"
   7860 		"OpName %main           \"main\"\n"
   7861 		"OpName %id             \"gl_GlobalInvocationID\"\n"
   7862 		// Decorators
   7863 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
   7864 		"OpDecorate %indata DescriptorSet 0\n"
   7865 		"OpDecorate %indata Binding 0\n"
   7866 		"OpDecorate %outdata DescriptorSet 0\n"
   7867 		"OpDecorate %outdata Binding 1\n"
   7868 		"OpDecorate %in_arr ArrayStride 4\n"
   7869 		"OpDecorate %in_buf BufferBlock\n"
   7870 		"OpMemberDecorate %in_buf 0 Offset 0\n"
   7871 		// Base types
   7872 		"%void       = OpTypeVoid\n"
   7873 		"%voidf      = OpTypeFunction %void\n"
   7874 		"%u32        = OpTypeInt 32 0\n"
   7875 		"%i32        = OpTypeInt 32 1\n"
   7876 		"%uvec3      = OpTypeVector %u32 3\n"
   7877 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
   7878 		// Custom types
   7879 		"%in_type    = ${typeDeclaration}\n"
   7880 		// "%const      = OpConstant %in_type ${constValue}\n"
   7881 		"${constDeclaration}\n"
   7882 		// Derived types
   7883 		"%in_ptr     = OpTypePointer Uniform %in_type\n"
   7884 		"%in_arr     = OpTypeRuntimeArray %in_type\n"
   7885 		"%in_buf     = OpTypeStruct %in_arr\n"
   7886 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
   7887 		"%indata     = OpVariable %in_bufptr Uniform\n"
   7888 		"%outdata    = OpVariable %in_bufptr Uniform\n"
   7889 		"%id         = OpVariable %uvec3ptr Input\n"
   7890 		"%var_ptr    = OpTypePointer Function %in_type\n"
   7891 		// Constants
   7892 		"%zero       = OpConstant %i32 0\n"
   7893 		// Main function
   7894 		"%main       = OpFunction %void None %voidf\n"
   7895 		"%label      = OpLabel\n"
   7896 		"%out_var    = OpVariable %var_ptr Function ${variableInitializer}\n"
   7897 		"%idval      = OpLoad %uvec3 %id\n"
   7898 		"%x          = OpCompositeExtract %u32 %idval 0\n"
   7899 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
   7900 		"%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
   7901 
   7902 		"%outval     = OpLoad %in_type %out_var\n"
   7903 		"              OpStore %outloc %outval\n"
   7904 		"              OpReturn\n"
   7905 		"              OpFunctionEnd\n"
   7906 	).specialize(parameters);
   7907 }
   7908 
   7909 bool compareFloats (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog& log)
   7910 {
   7911 	DE_ASSERT(outputAllocs.size() != 0);
   7912 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
   7913 
   7914 	// Use custom epsilon because of the float->string conversion
   7915 	const float	epsilon	= 0.00001f;
   7916 
   7917 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
   7918 	{
   7919 		vector<deUint8>	expectedBytes;
   7920 		float			expected;
   7921 		float			actual;
   7922 
   7923 		expectedOutputs[outputNdx]->getBytes(expectedBytes);
   7924 		memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
   7925 		memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
   7926 
   7927 		// Test with epsilon
   7928 		if (fabs(expected - actual) > epsilon)
   7929 		{
   7930 			log << TestLog::Message << "Error: The actual and expected values not matching."
   7931 				<< " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
   7932 			return false;
   7933 		}
   7934 	}
   7935 	return true;
   7936 }
   7937 
   7938 // Checks if the driver crash with uninitialized cases
   7939 bool passthruVerify (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
   7940 {
   7941 	DE_ASSERT(outputAllocs.size() != 0);
   7942 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
   7943 
   7944 	// Copy and discard the result.
   7945 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
   7946 	{
   7947 		vector<deUint8>	expectedBytes;
   7948 		expectedOutputs[outputNdx]->getBytes(expectedBytes);
   7949 
   7950 		const size_t	width			= expectedBytes.size();
   7951 		vector<char>	data			(width);
   7952 
   7953 		memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
   7954 	}
   7955 	return true;
   7956 }
   7957 
   7958 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
   7959 {
   7960 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
   7961 	de::Random						rnd		(deStringHash(group->getName()));
   7962 
   7963 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
   7964 	{
   7965 		NumberType						numberType	= NumberType(type);
   7966 		const string					typeName	= getNumberTypeName(numberType);
   7967 		const string					description	= "Test the OpVariable initializer with " + typeName + ".";
   7968 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
   7969 
   7970 		// 2 similar subcases (initialized and uninitialized)
   7971 		for (int subCase = 0; subCase < 2; ++subCase)
   7972 		{
   7973 			ComputeShaderSpec spec;
   7974 			spec.numWorkGroups = IVec3(1, 1, 1);
   7975 
   7976 			map<string, string>				params;
   7977 
   7978 			switch (numberType)
   7979 			{
   7980 				case NUMBERTYPE_INT32:
   7981 				{
   7982 					deInt32 number = getInt(rnd);
   7983 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
   7984 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
   7985 					params["constValue"] = numberToString(number);
   7986 					break;
   7987 				}
   7988 				case NUMBERTYPE_UINT32:
   7989 				{
   7990 					deUint32 number = rnd.getUint32();
   7991 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
   7992 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
   7993 					params["constValue"] = numberToString(number);
   7994 					break;
   7995 				}
   7996 				case NUMBERTYPE_FLOAT32:
   7997 				{
   7998 					float number = rnd.getFloat();
   7999 					spec.inputs.push_back(createCompositeBuffer<float>(number));
   8000 					spec.outputs.push_back(createCompositeBuffer<float>(number));
   8001 					spec.verifyIO = &compareFloats;
   8002 					params["constValue"] = numberToString(number);
   8003 					break;
   8004 				}
   8005 				default:
   8006 					DE_ASSERT(false);
   8007 			}
   8008 
   8009 			// Initialized subcase
   8010 			if (!subCase)
   8011 			{
   8012 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
   8013 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
   8014 			}
   8015 			// Uninitialized subcase
   8016 			else
   8017 			{
   8018 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
   8019 				spec.verifyIO = &passthruVerify;
   8020 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
   8021 			}
   8022 		}
   8023 		group->addChild(subGroup.release());
   8024 	}
   8025 	return group.release();
   8026 }
   8027 
   8028 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
   8029 {
   8030 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
   8031 	RGBA							defaultColors[4];
   8032 	map<string, string>				opNopFragments;
   8033 
   8034 	getDefaultColors(defaultColors);
   8035 
   8036 	opNopFragments["testfun"]		=
   8037 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
   8038 		"%param1 = OpFunctionParameter %v4f32\n"
   8039 		"%label_testfun = OpLabel\n"
   8040 		"OpNop\n"
   8041 		"OpNop\n"
   8042 		"OpNop\n"
   8043 		"OpNop\n"
   8044 		"OpNop\n"
   8045 		"OpNop\n"
   8046 		"OpNop\n"
   8047 		"OpNop\n"
   8048 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
   8049 		"%b = OpFAdd %f32 %a %a\n"
   8050 		"OpNop\n"
   8051 		"%c = OpFSub %f32 %b %a\n"
   8052 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
   8053 		"OpNop\n"
   8054 		"OpNop\n"
   8055 		"OpReturnValue %ret\n"
   8056 		"OpFunctionEnd\n";
   8057 
   8058 	createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
   8059 
   8060 	return testGroup.release();
   8061 }
   8062 
   8063 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
   8064 {
   8065 	const bool testComputePipeline = true;
   8066 
   8067 	de::MovePtr<tcu::TestCaseGroup> instructionTests	(new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
   8068 	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
   8069 	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
   8070 
   8071 	computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
   8072 	computeTests->addChild(createOpNopGroup(testCtx));
   8073 	computeTests->addChild(createOpFUnordGroup(testCtx));
   8074 	computeTests->addChild(createOpAtomicGroup(testCtx, false));
   8075 	computeTests->addChild(createOpAtomicGroup(testCtx, true)); // Using new StorageBuffer decoration
   8076 	computeTests->addChild(createOpLineGroup(testCtx));
   8077 	computeTests->addChild(createOpModuleProcessedGroup(testCtx));
   8078 	computeTests->addChild(createOpNoLineGroup(testCtx));
   8079 	computeTests->addChild(createOpConstantNullGroup(testCtx));
   8080 	computeTests->addChild(createOpConstantCompositeGroup(testCtx));
   8081 	computeTests->addChild(createOpConstantUsageGroup(testCtx));
   8082 	computeTests->addChild(createSpecConstantGroup(testCtx));
   8083 	computeTests->addChild(createOpSourceGroup(testCtx));
   8084 	computeTests->addChild(createOpSourceExtensionGroup(testCtx));
   8085 	computeTests->addChild(createDecorationGroupGroup(testCtx));
   8086 	computeTests->addChild(createOpPhiGroup(testCtx));
   8087 	computeTests->addChild(createLoopControlGroup(testCtx));
   8088 	computeTests->addChild(createFunctionControlGroup(testCtx));
   8089 	computeTests->addChild(createSelectionControlGroup(testCtx));
   8090 	computeTests->addChild(createBlockOrderGroup(testCtx));
   8091 	computeTests->addChild(createMultipleShaderGroup(testCtx));
   8092 	computeTests->addChild(createMemoryAccessGroup(testCtx));
   8093 	computeTests->addChild(createOpCopyMemoryGroup(testCtx));
   8094 	computeTests->addChild(createOpCopyObjectGroup(testCtx));
   8095 	computeTests->addChild(createNoContractionGroup(testCtx));
   8096 	computeTests->addChild(createOpUndefGroup(testCtx));
   8097 	computeTests->addChild(createOpUnreachableGroup(testCtx));
   8098 	computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
   8099 	computeTests ->addChild(createOpFRemGroup(testCtx));
   8100 	computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
   8101 	computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
   8102 	computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
   8103 	computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
   8104 	computeTests->addChild(createSConvertTests(testCtx));
   8105 	computeTests->addChild(createUConvertTests(testCtx));
   8106 	computeTests->addChild(createOpCompositeInsertGroup(testCtx));
   8107 	computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
   8108 	computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
   8109 	computeTests->addChild(createOpNMinGroup(testCtx));
   8110 	computeTests->addChild(createOpNMaxGroup(testCtx));
   8111 	computeTests->addChild(createOpNClampGroup(testCtx));
   8112 	{
   8113 		de::MovePtr<tcu::TestCaseGroup>	computeAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
   8114 
   8115 		computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
   8116 		computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
   8117 
   8118 		computeTests->addChild(computeAndroidTests.release());
   8119 	}
   8120 
   8121 	computeTests->addChild(create16BitStorageComputeGroup(testCtx));
   8122 	computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
   8123 	computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
   8124 	computeTests->addChild(createIndexingComputeGroup(testCtx));
   8125 	computeTests->addChild(createVariablePointersComputeGroup(testCtx));
   8126 	graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
   8127 	graphicsTests->addChild(createOpNopTests(testCtx));
   8128 	graphicsTests->addChild(createOpSourceTests(testCtx));
   8129 	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
   8130 	graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
   8131 	graphicsTests->addChild(createOpLineTests(testCtx));
   8132 	graphicsTests->addChild(createOpNoLineTests(testCtx));
   8133 	graphicsTests->addChild(createOpConstantNullTests(testCtx));
   8134 	graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
   8135 	graphicsTests->addChild(createMemoryAccessTests(testCtx));
   8136 	graphicsTests->addChild(createOpUndefTests(testCtx));
   8137 	graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
   8138 	graphicsTests->addChild(createModuleTests(testCtx));
   8139 	graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
   8140 	graphicsTests->addChild(createOpPhiTests(testCtx));
   8141 	graphicsTests->addChild(createNoContractionTests(testCtx));
   8142 	graphicsTests->addChild(createOpQuantizeTests(testCtx));
   8143 	graphicsTests->addChild(createLoopTests(testCtx));
   8144 	graphicsTests->addChild(createSpecConstantTests(testCtx));
   8145 	graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
   8146 	graphicsTests->addChild(createBarrierTests(testCtx));
   8147 	graphicsTests->addChild(createDecorationGroupTests(testCtx));
   8148 	graphicsTests->addChild(createFRemTests(testCtx));
   8149 	graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
   8150 	graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
   8151 
   8152 	{
   8153 		de::MovePtr<tcu::TestCaseGroup>	graphicsAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
   8154 
   8155 		graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
   8156 		graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
   8157 
   8158 		graphicsTests->addChild(graphicsAndroidTests.release());
   8159 	}
   8160 
   8161 	graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
   8162 	graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
   8163 	graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
   8164 	graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
   8165 	graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
   8166 
   8167 	instructionTests->addChild(computeTests.release());
   8168 	instructionTests->addChild(graphicsTests.release());
   8169 
   8170 	return instructionTests.release();
   8171 }
   8172 
   8173 } // SpirVAssembly
   8174 } // vkt
   8175