Home | History | Annotate | Download | only in performance
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program OpenGL ES 2.0 Module
      3  * -------------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Optimized vs unoptimized shader performance tests.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "es2pShaderOptimizationTests.hpp"
     25 #include "glsShaderPerformanceMeasurer.hpp"
     26 #include "gluRenderContext.hpp"
     27 #include "gluShaderProgram.hpp"
     28 #include "tcuTestLog.hpp"
     29 #include "tcuVector.hpp"
     30 #include "tcuStringTemplate.hpp"
     31 #include "deSharedPtr.hpp"
     32 #include "deStringUtil.hpp"
     33 #include "deMath.h"
     34 
     35 #include "glwFunctions.hpp"
     36 
     37 #include <vector>
     38 #include <string>
     39 #include <map>
     40 
     41 using glu::ShaderProgram;
     42 using tcu::TestLog;
     43 using tcu::Vec4;
     44 using de::SharedPtr;
     45 using de::toString;
     46 
     47 using std::vector;
     48 using std::string;
     49 
     50 namespace deqp
     51 {
     52 
     53 using gls::ShaderPerformanceMeasurer;
     54 
     55 namespace gles2
     56 {
     57 namespace Performance
     58 {
     59 
     60 static inline std::map<string, string> singleMap (const string& key, const string& value)
     61 {
     62 	std::map<string, string> res;
     63 	res[key] = value;
     64 	return res;
     65 }
     66 
     67 static inline string repeat (const string& str, int numRepeats, const string& delim = "")
     68 {
     69 	string result = str;
     70 	for (int i = 1; i < numRepeats; i++)
     71 		result += delim + str;
     72 	return result;
     73 }
     74 
     75 static inline string repeatIndexedTemplate (const string& strTempl, int numRepeats, const string& delim = "", int ndxStart = 0)
     76 {
     77 	const tcu::StringTemplate	templ(strTempl);
     78 	string						result;
     79 	std::map<string, string>	params;
     80 
     81 	for (int i = 0; i < numRepeats; i++)
     82 	{
     83 		params["PREV_NDX"]	= toString(i + ndxStart - 1);
     84 		params["NDX"]		= toString(i + ndxStart);
     85 
     86 		result += (i > 0 ? delim : "") + templ.specialize(params);
     87 	}
     88 
     89 	return result;
     90 }
     91 
     92 namespace
     93 {
     94 
//! Selects which shader stage receives the case-specific code (see defaultProgramData()).
enum CaseShaderType
{
	CASESHADERTYPE_VERTEX = 0,	//!< Case code is placed in the vertex shader.
	CASESHADERTYPE_FRAGMENT,	//!< Case code is placed in the fragment shader.

	CASESHADERTYPE_LAST			//!< Not a valid shader type; marks the end of the enumeration.
};
    102 
    103 static inline string getShaderPrecision (CaseShaderType shaderType)
    104 {
    105 	switch (shaderType)
    106 	{
    107 		case CASESHADERTYPE_VERTEX:		return "highp";
    108 		case CASESHADERTYPE_FRAGMENT:	return "mediump";
    109 		default:
    110 			DE_ASSERT(false);
    111 			return DE_NULL;
    112 	}
    113 }
    114 
    115 struct ProgramData
    116 {
    117 	glu::ProgramSources			sources;
    118 	vector<gls::AttribSpec>		attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
    119 
    120 	ProgramData (void) {}
    121 	ProgramData (const glu::ProgramSources& sources_, const vector<gls::AttribSpec>& attributes_ = vector<gls::AttribSpec>())	: sources(sources_), attributes(attributes_)	{}
    122 	ProgramData (const glu::ProgramSources& sources_, const gls::AttribSpec& attribute)											: sources(sources_), attributes(1, attribute)	{}
    123 };
    124 
    125 //! Shader boilerplate helper; most cases have similar basic shader structure.
    126 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& funcDefs, const string& mainStatements)
    127 {
    128 	const bool		isVertexCase	= shaderType == CASESHADERTYPE_VERTEX;
    129 	const bool		isFragmentCase	= shaderType == CASESHADERTYPE_FRAGMENT;
    130 	const string	vtxPrec			= getShaderPrecision(CASESHADERTYPE_VERTEX);
    131 	const string	fragPrec		= getShaderPrecision(CASESHADERTYPE_FRAGMENT);
    132 
    133 	return ProgramData(glu::ProgramSources() << glu::VertexSource(		"attribute " + vtxPrec + " vec4 a_position;\n"
    134 																		"attribute " + vtxPrec + " vec4 a_value;\n"
    135 																		"varying " + fragPrec + " vec4 v_value;\n"
    136 																		+ (isVertexCase ? funcDefs : "") +
    137 																		"void main (void)\n"
    138 																		"{\n"
    139 																		"	gl_Position = a_position;\n"
    140 																		"	" + vtxPrec + " vec4 value = a_value;\n"
    141 																		+ (isVertexCase ? mainStatements : "") +
    142 																		"	v_value = value;\n"
    143 																		"}\n")
    144 
    145 											 << glu::FragmentSource(	"varying " + fragPrec + " vec4 v_value;\n"
    146 																		+ (isFragmentCase ? funcDefs : "") +
    147 																		"void main (void)\n"
    148 																		"{\n"
    149 																		"	" + fragPrec + " vec4 value = v_value;\n"
    150 																		+ (isFragmentCase ? mainStatements : "") +
    151 																		"	gl_FragColor = value;\n"
    152 																		"}\n"),
    153 					  gls::AttribSpec("a_value",
    154 									  Vec4(1.0f, 0.0f, 0.0f, 0.0f),
    155 									  Vec4(0.0f, 1.0f, 0.0f, 0.0f),
    156 									  Vec4(0.0f, 0.0f, 1.0f, 0.0f),
    157 									  Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
    158 }
    159 
    160 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& mainStatements)
    161 {
    162 	return defaultProgramData(shaderType, "", mainStatements);
    163 }
    164 
//! Base class for cases that measure an unoptimized and a hand-optimized variant of
//! the same shader and report the ratio of their performance results.
//! Subclasses supply the two shader variants through generateProgramData().
class ShaderOptimizationCase : public TestCase
{
public:
	//! \param caseShaderType Shader stage being measured; also selects the measurer's case type.
	ShaderOptimizationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType)
		: TestCase				(context, tcu::NODETYPE_PERFORMANCE, name, description)
		, m_caseShaderType		(caseShaderType)
		, m_state				(STATE_LAST) // \note Set to a valid state at the end of init().
		, m_measurer			(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX	? gls::CASETYPE_VERTEX
														   : caseShaderType == CASESHADERTYPE_FRAGMENT	? gls::CASETYPE_FRAGMENT
														   : gls::CASETYPE_LAST)
		, m_unoptimizedResult	(-1.0f, -1.0f)
		, m_optimizedResult		(-1.0f, -1.0f)
	{
	}

	virtual ~ShaderOptimizationCase (void) {}

	//! Generates, compiles and logs both program variants.
	void			init		(void);
	//! Advances the measurement state machine by one step.
	IterateResult	iterate		(void);

protected:
	//! Returns the shader sources and attributes for the unoptimized (optimized == false)
	//! or hand-optimized (optimized == true) variant.
	virtual ProgramData		generateProgramData (bool optimized) const = 0;

	const CaseShaderType	m_caseShaderType;

private:
	//! Steps of iterate(): the unoptimized program is initialized and measured first, then the optimized one.
	enum State
	{
		STATE_INIT_UNOPTIMIZED = 0,
		STATE_MEASURE_UNOPTIMIZED,
		STATE_INIT_OPTIMIZED,
		STATE_MEASURE_OPTIMIZED,
		STATE_FINISHED,

		STATE_LAST
	};

	// Accessors selecting the optimized or unoptimized member of each pair below.
	ProgramData&						programData		(bool optimized) { return optimized ? m_optimizedData		: m_unoptimizedData;		}
	SharedPtr<const ShaderProgram>&		program			(bool optimized) { return optimized ? m_optimizedProgram	: m_unoptimizedProgram;		}
	ShaderPerformanceMeasurer::Result&	result			(bool optimized) { return optimized ? m_optimizedResult		: m_unoptimizedResult;		}

	State								m_state;
	ShaderPerformanceMeasurer			m_measurer;

	ProgramData							m_unoptimizedData;
	ProgramData							m_optimizedData;
	SharedPtr<const ShaderProgram>		m_unoptimizedProgram;
	SharedPtr<const ShaderProgram>		m_optimizedProgram;
	ShaderPerformanceMeasurer::Result	m_unoptimizedResult;
	ShaderPerformanceMeasurer::Result	m_optimizedResult;
};
    216 
    217 void ShaderOptimizationCase::init (void)
    218 {
    219 	const glu::RenderContext&	renderCtx	= m_context.getRenderContext();
    220 	TestLog&					log			= m_testCtx.getLog();
    221 
    222 	m_measurer.logParameters(log);
    223 
    224 	for (int ndx = 0; ndx < 2; ndx++)
    225 	{
    226 		const bool optimized = ndx == 1;
    227 
    228 		programData(optimized) = generateProgramData(optimized);
    229 
    230 		for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
    231 			DE_ASSERT(programData(optimized).attributes[i].name != "a_position"); // \note Position attribute is set by m_measurer.
    232 
    233 		program(optimized) = SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
    234 
    235 		{
    236 			const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram"			: "UnoptimizedProgram",
    237 													 optimized ? "Hand-optimized program"	: "Unoptimized program");
    238 			log << *program(optimized);
    239 		}
    240 
    241 		if (!program(optimized)->isOk())
    242 			TCU_FAIL("Shader compilation failed");
    243 	}
    244 
    245 	m_state = STATE_INIT_UNOPTIMIZED;
    246 }
    247 
    248 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate (void)
    249 {
    250 	TestLog& log = m_testCtx.getLog();
    251 
    252 	if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
    253 	{
    254 		const bool optimized = m_state == STATE_INIT_OPTIMIZED;
    255 		m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
    256 		m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
    257 
    258 		return CONTINUE;
    259 	}
    260 	else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
    261 	{
    262 		m_measurer.iterate();
    263 
    264 		if (m_measurer.isFinished())
    265 		{
    266 			const bool						optimized	= m_state == STATE_MEASURE_OPTIMIZED;
    267 			const tcu::ScopedLogSection		section		(log, optimized ? "OptimizedResult"									: "UnoptimizedResult",
    268 															  optimized ? "Measurement results for hand-optimized program"	: "Measurement result for unoptimized program");
    269 			m_measurer.logMeasurementInfo(log);
    270 			result(optimized) = m_measurer.getResult();
    271 			m_measurer.deinit();
    272 			m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
    273 		}
    274 
    275 		return CONTINUE;
    276 	}
    277 	else
    278 	{
    279 		DE_ASSERT(m_state == STATE_FINISHED);
    280 
    281 		const float			unoptimizedRelevantResult	= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_unoptimizedResult.megaVertPerSec	: m_unoptimizedResult.megaFragPerSec;
    282 		const float			optimizedRelevantResult		= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_optimizedResult.megaVertPerSec		: m_optimizedResult.megaFragPerSec;
    283 		const char* const	relevantResultName			= m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex"								: "fragment";
    284 		const float			ratio						= unoptimizedRelevantResult / optimizedRelevantResult;
    285 		const int			handOptimizationGain		= (int)deFloatRound(100.0f/ratio) - 100;
    286 
    287 		log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio << TestLog::EndMessage;
    288 
    289 		if (handOptimizationGain >= 0)
    290 			log << TestLog::Message << "Note: " << handOptimizationGain << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
    291 		else
    292 			log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain << "%" << TestLog::EndMessage;
    293 
    294 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
    295 
    296 		return STOP;
    297 	}
    298 }
    299 
    300 class LoopUnrollCase : public ShaderOptimizationCase
    301 {
    302 public:
    303 	enum CaseType
    304 	{
    305 		CASETYPE_INDEPENDENT = 0,
    306 		CASETYPE_DEPENDENT,
    307 
    308 		CASETYPE_LAST
    309 	};
    310 
    311 	LoopUnrollCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, int numRepetitions)
    312 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
    313 		, m_numRepetitions			(numRepetitions)
    314 		, m_caseType				(caseType)
    315 	{
    316 	}
    317 
    318 protected:
    319 	ProgramData generateProgramData (bool optimized) const
    320 	{
    321 		const string repetition = optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions)
    322 											: loop(m_numRepetitions, expressionTemplate(m_caseType));
    323 
    324 		return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) + " vec4 valueOrig = value;\n" + repetition);
    325 	}
    326 
    327 private:
    328 	const int		m_numRepetitions;
    329 	const CaseType	m_caseType;
    330 
    331 	static inline string expressionTemplate (CaseType caseType)
    332 	{
    333 		switch (caseType)
    334 		{
    335 			case CASETYPE_INDEPENDENT:	return "value += sin(float(${NDX}+1)*valueOrig)";
    336 			case CASETYPE_DEPENDENT:	return "value = sin(value)";
    337 			default:
    338 				DE_ASSERT(false);
    339 				return DE_NULL;
    340 		}
    341 	}
    342 
    343 	static inline string loop (int iterations, const string& innerExpr)
    344 	{
    345 		return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" + tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
    346 	}
    347 };
    348 
    349 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
    350 {
    351 public:
    352 	LoopInvariantCodeMotionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int numLoopIterations)
    353 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
    354 		, m_numLoopIterations		(numLoopIterations)
    355 	{
    356 	}
    357 
    358 protected:
    359 	ProgramData generateProgramData (bool optimized) const
    360 	{
    361 		float scale = 0.0f;
    362 		for (int i = 0; i < m_numLoopIterations; i++)
    363 			scale += 3.2f*(float)i + 4.6f;
    364 		scale = 1.0f / scale;
    365 
    366 		const string precision		= getShaderPrecision(m_caseShaderType);
    367 		const string statements		= optimized ?	"	" + precision + " vec4 valueOrig = value;\n"
    368 													"	" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
    369 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
    370 													"	{\n"
    371 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
    372 													"		value += x*y;\n"
    373 													"	}\n"
    374 													"	value *= " + toString(scale) + ";\n"
    375 
    376 												:	"	" + precision + " vec4 valueOrig = value;\n"
    377 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
    378 													"	{\n"
    379 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
    380 													"		" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
    381 													"		value += x*y;\n"
    382 													"	}\n"
    383 													"	value *= " + toString(scale) + ";\n";
    384 
    385 		return defaultProgramData(m_caseShaderType, statements);
    386 	}
    387 
    388 private:
    389 	const int m_numLoopIterations;
    390 };
    391 
    392 class FunctionInliningCase : public ShaderOptimizationCase
    393 {
    394 public:
    395 	FunctionInliningCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int callNestingDepth)
    396 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
    397 		, m_callNestingDepth		(callNestingDepth)
    398 	{
    399 	}
    400 
    401 protected:
    402 	ProgramData generateProgramData (bool optimized) const
    403 	{
    404 		const string precision				= getShaderPrecision(m_caseShaderType);
    405 		const string expression				= "value*vec4(0.8, 0.7, 0.6, 0.9)";
    406 		const string maybeFuncDefs			= optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
    407 		const string mainValueStatement		= (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth-1) + "(value)") + ";\n";
    408 
    409 		return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
    410 	}
    411 
    412 private:
    413 	const int m_callNestingDepth;
    414 
    415 	static inline string funcDefinitions (int callNestingDepth, const string& precision, const string& expression)
    416 	{
    417 		string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
    418 
    419 		for (int i = 1; i < callNestingDepth; i++)
    420 			result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" + toString(i-1) + "(v); }\n";
    421 
    422 		return result;
    423 	}
    424 };
    425 
    426 class ConstantPropagationCase : public ShaderOptimizationCase
    427 {
    428 public:
    429 	enum CaseType
    430 	{
    431 		CASETYPE_BUILT_IN_FUNCTIONS = 0,
    432 		CASETYPE_ARRAY,
    433 		CASETYPE_STRUCT,
    434 
    435 		CASETYPE_LAST
    436 	};
    437 
    438 	ConstantPropagationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, bool useConstantExpressionsOnly)
    439 		: ShaderOptimizationCase			(context, name, description, caseShaderType)
    440 		, m_caseType						(caseType)
    441 		, m_useConstantExpressionsOnly		(useConstantExpressionsOnly)
    442 	{
    443 		DE_ASSERT(!(m_caseType == CASETYPE_ARRAY && m_useConstantExpressionsOnly)); // \note Would need array constructors, which GLSL ES 1 doesn't have.
    444 	}
    445 
    446 protected:
    447 	ProgramData generateProgramData (bool optimized) const
    448 	{
    449 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
    450 		const string	precision		= getShaderPrecision(m_caseShaderType);
    451 		const string	statements		= m_caseType == CASETYPE_BUILT_IN_FUNCTIONS		? builtinFunctionsCaseStatements	(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
    452 										: m_caseType == CASETYPE_ARRAY					? arrayCaseStatements				(optimized, precision, isVertexCase)
    453 										: m_caseType == CASETYPE_STRUCT					? structCaseStatements				(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
    454 										: DE_NULL;
    455 
    456 		return defaultProgramData(m_caseShaderType, statements);
    457 	}
    458 
    459 private:
    460 	const CaseType	m_caseType;
    461 	const bool		m_useConstantExpressionsOnly;
    462 
    463 	static inline string builtinFunctionsCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
    464 	{
    465 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
    466 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
    467 
    468 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
    469 
    470 						 :	"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
    471 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
    472 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
    473 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
    474 							"	" + constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n"
    475 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n", numSinRows, "", 1) +
    476 							"	" + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
    477 							"	value = f*value;\n";
    478 	}
    479 
    480 	static inline string arrayCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    481 	{
    482 		const int numSinRows = useHeavierWorkload ? 12 : 1;
    483 
    484 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
    485 
    486 						 :	"	const int arrLen = 4;\n"
    487 							"	" + precision + " vec4 arr[arrLen];\n"
    488 							"	arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
    489 							"	arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
    490 							"	arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
    491 							"	arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
    492 							"	" + precision + " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * 0.25;\n"
    493 							"	" + precision + " vec4 b0 = cos(sin(a));\n"
    494 							+ repeatIndexedTemplate("	" + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
    495 							"	" + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
    496 							"	value = c*value;\n";
    497 	}
    498 
    499 	static inline string structCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
    500 	{
    501 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
    502 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
    503 
    504 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
    505 
    506 						 :	"	struct S\n"
    507 							"	{\n"
    508 							"		" + precision + " vec4 a;\n"
    509 							"		" + precision + " vec4 b;\n"
    510 							"		" + precision + " vec4 c;\n"
    511 							"		" + precision + " vec4 d;\n"
    512 							"	};\n"
    513 							"\n"
    514 							"	" + constMaybe + "S s =\n"
    515 							"		S(vec4(0.1, 0.5, 0.9, 1.3),\n"
    516 							"		  vec4(0.2, 0.6, 1.0, 1.4),\n"
    517 							"		  vec4(0.3, 0.7, 1.1, 1.5),\n"
    518 							"		  vec4(0.4, 0.8, 1.2, 1.6));\n"
    519 							"	" + constMaybe + precision + " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
    520 							"	" + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
    521 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
    522 							"	" + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
    523 							"	value = c*value;\n";
    524 	}
    525 };
    526 
    527 class CommonSubexpressionCase : public ShaderOptimizationCase
    528 {
    529 public:
    530 	enum CaseType
    531 	{
    532 		CASETYPE_SINGLE_STATEMENT = 0,
    533 		CASETYPE_MULTIPLE_STATEMENTS,
    534 		CASETYPE_STATIC_BRANCH,
    535 		CASETYPE_LOOP,
    536 
    537 		CASETYPE_LAST
    538 	};
    539 
    540 	CommonSubexpressionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
    541 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
    542 		, m_caseType				(caseType)
    543 	{
    544 	}
    545 
    546 protected:
    547 	ProgramData generateProgramData (bool optimized) const
    548 	{
    549 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
    550 		const string	precision		= getShaderPrecision(m_caseShaderType);
    551 		const string	statements		= m_caseType == CASETYPE_SINGLE_STATEMENT		? singleStatementCaseStatements		(optimized, precision, isVertexCase)
    552 										: m_caseType == CASETYPE_MULTIPLE_STATEMENTS	? multipleStatementsCaseStatements	(optimized, precision, isVertexCase)
    553 										: m_caseType == CASETYPE_STATIC_BRANCH			? staticBranchCaseStatements		(optimized, precision, isVertexCase)
    554 										: m_caseType == CASETYPE_LOOP					? loopCaseStatements				(optimized, precision, isVertexCase)
    555 										: DE_NULL;
    556 
    557 		return defaultProgramData(m_caseShaderType, statements);
    558 	}
    559 
    560 private:
    561 	const CaseType m_caseType;
    562 
    563 	static inline string singleStatementCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    564 	{
    565 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
    566 
    567 		return optimized ?	"	" + precision + " vec4 s = sin(value);\n"
    568 							"	" + precision + " vec4 cs = cos(s);\n"
    569 							"	" + precision + " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
    570 							"	value = " + repeat("d", numTopLevelRepeats, "+") + ";\n"
    571 
    572 						 :	"	value = " + repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))", numTopLevelRepeats, "\n\t      + ") + ";\n";
    573 	}
    574 
    575 	static inline string multipleStatementsCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    576 	{
    577 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
    578 		DE_ASSERT(numTopLevelRepeats >= 2);
    579 
    580 		return optimized ?	"	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
    581 							"	" + precision + " vec4 b = cos(cos(a));\n"
    582 							"	a = fract(exp(sqrt(b)));\n"
    583 							"\n"
    584 							+ repeat("\tvalue += a*b;\n", numTopLevelRepeats)
    585 
    586 						 :	repeatIndexedTemplate(	"	" + precision + " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
    587 													"	" + precision + " vec4 b${NDX} = cos(cos(a${NDX}));\n"
    588 													"	a${NDX} = fract(exp(sqrt(b${NDX})));\n"
    589 													"\n",
    590 													numTopLevelRepeats) +
    591 
    592 							repeatIndexedTemplate(	"	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
    593 	}
    594 
    595 	static inline string staticBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    596 	{
    597 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
    598 		DE_ASSERT(numTopLevelRepeats >= 2);
    599 
    600 		if (optimized)
    601 		{
    602 			return "	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
    603 				   "	" + precision + " vec4 b = cos(a);\n"
    604 				   "	b = cos(b);\n"
    605 				   "	a = fract(exp(sqrt(b)));\n"
    606 				   "\n"
    607 				   + repeat("	value += a*b;\n", numTopLevelRepeats);
    608 		}
    609 		else
    610 		{
    611 			string result;
    612 
    613 			for (int i = 0; i < numTopLevelRepeats; i++)
    614 			{
    615 				result +=	"	" + precision + " vec4 a" + toString(i) + " = sin(value) + cos(exp(value));\n"
    616 							"	" + precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
    617 
    618 				if (i % 3 == 0)
    619 					result +=	"	if (1 < 2)\n"
    620 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
    621 				else if (i % 3 == 1)
    622 					result +=	"	b" + toString(i) + " = cos(b" + toString(i) + ");\n";
    623 				else if (i % 3 == 2)
    624 					result +=	"	if (2 < 1);\n"
    625 								"	else\n"
    626 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
    627 				else
    628 					DE_ASSERT(false);
    629 
    630 				result +=	"	a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
    631 			}
    632 
    633 			result += repeatIndexedTemplate("	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
    634 
    635 			return result;
    636 		}
    637 	}
    638 
    639 	static inline string loopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    640 	{
    641 		const int numLoopIterations = useHeavierWorkload ? 32 : 4;
    642 
    643 		return optimized ?	"	" + precision + " vec4 acc = value;\n"
    644 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    645 							"		acc = sin(acc);\n"
    646 							"\n"
    647 							"	value += acc;\n"
    648 							"	value += acc;\n"
    649 
    650 						 :	"	" + precision + " vec4 acc0 = value;\n"
    651 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    652 							"		acc0 = sin(acc0);\n"
    653 							"\n"
    654 							"	" + precision + " vec4 acc1 = value;\n"
    655 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    656 							"		acc1 = sin(acc1);\n"
    657 							"\n"
    658 							"	value += acc0;\n"
    659 							"	value += acc1;\n";
    660 	}
    661 };
    662 
    663 class DeadCodeEliminationCase : public ShaderOptimizationCase
    664 {
    665 public:
    666 	enum CaseType
    667 	{
    668 		CASETYPE_DEAD_BRANCH_SIMPLE = 0,
    669 		CASETYPE_DEAD_BRANCH_COMPLEX,
    670 		CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
    671 		CASETYPE_DEAD_BRANCH_FUNC_CALL,
    672 		CASETYPE_UNUSED_VALUE_BASIC,
    673 		CASETYPE_UNUSED_VALUE_LOOP,
    674 		CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
    675 		CASETYPE_UNUSED_VALUE_AFTER_RETURN,
    676 		CASETYPE_UNUSED_VALUE_MUL_ZERO,
    677 
    678 		CASETYPE_LAST
    679 	};
    680 
    681 	DeadCodeEliminationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
    682 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
    683 		, m_caseType				(caseType)
    684 	{
    685 	}
    686 
    687 protected:
    688 	ProgramData generateProgramData (bool optimized) const
    689 	{
    690 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
    691 		const string	precision		= getShaderPrecision(m_caseShaderType);
    692 		const string	funcDefs		= m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL		? deadBranchFuncCallCaseFuncDefs		(optimized, precision)
    693 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN	? unusedValueAfterReturnCaseFuncDefs	(optimized, precision, isVertexCase)
    694 										: "";
    695 
    696 		const string	statements		= m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE				? deadBranchSimpleCaseStatements			(optimized, isVertexCase)
    697 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX			? deadBranchComplexCaseStatements			(optimized, precision, true,	isVertexCase)
    698 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? deadBranchComplexCaseStatements			(optimized, precision, false,	isVertexCase)
    699 										: m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL			? deadBranchFuncCallCaseStatements			(optimized, isVertexCase)
    700 										: m_caseType == CASETYPE_UNUSED_VALUE_BASIC				? unusedValueBasicCaseStatements			(optimized, precision, isVertexCase)
    701 										: m_caseType == CASETYPE_UNUSED_VALUE_LOOP				? unusedValueLoopCaseStatements				(optimized, precision, isVertexCase)
    702 										: m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? unusedValueDeadBranchCaseStatements		(optimized, precision, isVertexCase)
    703 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN		? unusedValueAfterReturnCaseStatements		()
    704 										: m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO			? unusedValueMulZeroCaseStatements			(optimized, precision, isVertexCase)
    705 										: DE_NULL;
    706 
    707 		return defaultProgramData(m_caseShaderType, funcDefs, statements);
    708 	}
    709 
    710 private:
    711 	const CaseType m_caseType;
    712 
    713 	static inline string deadBranchSimpleCaseStatements (bool optimized, bool useHeavierWorkload)
    714 	{
    715 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
    716 
    717 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    718 
    719 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    720 							"	if (2 < 1)\n"
    721 							"	{\n"
    722 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
    723 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    724 							"			value = sin(value);\n"
    725 							"	}\n";
    726 	}
    727 
    728 	static inline string deadBranchComplexCaseStatements (bool optimized, const string& precision, bool useConst, bool useHeavierWorkload)
    729 	{
    730 		const string	constMaybe			= useConst ? "const " : "";
    731 		const int		numLoopIterations	= useHeavierWorkload ? 16 : 4;
    732 
    733 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    734 
    735 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    736 							"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
    737 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
    738 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
    739 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
    740 							"	" + constMaybe + precision + " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
    741 							"	if (e.x > 1.0)\n"
    742 							"	{\n"
    743 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
    744 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    745 							"			value = sin(value);\n"
    746 							"	}\n";
    747 	}
    748 
    749 	static inline string deadBranchFuncCallCaseFuncDefs (bool optimized, const string& precision)
    750 	{
    751 		return optimized ? "" : precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
    752 	}
    753 
    754 	static inline string deadBranchFuncCallCaseStatements (bool optimized, bool useHeavierWorkload)
    755 	{
    756 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
    757 
    758 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    759 
    760 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    761 							"	if (func(0.3) > 1.0)\n"
    762 							"	{\n"
    763 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
    764 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    765 							"			value = sin(value);\n"
    766 							"	}\n";
    767 	}
    768 
    769 	static inline string unusedValueBasicCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    770 	{
    771 		const int numSinRows = useHeavierWorkload ? 12 : 1;
    772 
    773 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    774 							"	value = used;\n"
    775 
    776 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    777 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n"
    778 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
    779 							"	value = used;\n";
    780 	}
    781 
    782 	static inline string unusedValueLoopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    783 	{
    784 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
    785 
    786 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    787 							"	value = used;\n"
    788 
    789 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    790 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
    791 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
    792 							"		unused = sin(unused + used);\n"
    793 							"	value = used;\n";
    794 	}
    795 
    796 	static inline string unusedValueAfterReturnCaseFuncDefs (bool optimized, const string& precision, bool useHeavierWorkload)
    797 	{
    798 		const int numSinRows = useHeavierWorkload ? 12 : 1;
    799 
    800 		return optimized ?	precision + " vec4 func (" + precision + " vec4 v)\n"
    801 							"{\n"
    802 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
    803 							"	return used;\n"
    804 							"}\n"
    805 
    806 						 :	precision + " vec4 func (" + precision + " vec4 v)\n"
    807 							"{\n"
    808 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
    809 							"	" + precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n"
    810 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
    811 							"	return used;\n"
    812 							"	used = used*unused;"
    813 							"	return used;\n"
    814 							"}\n";
    815 	}
    816 
    817 	static inline string unusedValueAfterReturnCaseStatements (void)
    818 	{
    819 		return "	value = func(value);\n";
    820 	}
    821 
    822 	static inline string unusedValueDeadBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    823 	{
    824 		const int numSinRows = useHeavierWorkload ? 12 : 1;
    825 
    826 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    827 							"	value = used;\n"
    828 
    829 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    830 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
    831 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
    832 							"	if (2 < 1)\n"
    833 							"		used = used*unused;\n"
    834 							"	value = used;\n";
    835 	}
    836 
    837 	static inline string unusedValueMulZeroCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
    838 	{
    839 		const int numSinRows = useHeavierWorkload ? 12 : 1;
    840 
    841 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    842 							"	value = used;\n"
    843 
    844 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
    845 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
    846 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
    847 							"	value = used + unused*float(1-1);\n";
    848 	}
    849 };
    850 
    851 } // anonymous
    852 
    853 ShaderOptimizationTests::ShaderOptimizationTests (Context& context)
    854 	: TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
    855 {
    856 }
    857 
    858 ShaderOptimizationTests::~ShaderOptimizationTests (void)
    859 {
    860 }
    861 
    862 void ShaderOptimizationTests::init (void)
    863 {
    864 	TestCaseGroup* const unrollGroup					= new TestCaseGroup(m_context, "loop_unrolling",					"Loop Unrolling Cases");
    865 	TestCaseGroup* const loopInvariantCodeMotionGroup	= new TestCaseGroup(m_context, "loop_invariant_code_motion",		"Loop-Invariant Code Motion Cases");
    866 	TestCaseGroup* const inlineGroup					= new TestCaseGroup(m_context, "function_inlining",					"Function Inlining Cases");
    867 	TestCaseGroup* const constantPropagationGroup		= new TestCaseGroup(m_context, "constant_propagation",				"Constant Propagation Cases");
    868 	TestCaseGroup* const commonSubexpressionGroup		= new TestCaseGroup(m_context, "common_subexpression_elimination",	"Common Subexpression Elimination Cases");
    869 	TestCaseGroup* const deadCodeEliminationGroup		= new TestCaseGroup(m_context, "dead_code_elimination",				"Dead Code Elimination Cases");
    870 	addChild(unrollGroup);
    871 	addChild(loopInvariantCodeMotionGroup);
    872 	addChild(inlineGroup);
    873 	addChild(constantPropagationGroup);
    874 	addChild(commonSubexpressionGroup);
    875 	addChild(deadCodeEliminationGroup);
    876 
    877 	for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
    878 	{
    879 		const CaseShaderType	caseShaderType			= (CaseShaderType)caseShaderTypeI;
    880 		const char* const		caseShaderTypeSuffix	= caseShaderType == CASESHADERTYPE_VERTEX		? "_vertex"
    881 														: caseShaderType == CASESHADERTYPE_FRAGMENT		? "_fragment"
    882 														: DE_NULL;
    883 
    884 		// Loop unrolling cases.
    885 
    886 		{
    887 			static const int loopIterationCounts[] = { 4, 8, 32 };
    888 
    889 			for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
    890 			{
    891 				const LoopUnrollCase::CaseType	caseType		= (LoopUnrollCase::CaseType)caseTypeI;
    892 				const string					caseTypeName	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "independent_iterations"
    893 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "dependent_iterations"
    894 																: DE_NULL;
    895 				const string					caseTypeDesc	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "loop iterations don't depend on each other"
    896 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "loop iterations depend on each other"
    897 																: DE_NULL;
    898 
    899 				for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
    900 				{
    901 					const int			loopIterations	= loopIterationCounts[loopIterNdx];
    902 					const string		name			= caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
    903 					const string		description		= toString(loopIterations) + " iterations; " + caseTypeDesc;
    904 
    905 					unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(), caseShaderType, caseType, loopIterations));
    906 				}
    907 			}
    908 		}
    909 
    910 		// Loop-invariant code motion cases.
    911 
    912 		{
    913 			static const int loopIterationCounts[] = { 4, 8, 32 };
    914 
    915 			for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
    916 			{
    917 				const int		loopIterations	= loopIterationCounts[loopIterNdx];
    918 				const string	name			= toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
    919 
    920 				loopInvariantCodeMotionGroup->addChild(new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
    921 			}
    922 		}
    923 
    924 		// Function inlining cases.
    925 
    926 		{
    927 			static const int callNestingDepths[] = { 4, 8, 32 };
    928 
    929 			for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
    930 			{
    931 				const int		nestingDepth	= callNestingDepths[nestDepthNdx];
    932 				const string	name			= toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
    933 
    934 				inlineGroup->addChild(new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
    935 			}
    936 		}
    937 
    938 		// Constant propagation cases.
    939 
    940 		for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
    941 		{
    942 			const ConstantPropagationCase::CaseType		caseType		= (ConstantPropagationCase::CaseType)caseTypeI;
    943 			const string								caseTypeName	= caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS		? "built_in_functions"
    944 																		: caseType == ConstantPropagationCase::CASETYPE_ARRAY					? "array"
    945 																		: caseType == ConstantPropagationCase::CASETYPE_STRUCT					? "struct"
    946 																		: DE_NULL;
    947 
    948 			for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
    949 			{
    950 				const bool		constantExpressionsOnly		= constantExpressionsOnlyI != 0;
    951 				const string	name						= caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
    952 
    953 				if (caseType == ConstantPropagationCase::CASETYPE_ARRAY && constantExpressionsOnly) // \note See ConstantPropagationCase's constructor for explanation.
    954 					continue;
    955 
    956 				constantPropagationGroup->addChild(new ConstantPropagationCase(m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
    957 			}
    958 		}
    959 
    960 		// Common subexpression cases.
    961 
    962 		for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
    963 		{
    964 			const CommonSubexpressionCase::CaseType		caseType		= (CommonSubexpressionCase::CaseType)caseTypeI;
    965 
    966 			const string								caseTypeName	= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "single_statement"
    967 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "multiple_statements"
    968 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "static_branch"
    969 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "loop"
    970 																		: DE_NULL;
    971 
    972 			const string								description		= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "A single statement containing multiple uses of same subexpression"
    973 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "Multiple statements performing same computations"
    974 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "Multiple statements including a static conditional"
    975 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "Multiple loops performing the same computations"
    976 																		: DE_NULL;
    977 
    978 			commonSubexpressionGroup->addChild(new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(), description.c_str(), caseShaderType, caseType));
    979 		}
    980 
    981 		// Dead code elimination cases.
    982 
    983 		for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
    984 		{
    985 			const DeadCodeEliminationCase::CaseType		caseType				= (DeadCodeEliminationCase::CaseType)caseTypeI;
    986 			const char* const							caseTypeName			= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "dead_branch_simple"
    987 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "dead_branch_complex"
    988 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "dead_branch_complex_no_const"
    989 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "dead_branch_func_call"
    990 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "unused_value_basic"
    991 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "unused_value_loop"
    992 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "unused_value_dead_branch"
    993 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "unused_value_after_return"
    994 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "unused_value_mul_zero"
    995 																				: DE_NULL;
    996 
    997 			const char* const							caseTypeDescription		= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "Do computation inside a branch that is never taken (condition is simple false constant expression)"
    998 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "Do computation inside a branch that is never taken (condition is complex false constant expression)"
    999 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "Do computation inside a branch that is never taken (condition is complex false expression, not constant expression but still compile-time computable)"
   1000 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "Do computation inside a branch that is never taken (condition is compile-time computable false expression containing function call to a simple inlineable function)"
   1001 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "Compute a value that is never used even statically"
   1002 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "Compute a value, using a loop, that is never used even statically"
   1003 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "Compute a value that is used only inside a statically dead branch"
   1004 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "Compute a value that is used only after a return statement"
   1005 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "Compute a value that is used but multiplied by a zero constant expression"
   1006 																				: DE_NULL;
   1007 
   1008 			deadCodeEliminationGroup->addChild(new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(), caseTypeDescription, caseShaderType, caseType));
   1009 		}
   1010 	}
   1011 }
   1012 
   1013 } // Performance
   1014 } // gles2
   1015 } // deqp
   1016