Home | History | Annotate | Download | only in performance
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program OpenGL ES 3.0 Module
      3  * -------------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Shader operator performance tests.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "es3pShaderOperatorTests.hpp"
     25 #include "glsCalibration.hpp"
     26 #include "gluShaderUtil.hpp"
     27 #include "gluShaderProgram.hpp"
     28 #include "gluPixelTransfer.hpp"
     29 #include "tcuTestLog.hpp"
     30 #include "tcuRenderTarget.hpp"
     31 #include "tcuCommandLine.hpp"
     32 #include "tcuSurface.hpp"
     33 #include "deStringUtil.hpp"
     34 #include "deSharedPtr.hpp"
     35 #include "deClock.h"
     36 #include "deMath.h"
     37 
     38 #include "glwEnums.hpp"
     39 #include "glwFunctions.hpp"
     40 
     41 #include <map>
     42 #include <algorithm>
     43 #include <limits>
     44 #include <set>
     45 
     46 namespace deqp
     47 {
     48 namespace gles3
     49 {
     50 namespace Performance
     51 {
     52 
     53 using namespace gls;
     54 using namespace glu;
     55 using tcu::Vec2;
     56 using tcu::Vec4;
     57 using tcu::TestLog;
     58 using de::SharedPtr;
     59 
     60 using std::string;
     61 using std::vector;
     62 
     63 #define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)
     64 
     65 // Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
     66 static const int	DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD	= 3;
     67 // How many different workload sizes are used by OperatorPerformanceCase.
     68 static const int	NUM_WORKLOADS							= 8;
     69 // Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
     70 static const int	MAX_WORKLOAD_SIZE						= 1<<29;
     71 
     72 // BinaryOpCase-specific constants for shader generation.
     73 static const int	BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS	= 4;
     74 static const int	BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT	= 2;
     75 static const int	BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT		= 4;
     76 
     77 // FunctionCase-specific constants for shader generation.
     78 static const int	FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS			= 4;
     79 
     80 static const char* const s_swizzles[][4] =
     81 {
     82 	{ "x", "yx", "yzx", "wzyx" },
     83 	{ "y", "zy", "wyz", "xwzy" },
     84 	{ "z", "wy", "zxy", "yzwx" },
     85 	{ "w", "xw", "yxw", "zyxw" }
     86 };
     87 
     88 template <int N>
     89 static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data)
     90 {
     91 	tcu::Vector<float, N> sum(0.0f);
     92 	for (int i = 0; i < (int)data.size(); i++)
     93 		sum += data[i];
     94 	return sum / tcu::Vector<float, N>((float)data.size());
     95 }
     96 
     97 static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
     98 {
     99 	switch (n)
    100 	{
    101 		case 1: gl.uniform1fv(location, count, data); break;
    102 		case 2: gl.uniform2fv(location, count, data); break;
    103 		case 3: gl.uniform3fv(location, count, data); break;
    104 		case 4: gl.uniform4fv(location, count, data); break;
    105 		default: DE_ASSERT(false);
    106 	}
    107 }
    108 
    109 static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data)
    110 {
    111 	switch (n)
    112 	{
    113 		case 1: gl.uniform1iv(location, count, data); break;
    114 		case 2: gl.uniform2iv(location, count, data); break;
    115 		case 3: gl.uniform3iv(location, count, data); break;
    116 		case 4: gl.uniform4iv(location, count, data); break;
    117 		default: DE_ASSERT(false);
    118 	}
    119 }
    120 
    121 static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
    122 {
    123 	switch (n)
    124 	{
    125 		case 2: gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]); break;
    126 		case 3: gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]); break;
    127 		case 4: gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]); break;
    128 		default: DE_ASSERT(false);
    129 	}
    130 }
    131 
    132 static glu::DataType getDataTypeFloatOrVec (int size)
    133 {
    134 	return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
    135 }
    136 
    137 static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def)
    138 {
    139 	const int cmdLineVal = cmdLine.getTestIterationCount();
    140 	return cmdLineVal > 0 ? cmdLineVal : def;
    141 }
    142 
    143 static string lineParamsString (const LineParameters& params)
    144 {
    145 	return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
    146 }
    147 
    148 namespace
    149 {
    150 
    151 /*--------------------------------------------------------------------*//*!
    152  * \brief Abstract class for measuring shader operator performance.
    153  *
    154  * This class draws multiple times with different workload sizes (set
    155  * via a uniform, by subclass). Time for each frame is measured, and the
    156  * slope of the workload size vs frame time data is estimated. This slope
    157  * tells us the estimated increase in frame time caused by a workload
    158  * increase of 1 unit (what 1 workload unit means is up to subclass).
    159  *
    160  * Generally, the shaders contain not just the operation we're interested
    161  * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
    162  * eliminate this cost, we actually do the stuff described in the above
    163  * paragraph with multiple programs (usually two), which contain different
    164  * kinds of workload (e.g. different loop contents). Then we can (in
    165  * theory) compute the cost of just one operation in a subclass-dependent
    166  * manner.
    167  *
    168  * At this point, the result tells us the increase in frame time caused
    169  * by the addition of one operation. Dividing this by the amount of
    170  * draw calls in a frame, and further by the amount of vertices or
    171  * fragments in a draw call, we get the time cost of one operation.
    172  *
    173  * In reality, there sometimes isn't just a trivial linear dependence
    174  * between workload size and frame time. Instead, there tends to be some
    175  * amount of initial "free" operations. That is, it may be that all
    176  * workload sizes below some positive integer C yield the same frame time,
    177  * and only workload sizes beyond C increase the frame time in a supposedly
    178  * linear manner. Graphically, this means that there graph consists of two
    179  * parts: a horizontal left part, and a linearly increasing right part; the
    180  * right part starts where the left parts ends. The principal task of these
    181  * tests is to look at the slope of the increasing right part. Additionally
    182  * an estimate for the amount of initial free operations is calculated.
    183  * Note that it is also normal to get graphs where the horizontal left part
    184  * is of zero width, i.e. there are no free operations.
    185  *//*--------------------------------------------------------------------*/
    186 class OperatorPerformanceCase : public tcu::TestCase
    187 {
    188 public:
    189 	enum CaseType
    190 	{
    191 		CASETYPE_VERTEX = 0,
    192 		CASETYPE_FRAGMENT,
    193 
    194 		CASETYPE_LAST
    195 	};
    196 
    197 	struct InitialCalibration
    198 	{
    199 		int initialNumCalls;
    200 		InitialCalibration (void) : initialNumCalls(1) {}
    201 	};
    202 
    203 	typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;
    204 
    205 								OperatorPerformanceCase		(tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
    206 															 CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage);
    207 								~OperatorPerformanceCase	(void);
    208 
    209 	void						init						(void);
    210 	void						deinit						(void);
    211 
    212 	IterateResult				iterate						(void);
    213 
    214 	struct AttribSpec
    215 	{
    216 		AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_)
    217 			: name		(name_)
    218 			, p00		(p00_)
    219 			, p01		(p01_)
    220 			, p10		(p10_)
    221 			, p11		(p11_)
    222 		{
    223 		}
    224 
    225 		AttribSpec (void) {}
    226 
    227 		std::string		name;
    228 		tcu::Vec4		p00;	//!< Bottom left.
    229 		tcu::Vec4		p01;	//!< Bottom right.
    230 		tcu::Vec4		p10;	//!< Top left.
    231 		tcu::Vec4		p11;	//!< Top right.
    232 	};
    233 
    234 protected:
    235 	struct ProgramContext
    236 	{
    237 		string				vertShaderSource;
    238 		string				fragShaderSource;
    239 		vector<AttribSpec>	attributes;
    240 
    241 		string				description;
    242 
    243 		ProgramContext (void) {}
    244 		ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc)
    245 			: vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {}
    246 	};
    247 
    248 	virtual vector<ProgramContext>	generateProgramData					(void) const = 0;
    249 	//! Sets program-specific uniforms that don't depend on the workload size.
    250 	virtual void					setGeneralUniforms					(deUint32 program) const = 0;
    251 	//! Sets the uniform(s) that specifies the workload size in the shader.
    252 	virtual void					setWorkloadSizeUniform				(deUint32 program, int workload) const = 0;
    253 	//! Computes the cost of a single operation, given the workload costs per program.
    254 	virtual float					computeSingleOperationTime			(const vector<float>& perProgramWorkloadCosts) const = 0;
    255 	//! Logs a human-readable description of what computeSingleOperationTime does.
    256 	virtual void					logSingleOperationCalculationInfo	(void) const = 0;
    257 
    258 	glu::RenderContext&				m_renderCtx;
    259 
    260 	CaseType						m_caseType;
    261 
    262 private:
    263 	enum State
    264 	{
    265 		STATE_CALIBRATING = 0,		//!< Calibrate draw call count, using first program in m_programs, with workload size 1.
    266 		STATE_FIND_HIGH_WORKLOAD,	//!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
    267 		STATE_MEASURING,			//!< Do actual measurements, for each program in m_programs.
    268 		STATE_REPORTING,			//!< Measurements are done; calculate results and log.
    269 		STATE_FINISHED,				//!< All done.
    270 
    271 		STATE_LAST
    272 	};
    273 
    274 	struct WorkloadRecord
    275 	{
    276 		int				workloadSize;
    277 		vector<float>	frameTimes; //!< In microseconds.
    278 
    279 				WorkloadRecord	(int workloadSize_)						: workloadSize(workloadSize_) {}
    280 		bool	operator<		(const WorkloadRecord& other) const		{ return this->workloadSize < other.workloadSize; }
    281 		void	addFrameTime	(float time)							{ frameTimes.push_back(time); }
    282 		float	getMedianTime	(void) const
    283 		{
    284 			vector<float> times = frameTimes;
    285 			std::sort(times.begin(), times.end());
    286 			return times.size() % 2 == 0 ?
    287 					(times[times.size()/2-1] + times[times.size()/2])*0.5f :
    288 					times[times.size()/2];
    289 		}
    290 	};
    291 
    292 	void								prepareProgram				(int progNdx);					//!< Sets attributes and uniforms for m_programs[progNdx].
    293 	void								prepareWorkload				(int progNdx, int workload);	//!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
    294 	void								prepareNextRound			(void);							//!< Increases workload and/or updates m_state.
    295 	void								render						(int numDrawCalls);
    296 	deUint64							renderAndMeasure			(int numDrawCalls);
    297 	void								adjustAndLogGridAndViewport	(void);							//!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.
    298 
    299 	vector<Vec2>						getWorkloadMedianDataPoints	(int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]
    300 
    301 	const int							m_numMeasurementsPerWorkload;
    302 	const int							m_numWorkloads;				//!< How many different workload sizes are used for measurement for each program.
    303 
    304 	int									m_workloadNdx;				//!< Runs from 0 to m_numWorkloads-1.
    305 
    306 	int									m_workloadMeasurementNdx;
    307 	vector<vector<WorkloadRecord> >		m_workloadRecordsFindHigh;	//!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
    308 	vector<vector<WorkloadRecord> >		m_workloadRecords;			//!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.
    309 
    310 	State								m_state;
    311 	int									m_measureProgramNdx;		//!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.
    312 
    313 	vector<int>							m_highWorkloadSizes;		//!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.
    314 
    315 	TheilSenCalibrator					m_calibrator;
    316 	InitialCalibrationStorage			m_initialCalibrationStorage;
    317 
    318 	int									m_viewportWidth;
    319 	int									m_viewportHeight;
    320 	int									m_gridSizeX;
    321 	int									m_gridSizeY;
    322 
    323 	vector<ProgramContext>				m_programData;
    324 	vector<SharedPtr<ShaderProgram> >	m_programs;
    325 
    326 	std::vector<deUint32>				m_attribBuffers;
    327 };
    328 
    329 static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
    330 {
    331 	return v0 + (v2-v0)*x + (v1-v0)*y;
    332 }
    333 
    334 static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
    335 {
    336 	// \note Top left fill rule.
    337 	if (x + y < 1.0f)
    338 		return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
    339 	else
    340 		return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
    341 }
    342 
    343 static inline int getNumVertices (int gridSizeX, int gridSizeY)
    344 {
    345 	return gridSizeX * gridSizeY * 2 * 3;
    346 }
    347 
    348 static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec)
    349 {
    350 	const int numComponents = 4;
    351 
    352 	DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
    353 	dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);
    354 
    355 	{
    356 		int dstNdx = 0;
    357 
    358 		for (int baseY = 0; baseY < gridSizeY; baseY++)
    359 		for (int baseX = 0; baseX < gridSizeX; baseX++)
    360 		{
    361 			const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
    362 			const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
    363 			const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
    364 			const float yf1 = (float)(baseY + 1) / (float)gridSizeY;
    365 
    366 #define ADD_VERTEX(XF, YF)										\
    367 	for (int compNdx = 0; compNdx < numComponents; compNdx++)	\
    368 		dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))
    369 
    370 			ADD_VERTEX(xf0, yf0);
    371 			ADD_VERTEX(xf1, yf0);
    372 			ADD_VERTEX(xf0, yf1);
    373 
    374 			ADD_VERTEX(xf1, yf0);
    375 			ADD_VERTEX(xf1, yf1);
    376 			ADD_VERTEX(xf0, yf1);
    377 
    378 #undef ADD_VERTEX
    379 		}
    380 	}
    381 }
    382 
    383 static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b)
    384 {
    385 	return (a.offset - b.offset) / (b.coefficient - a.coefficient);
    386 }
    387 
    388 static int numDistinctX (const vector<Vec2>& data)
    389 {
    390 	std::set<float> xs;
    391 	for (int i = 0; i < (int)data.size(); i++)
    392 		xs.insert(data[i].x());
    393 	return (int)xs.size();
    394 }
    395 
    396 static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data)
    397 {
    398 	const Vec2	mid					= mean(data);
    399 
    400 	float		slopeNumerator		= 0.0f;
    401 	float		slopeDenominator	= 0.0f;
    402 
    403 	for (int i = 0; i < (int)data.size(); i++)
    404 	{
    405 		const Vec2 diff = data[i] - mid;
    406 
    407 		slopeNumerator		+= diff.x()*diff.y();
    408 		slopeDenominator	+= diff.x()*diff.x();
    409 	}
    410 
    411 	const float slope	= slopeNumerator / slopeDenominator;
    412 	const float offset	= mid.y() - slope*mid.x();
    413 
    414 	return gls::LineParameters(offset, slope);
    415 }
    416 
    417 static float simpleLinearRegressionError (const vector<Vec2>& data)
    418 {
    419 	if (numDistinctX(data) <= 2)
    420 		return 0.0f;
    421 	else
    422 	{
    423 		const gls::LineParameters	estimator	= simpleLinearRegression(data);
    424 		float						error		= 0.0f;
    425 
    426 		for (int i = 0; i < (int)data.size(); i++)
    427 		{
    428 			const float estY = estimator.offset + estimator.coefficient*data[i].x();
    429 			const float diff = estY - data[i].y();
    430 			error += diff*diff;
    431 		}
    432 
    433 		return error / (float)data.size();
    434 	}
    435 }
    436 
    437 static float verticalVariance (const vector<Vec2>& data)
    438 {
    439 	if (numDistinctX(data) <= 2)
    440 		return 0.0f;
    441 	else
    442 	{
    443 		const float		meanY = mean(data).y();
    444 		float			error = 0.0f;
    445 
    446 		for (int i = 0; i < (int)data.size(); i++)
    447 		{
    448 			const float diff = meanY - data[i].y();
    449 			error += diff*diff;
    450 		}
    451 
    452 		return error / (float)data.size();
    453 	}
    454 }
    455 
    456 /*--------------------------------------------------------------------*//*!
    457  * \brief Find the x coord that divides the input data into two slopes.
    458  *
    459  * The operator performance measurements tend to produce results where
    460  * we get small operation counts "for free" (e.g. because the operations
    461  * are performed during some memory transfer overhead or something),
    462  * resulting in a curve with two parts: an initial horizontal line segment,
    463  * and a rising line.
    464  *
    465  * This function finds the x coordinate that divides the input data into
    466  * two parts such that the sum of the mean square errors for the
    467  * least-squares estimated lines for the two parts is minimized, under the
    468  * additional condition that the left line is horizontal.
    469  *
    470  * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
    471  * is the right line, and the rest of data is the left line.
    472  *//*--------------------------------------------------------------------*/
    473 static float findSlopePivotX (const vector<Vec2>& data)
    474 {
    475 	std::set<float> xCoords;
    476 	for (int i = 0; i < (int)data.size(); i++)
    477 		xCoords.insert(data[i].x());
    478 
    479 	float			lowestError		= std::numeric_limits<float>::infinity();
    480 	float			bestPivotX		= -std::numeric_limits<float>::infinity();
    481 
    482 	for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
    483 	{
    484 		vector<Vec2> leftData;
    485 		vector<Vec2> rightData;
    486 		for (int i = 0; i < (int)data.size(); i++)
    487 		{
    488 			if (data[i].x() < *pivotX)
    489 				leftData.push_back(data[i]);
    490 			else
    491 				rightData.push_back(data[i]);
    492 		}
    493 
    494 		if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
    495 			break;
    496 
    497 		{
    498 			const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);
    499 
    500 			if (totalError < lowestError)
    501 			{
    502 				lowestError = totalError;
    503 				bestPivotX = *pivotX;
    504 			}
    505 		}
    506 	}
    507 
    508 	DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());
    509 
    510 	return bestPivotX;
    511 }
    512 
    513 struct SegmentedEstimator
    514 {
    515 	float					pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
    516 	gls::LineParameters		left;
    517 	gls::LineParameters		right;
    518 	SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {}
    519 };
    520 
    521 /*--------------------------------------------------------------------*//*!
    522  * \brief Compute line estimators for (potentially) two-segment data.
    523  *
    524  * Splits the given data into left and right parts (using findSlopePivotX)
    525  * and returns the line estimates for them.
    526  *
    527  * Sometimes, however (especially in fragment shader cases) the data is
    528  * in fact not segmented, but a straight line. This function attempts to
    529  * detect if this the case, and if so, sets left.offset = right.offset and
    530  * left.slope = 0, meaning essentially that the initial "flat" part of the
    531  * data has zero width.
    532  *//*--------------------------------------------------------------------*/
    533 static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data)
    534 {
    535 	const float		pivotX = findSlopePivotX(data);
    536 	vector<Vec2>	leftData;
    537 	vector<Vec2>	rightData;
    538 
    539 	for (int i = 0; i < (int)data.size(); i++)
    540 	{
    541 		if (data[i].x() < pivotX)
    542 			leftData.push_back(data[i]);
    543 		else
    544 			rightData.push_back(data[i]);
    545 	}
    546 
    547 	{
    548 		const gls::LineParameters leftLine		= gls::theilSenLinearRegression(leftData);
    549 		const gls::LineParameters rightLine		= gls::theilSenLinearRegression(rightData);
    550 
    551 		if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f)
    552 		{
    553 			// Left data doesn't seem credible; assume the data is just a single line.
    554 			const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
    555 			return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity());
    556 		}
    557 		else
    558 			return SegmentedEstimator(leftLine, rightLine, pivotX);
    559 	}
    560 }
    561 
    562 OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
    563 												  CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage)
    564 	: tcu::TestCase					(testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
    565 	, m_renderCtx					(renderCtx)
    566 	, m_caseType					(caseType)
    567 	, m_numMeasurementsPerWorkload	(getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
    568 	, m_numWorkloads				(numWorkloads)
    569 	, m_workloadNdx					(-1)
    570 	, m_workloadMeasurementNdx		(-1)
    571 	, m_state						(STATE_LAST)
    572 	, m_measureProgramNdx			(-1)
    573 	, m_initialCalibrationStorage	(initialCalibrationStorage)
    574 	, m_viewportWidth				(caseType == CASETYPE_VERTEX	? 32	: renderCtx.getRenderTarget().getWidth())
    575 	, m_viewportHeight				(caseType == CASETYPE_VERTEX	? 32	: renderCtx.getRenderTarget().getHeight())
    576 	, m_gridSizeX					(caseType == CASETYPE_FRAGMENT	? 1		: 100)
    577 	, m_gridSizeY					(caseType == CASETYPE_FRAGMENT	? 1		: 100)
    578 {
    579 	DE_ASSERT(m_numWorkloads > 0);
    580 }
    581 
    582 OperatorPerformanceCase::~OperatorPerformanceCase (void)
    583 {
    584 	if (!m_attribBuffers.empty())
    585 	{
    586 		m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    587 		m_attribBuffers.clear();
    588 	}
    589 }
    590 
    591 static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget)
    592 {
    593 	log << TestLog::Section("RenderTarget", "Render target")
    594 		<< TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage
    595 		<< TestLog::Message << "bits:"
    596 							<< " R" << renderTarget.getPixelFormat().redBits
    597 							<< " G" << renderTarget.getPixelFormat().greenBits
    598 							<< " B" << renderTarget.getPixelFormat().blueBits
    599 							<< " A" << renderTarget.getPixelFormat().alphaBits
    600 							<< " D" << renderTarget.getDepthBits()
    601 							<< " S" << renderTarget.getStencilBits()
    602 							<< TestLog::EndMessage;
    603 
    604 	if (renderTarget.getNumSamples() != 0)
    605 		log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
    606 	else
    607 		log << TestLog::Message << "No MSAA" << TestLog::EndMessage;
    608 
    609 	log << TestLog::EndSection;
    610 }
    611 
    612 vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const
    613 {
    614 	const vector<WorkloadRecord>&	records = m_workloadRecords[progNdx];
    615 	vector<Vec2>					result;
    616 
    617 	for (int i = 0; i < (int)records.size(); i++)
    618 		result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));
    619 
    620 	return result;
    621 }
    622 
    623 void OperatorPerformanceCase::prepareProgram (int progNdx)
    624 {
    625 	DE_ASSERT(progNdx < (int)m_programs.size());
    626 	DE_ASSERT(m_programData.size() == m_programs.size());
    627 
    628 	const glw::Functions&	gl			= m_renderCtx.getFunctions();
    629 	const ShaderProgram&	program		= *m_programs[progNdx];
    630 
    631 	vector<AttribSpec>		attributes	= m_programData[progNdx].attributes;
    632 
    633 	attributes.push_back(AttribSpec("a_position",
    634 									Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
    635 									Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
    636 									Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
    637 									Vec4( 1.0f,  1.0f, 0.0f, 1.0f)));
    638 
    639 	DE_ASSERT(program.isOk());
    640 
    641 	// Generate vertices.
    642 	if (!m_attribBuffers.empty())
    643 		gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    644 	m_attribBuffers.resize(attributes.size(), 0);
    645 	gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    646 	GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");
    647 
    648 	for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
    649 	{
    650 		std::vector<float> vertices;
    651 		generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);
    652 
    653 		gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
    654 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
    655 		GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
    656 	}
    657 
    658 	// Setup attribute bindings.
    659 	for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
    660 	{
    661 		int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());
    662 
    663 		if (location >= 0)
    664 		{
    665 			gl.enableVertexAttribArray(location);
    666 			gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
    667 			gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
    668 		}
    669 	}
    670 	GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");
    671 
    672 	gl.useProgram(program.getProgram());
    673 	setGeneralUniforms(program.getProgram());
    674 	gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
    675 }
    676 
    677 void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload)
    678 {
    679 	setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
    680 	render(m_calibrator.getCallCount());
    681 }
    682 
    683 void OperatorPerformanceCase::prepareNextRound (void)
    684 {
    685 	DE_ASSERT(m_state == STATE_CALIBRATING			||
    686 			  m_state == STATE_FIND_HIGH_WORKLOAD	||
    687 			  m_state == STATE_MEASURING);
    688 
    689 	TestLog& log = m_testCtx.getLog();
    690 
    691 	if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
    692 	{
    693 		m_measureProgramNdx = 0;
    694 		m_state = STATE_FIND_HIGH_WORKLOAD;
    695 	}
    696 
    697 	if (m_state == STATE_CALIBRATING)
    698 		prepareWorkload(0, 1);
    699 	else if (m_state == STATE_FIND_HIGH_WORKLOAD)
    700 	{
    701 		vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx];
    702 
    703 		if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime())
    704 		{
    705 			int workloadSize;
    706 
    707 			if (records.empty())
    708 				workloadSize = 1;
    709 			else
    710 			{
    711 				workloadSize = records.back().workloadSize*2;
    712 
    713 				if (workloadSize > MAX_WORKLOAD_SIZE)
    714 				{
    715 					log << TestLog::Message << "Even workload size " << records.back().workloadSize
    716 											<< " doesn't give high enough frame time for program " << m_measureProgramNdx
    717 											<< ". Can't get sensible result." << TestLog::EndMessage;
    718 					MEASUREMENT_FAIL();
    719 				}
    720 			}
    721 
    722 			records.push_back(WorkloadRecord(workloadSize));
    723 			prepareWorkload(0, workloadSize);
    724 			m_workloadMeasurementNdx = 0;
    725 		}
    726 		else
    727 		{
    728 			m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
    729 			m_measureProgramNdx++;
    730 
    731 			if (m_measureProgramNdx >= (int)m_programs.size())
    732 			{
    733 				m_state = STATE_MEASURING;
    734 				m_workloadNdx = -1;
    735 				m_measureProgramNdx = 0;
    736 			}
    737 
    738 			prepareProgram(m_measureProgramNdx);
    739 			prepareNextRound();
    740 		}
    741 	}
    742 	else
    743 	{
    744 		m_workloadNdx++;
    745 
    746 		if (m_workloadNdx < m_numWorkloads)
    747 		{
    748 			DE_ASSERT(m_numWorkloads > 1);
    749 			const int highWorkload	= m_highWorkloadSizes[m_measureProgramNdx];
    750 			const int workload		= highWorkload > m_numWorkloads ?
    751 										1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) :
    752 										1 + m_workloadNdx;
    753 
    754 			prepareWorkload(m_measureProgramNdx, workload);
    755 
    756 			m_workloadMeasurementNdx = 0;
    757 
    758 			m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
    759 		}
    760 		else
    761 		{
    762 			m_measureProgramNdx++;
    763 
    764 			if (m_measureProgramNdx < (int)m_programs.size())
    765 			{
    766 				m_workloadNdx = -1;
    767 				m_workloadMeasurementNdx = 0;
    768 				prepareProgram(m_measureProgramNdx);
    769 				prepareNextRound();
    770 			}
    771 			else
    772 				m_state = STATE_REPORTING;
    773 		}
    774 	}
    775 }
    776 
    777 void OperatorPerformanceCase::init (void)
    778 {
    779 	TestLog&				log		= m_testCtx.getLog();
    780 	const glw::Functions&	gl		= m_renderCtx.getFunctions();
    781 
    782 	// Validate that we have sane grid and viewport setup.
    783 	DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
    784 	TCU_CHECK(de::inRange(m_viewportWidth,	1, m_renderCtx.getRenderTarget().getWidth()) &&
    785 			  de::inRange(m_viewportHeight,	1, m_renderCtx.getRenderTarget().getHeight()));
    786 
    787 	logRenderTargetInfo(log, m_renderCtx.getRenderTarget());
    788 
    789 	log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
    790 	gl.enable(GL_BLEND);
    791 	gl.blendEquation(GL_FUNC_ADD);
    792 	gl.blendFunc(GL_ONE, GL_ONE);
    793 
    794 	// Generate programs.
    795 	DE_ASSERT(m_programs.empty());
    796 	m_programData = generateProgramData();
    797 	DE_ASSERT(!m_programData.empty());
    798 
    799 	for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
    800 	{
    801 		const string& vert = m_programData[progNdx].vertShaderSource;
    802 		const string& frag = m_programData[progNdx].fragShaderSource;
    803 
    804 		m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));
    805 
    806 		if (!m_programs.back()->isOk())
    807 		{
    808 			log << *m_programs.back();
    809 			TCU_FAIL("Compile failed");
    810 		}
    811 	}
    812 
    813 	// Log all programs.
    814 	for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
    815 		log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
    816 				<< TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage
    817 				<< *m_programs[progNdx]
    818 			<< TestLog::EndSection;
    819 
    820 	m_highWorkloadSizes.resize(m_programData.size());
    821 	m_workloadRecordsFindHigh.resize(m_programData.size());
    822 	m_workloadRecords.resize(m_programData.size());
    823 
    824 	m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
    825 											1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
    826 	m_state = STATE_CALIBRATING;
    827 
    828 	prepareProgram(0);
    829 	prepareNextRound();
    830 }
    831 
    832 void OperatorPerformanceCase::deinit (void)
    833 {
    834 	if (!m_attribBuffers.empty())
    835 	{
    836 		m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    837 		m_attribBuffers.clear();
    838 	}
    839 
    840 	m_programs.clear();
    841 }
    842 
    843 void OperatorPerformanceCase::render (int numDrawCalls)
    844 {
    845 	const glw::Functions&	gl				= m_renderCtx.getFunctions();
    846 	const int				numVertices		= getNumVertices(m_gridSizeX, m_gridSizeY);
    847 
    848 	for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
    849 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    850 
    851 	glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
    852 }
    853 
    854 deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls)
    855 {
    856 	const deUint64 startTime = deGetMicroseconds();
    857 	render(numDrawCalls);
    858 	return deGetMicroseconds() - startTime;
    859 }
    860 
    861 void OperatorPerformanceCase::adjustAndLogGridAndViewport (void)
    862 {
    863 	TestLog& log = m_testCtx.getLog();
    864 
    865 	// If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
    866 	if (m_calibrator.getCallCount() == 1)
    867 	{
    868 		const gls::MeasureState&	calibratorMeasure	= m_calibrator.getMeasureState();
    869 		const float					drawCallTime		= (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
    870 		const float					targetDrawCallTime	= m_calibrator.getParameters().targetFrameTimeUs;
    871 		const float					targetRatio			= targetDrawCallTime / drawCallTime;
    872 
    873 		if (targetRatio < 0.95f)
    874 		{
    875 			// Reduce grid or viewport size assuming draw call time scales proportionally.
    876 			if (m_caseType == CASETYPE_VERTEX)
    877 			{
    878 				const float targetRatioSqrt = deFloatSqrt(targetRatio);
    879 				m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX);
    880 				m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY);
    881 				TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times");
    882 				log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
    883 			}
    884 			else
    885 			{
    886 				const float targetRatioSqrt = deFloatSqrt(targetRatio);
    887 				m_viewportWidth  = (int)(targetRatioSqrt * (float)m_viewportWidth);
    888 				m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
    889 				TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times");
    890 				log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
    891 			}
    892 		}
    893 	}
    894 
    895 	prepareProgram(0);
    896 
    897 	// Log grid and viewport sizes.
    898 	log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
    899 	log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
    900 }
    901 
    902 OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void)
    903 {
    904 	const TheilSenCalibrator::State calibratorState = m_calibrator.getState();
    905 
    906 	if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
    907 	{
    908 		if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
    909 			m_calibrator.recomputeParameters();
    910 		else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
    911 			m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
    912 		else
    913 			DE_ASSERT(false);
    914 
    915 		if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
    916 		{
    917 			logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
    918 			adjustAndLogGridAndViewport();
    919 			prepareNextRound();
    920 			m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
    921 		}
    922 	}
    923 	else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
    924 	{
    925 		if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
    926 		{
    927 			vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx];
    928 			records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
    929 			m_workloadMeasurementNdx++;
    930 		}
    931 		else
    932 			prepareNextRound();
    933 	}
    934 	else
    935 	{
    936 		DE_ASSERT(m_state == STATE_REPORTING);
    937 
    938 		TestLog&	log				= m_testCtx.getLog();
    939 		const int	drawCallCount	= m_calibrator.getCallCount();
    940 
    941 		{
    942 			// Compute per-program estimators for measurements.
    943 			vector<SegmentedEstimator> estimators;
    944 			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
    945 				estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));
    946 
    947 			// Log measurements and their estimators for all programs.
    948 			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
    949 			{
    950 				const SegmentedEstimator&	estimator	= estimators[progNdx];
    951 				const string				progNdxStr	= de::toString(progNdx);
    952 				vector<WorkloadRecord>		records		= m_workloadRecords[progNdx];
    953 				std::sort(records.begin(), records.end());
    954 
    955 				{
    956 					const tcu::ScopedLogSection section(log,
    957 														"Program" + progNdxStr + "Measurements",
    958 														"Measurements for program " + progNdxStr);
    959 
    960 					// Sample list of individual frame times.
    961 
    962 					log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times")
    963 						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload",	"Workload",		"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
    964 											   << TestLog::ValueInfo("FrameTime",	"Frame time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
    965 						<< TestLog::EndSampleInfo;
    966 
    967 					for (int i = 0; i < (int)records.size(); i++)
    968 						for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
    969 							log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample;
    970 
    971 					log << TestLog::EndSampleList;
    972 
    973 					// Sample list of median frame times.
    974 
    975 					log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
    976 						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload",		"Workload",				"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
    977 											   << TestLog::ValueInfo("MedianFrameTime",	"Median frame time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
    978 						<< TestLog::EndSampleInfo;
    979 
    980 					for (int i = 0; i < (int)records.size(); i++)
    981 						log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample;
    982 
    983 					log << TestLog::EndSampleList;
    984 
    985 					log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);
    986 
    987 					if (estimator.pivotX > -std::numeric_limits<float>::infinity())
    988 						log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX
    989 												<< " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage
    990 							<< TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left)
    991 												<< " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage;
    992 					else
    993 						log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage;
    994 				}
    995 			}
    996 
    997 			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
    998 			{
    999 				if (estimators[progNdx].right.coefficient <= 0.0f)
   1000 				{
   1001 					log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
   1002 					MEASUREMENT_FAIL();
   1003 				}
   1004 			}
   1005 
   1006 			// \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
   1007 			// incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
   1008 			// of R.
   1009 			//
   1010 			// The measurements of any single program can't tell us the final result (time of single operation),
   1011 			// so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
   1012 			// subclass-defined manner.
   1013 			//
   1014 			// After that, microseconds per operation can be calculated as singleOperationTime / (D * R).
   1015 
   1016 			{
   1017 				vector<float>	perProgramSlopes;
   1018 				for (int i = 0; i < (int)m_programs.size(); i++)
   1019 					perProgramSlopes.push_back(estimators[i].right.coefficient);
   1020 
   1021 				logSingleOperationCalculationInfo();
   1022 
   1023 				const float		maxSlope				= *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
   1024 				const float		usecsPerFramePerOp		= computeSingleOperationTime(perProgramSlopes);
   1025 				const int		vertexOrFragmentCount	= m_caseType == CASETYPE_VERTEX ?
   1026 															getNumVertices(m_gridSizeX, m_gridSizeY) :
   1027 															m_viewportWidth*m_viewportHeight;
   1028 				const double	usecsPerDrawCallPerOp	= usecsPerFramePerOp / (double)drawCallCount;
   1029 				const double	usecsPerSingleOp		= usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
   1030 				const double	megaOpsPerSecond		= (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp;
   1031 				const int		numFreeOps				= de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left,
   1032 																									 LineParameters(estimators[0].right.offset,
   1033 																													usecsPerFramePerOp))));
   1034 
   1035 				log << TestLog::Integer("VertexOrFragmentCount",
   1036 										"R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count",
   1037 										"", QP_KEY_TAG_NONE, vertexOrFragmentCount)
   1038 
   1039 					<< TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount)
   1040 
   1041 					<< TestLog::Integer("VerticesOrFragmentsPerFrame",
   1042 										"R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame",
   1043 										"", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount)
   1044 
   1045 					<< TestLog::Float("TimePerFramePerOp",
   1046 									  "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments")
   1047 									  + " (i.e. one frame) with one shader operation",
   1048 									  "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)
   1049 
   1050 					<< TestLog::Float("TimePerDrawcallPerOp",
   1051 									  "Estimated cost of one draw call with one shader operation",
   1052 									  "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)
   1053 
   1054 					<< TestLog::Float("TimePerSingleOp",
   1055 									  "Estimated cost of a single shader operation",
   1056 									  "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp);
   1057 
   1058 				// \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
   1059 				//		 for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
   1060 				//		 following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
   1061 				if (usecsPerFramePerOp <= -0.1f*maxSlope)
   1062 				{
   1063 					log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
   1064 					MEASUREMENT_FAIL();
   1065 				}
   1066 				else if (usecsPerFramePerOp <= 0.001*maxSlope)
   1067 				{
   1068 					log << TestLog::Message << "Cost of operation seems to be approximately zero." << TestLog::EndMessage;
   1069 					m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1070 				}
   1071 				else
   1072 				{
   1073 					log << TestLog::Float("OpsPerSecond",
   1074 										  "Operations per second",
   1075 										  "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond)
   1076 
   1077 						<< TestLog::Integer("NumFreeOps",
   1078 											"Estimated number of \"free\" operations",
   1079 											"", QP_KEY_TAG_PERFORMANCE, numFreeOps);
   1080 
   1081 					m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
   1082 				}
   1083 
   1084 				m_state = STATE_FINISHED;
   1085 			}
   1086 		}
   1087 
   1088 		return STOP;
   1089 	}
   1090 
   1091 	return CONTINUE;
   1092 }
   1093 
   1094 // Binary operator case.
   1095 class BinaryOpCase : public OperatorPerformanceCase
   1096 {
   1097 public:
   1098 						BinaryOpCase				(Context& context, const char* name, const char* description, const char* op,
   1099 													 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration);
   1100 
   1101 protected:
   1102 	vector<ProgramContext>	generateProgramData					(void) const;
   1103 	void					setGeneralUniforms					(deUint32 program) const;
   1104 	void					setWorkloadSizeUniform				(deUint32 program, int numOperations) const;
   1105 	float					computeSingleOperationTime			(const vector<float>& perProgramOperationCosts) const;
   1106 	void					logSingleOperationCalculationInfo	(void) const;
   1107 
   1108 private:
   1109 	enum ProgramID
   1110 	{
   1111 		// \note 0-based sequential numbering is relevant, because these are also used as vector indices.
   1112 		// \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
   1113 		PROGRAM_WITH_BIGGER_LOOP = 0,
   1114 		PROGRAM_WITH_SMALLER_LOOP,
   1115 
   1116 		PROGRAM_LAST
   1117 	};
   1118 
   1119 	ProgramContext			generateSingleProgramData		(ProgramID) const;
   1120 
   1121 	const string			m_op;
   1122 	const glu::DataType		m_type;
   1123 	const glu::Precision	m_precision;
   1124 	const bool				m_useSwizzle;
   1125 };
   1126 
   1127 BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op,
   1128 							glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration)
   1129 	: OperatorPerformanceCase	(context.getTestContext(), context.getRenderContext(), name, description,
   1130 								 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
   1131 	, m_op						(op)
   1132 	, m_type					(type)
   1133 	, m_precision				(precision)
   1134 	, m_useSwizzle				(useSwizzle)
   1135 {
   1136 }
   1137 
   1138 BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const
   1139 {
   1140 	DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));
   1141 
   1142 	const bool			isVertexCase	= m_caseType == CASETYPE_VERTEX;
   1143 	const char* const	precision		= glu::getPrecisionName(m_precision);
   1144 	const char* const	inputPrecision	= glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
   1145 	const char* const	typeName		= getDataTypeName(m_type);
   1146 
   1147 	std::ostringstream	vtx;
   1148 	std::ostringstream	frag;
   1149 	std::ostringstream&	op				= isVertexCase ? vtx : frag;
   1150 
   1151 	vtx << "#version 300 es\n";
   1152 	frag << "#version 300 es\n"
   1153 		 << "layout (location = 0) out mediump vec4 o_color;\n";
   1154 
   1155 	// Attributes.
   1156 	vtx << "in highp vec4 a_position;\n";
   1157 	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
   1158 		vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
   1159 
   1160 	if (isVertexCase)
   1161 	{
   1162 		vtx << "out mediump vec4 v_color;\n";
   1163 		frag << "in mediump vec4 v_color;\n";
   1164 	}
   1165 	else
   1166 	{
   1167 		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
   1168 		{
   1169 			vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
   1170 			frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
   1171 		}
   1172 	}
   1173 
   1174 	op << "uniform mediump int u_numLoopIterations;\n";
   1175 	if (isVertexCase)
   1176 		op << "uniform mediump float u_zero;\n";
   1177 
   1178 	vtx << "\n";
   1179 	vtx << "void main()\n";
   1180 	vtx << "{\n";
   1181 
   1182 	if (!isVertexCase)
   1183 		vtx << "\tgl_Position = a_position;\n";
   1184 
   1185 	frag << "\n";
   1186 	frag << "void main()\n";
   1187 	frag << "{\n";
   1188 
   1189 	// Expression inputs.
   1190 	const char* const prefix = isVertexCase ? "a_" : "v_";
   1191 	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
   1192 	{
   1193 		const int	inSize		= getDataTypeScalarSize(m_type);
   1194 		const bool	isInt		= de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
   1195 		const bool	cast		= isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);
   1196 
   1197 		op << "\t" << precision << " " << typeName << " in" << i << " = ";
   1198 
   1199 		if (cast)
   1200 			op << typeName << "(";
   1201 
   1202 		op << prefix << "in" << i;
   1203 
   1204 		if (m_useSwizzle)
   1205 			op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1];
   1206 
   1207 		if (cast)
   1208 			op << ")";
   1209 
   1210 		op << ";\n";
   1211 	}
   1212 
   1213 	// Operation accumulation variables.
   1214 	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
   1215 	{
   1216 		op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n";
   1217 		op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n";
   1218 	}
   1219 
   1220 	// Loop, with expressions in it.
   1221 	op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
   1222 	op << "\t{\n";
   1223 	{
   1224 		const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
   1225 		for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
   1226 		{
   1227 			for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
   1228 			{
   1229 				if (i > 0 || unrollNdx > 0)
   1230 					op << "\n";
   1231 				op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n";
   1232 				op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n";
   1233 			}
   1234 		}
   1235 	}
   1236 	op << "\t}\n";
   1237 	op << "\n";
   1238 
   1239 	// Result variable (sum of accumulation variables).
   1240 	op << "\t" << precision << " " << typeName << " res =";
   1241 	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
   1242 		op << (i > 0 ? " "+m_op : "") << " acc" << i << "b";
   1243 	op << ";\n";
   1244 
   1245 	// Convert to color.
   1246 	op << "\tmediump vec4 color = ";
   1247 	if (m_type == TYPE_FLOAT_VEC4)
   1248 		op << "res";
   1249 	else
   1250 	{
   1251 		int size = getDataTypeScalarSize(m_type);
   1252 		op << "vec4(res";
   1253 
   1254 		for (int i = size; i < 4; i++)
   1255 			op << ", " << (i == 3 ? "1.0" : "0.0");
   1256 
   1257 		op << ")";
   1258 	}
   1259 	op << ";\n";
   1260 	op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
   1261 
   1262 	if (isVertexCase)
   1263 	{
   1264 		vtx << "	gl_Position = a_position + u_zero*color;\n";
   1265 		frag << "	o_color = v_color;\n";
   1266 	}
   1267 	else
   1268 	{
   1269 		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
   1270 			vtx << "	v_in" << i << " = a_in" << i << ";\n";
   1271 	}
   1272 
   1273 	vtx << "}\n";
   1274 	frag << "}\n";
   1275 
   1276 	{
   1277 		vector<AttribSpec> attributes;
   1278 		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
   1279 			attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
   1280 											Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
   1281 											Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
   1282 											Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
   1283 											Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4)));
   1284 
   1285 		{
   1286 			string description = "This is the program with the ";
   1287 
   1288 			description += programID == PROGRAM_WITH_SMALLER_LOOP	? "smaller"
   1289 						 : programID == PROGRAM_WITH_BIGGER_LOOP	? "bigger"
   1290 						 : DE_NULL;
   1291 
   1292 			description += " loop.\n"
   1293 						   "Note: workload size for this program means the number of loop iterations.";
   1294 
   1295 			return ProgramContext(vtx.str(), frag.str(), attributes, description);
   1296 		}
   1297 	}
   1298 }
   1299 
   1300 vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const
   1301 {
   1302 	vector<ProgramContext> progData;
   1303 	for (int i = 0; i < PROGRAM_LAST; i++)
   1304 		progData.push_back(generateSingleProgramData((ProgramID)i));
   1305 	return progData;
   1306 }
   1307 
   1308 void BinaryOpCase::setGeneralUniforms (deUint32 program) const
   1309 {
   1310 	const glw::Functions& gl = m_renderCtx.getFunctions();
   1311 	gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
   1312 }
   1313 
   1314 void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
   1315 {
   1316 	const glw::Functions& gl = m_renderCtx.getFunctions();
   1317 	gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
   1318 }
   1319 
   1320 float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
   1321 {
   1322 	DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
   1323 
   1324 	const int		baseNumOpsInsideLoop				= 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
   1325 	const int		numOpsInsideLoopInSmallProgram		= baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
   1326 	const int		numOpsInsideLoopInBigProgram		= baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
   1327 	DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
   1328 	const int		opDiff								= numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
   1329 	const float		programOperationCostDiff			= perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];
   1330 
   1331 	return programOperationCostDiff / (float)opDiff;
   1332 }
   1333 
   1334 void BinaryOpCase::logSingleOperationCalculationInfo (void) const
   1335 {
   1336 	const int			baseNumOpsInsideLoop			= 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
   1337 	const int			numOpsInsideLoopInSmallProgram	= baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
   1338 	const int			numOpsInsideLoopInBigProgram	= baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
   1339 	const int			opDiff							= numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
   1340 	const char* const	opName							= m_op == "+" ? "addition"
   1341 														: m_op == "-" ? "subtraction"
   1342 														: m_op == "*" ? "multiplication"
   1343 														: m_op == "/" ? "division"
   1344 														: DE_NULL;
   1345 	DE_ASSERT(opName != DE_NULL);
   1346 
   1347 	m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more "
   1348 										   << opName << " operations in one loop iteration than the small program; "
   1349 										   << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
   1350 										   << TestLog::EndMessage;
   1351 }
   1352 
   1353 // Built-in function case.
   1354 class FunctionCase : public OperatorPerformanceCase
   1355 {
   1356 public:
   1357 	enum
   1358 	{
   1359 		MAX_PARAMS = 3
   1360 	};
   1361 
   1362 						FunctionCase			(Context&							context,
   1363 												 const char*						name,
   1364 												 const char*						description,
   1365 												 const char*						func,
   1366 												 glu::DataType						returnType,
   1367 												 const glu::DataType				paramTypes[MAX_PARAMS],
   1368 												 const Vec4&						attribute,
   1369 												 int								modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
   1370 												 bool								useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
   1371 												 glu::Precision						precision,
   1372 												 bool								isVertex,
   1373 												 const InitialCalibrationStorage&	initialCalibration);
   1374 
   1375 protected:
   1376 	vector<ProgramContext>	generateProgramData					(void) const;
   1377 	void					setGeneralUniforms					(deUint32 program) const;
   1378 	void					setWorkloadSizeUniform				(deUint32 program, int numOperations) const;
   1379 	float					computeSingleOperationTime			(const vector<float>& perProgramOperationCosts) const;
   1380 	void					logSingleOperationCalculationInfo	(void) const;
   1381 
   1382 private:
   1383 	enum ProgramID
   1384 	{
   1385 		// \note 0-based sequential numbering is relevant, because these are also used as vector indices.
   1386 		// \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
   1387 		PROGRAM_WITH_FUNCTION_CALLS = 0,
   1388 		PROGRAM_WITHOUT_FUNCTION_CALLS,
   1389 
   1390 		PROGRAM_LAST
   1391 	};
   1392 
   1393 	//! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
   1394 	static string		sumExpr						(const string& aExpr, const string& bExpr, glu::DataType type);
   1395 	//! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
   1396 	//! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
   1397 	//! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
   1398 	static string		incrementExpr				(const string& baseExpr, glu::DataType type, bool divide);
   1399 
   1400 	ProgramContext		generateSingleProgramData	(ProgramID) const;
   1401 
   1402 	const string			m_func;
   1403 	const glu::DataType		m_returnType;
   1404 	glu::DataType			m_paramTypes[MAX_PARAMS];
   1405 	// \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
   1406 	//		 compile-time constant (2.0) is added. This is a quick and dirty way to deal with
   1407 	//		 functions like clamp or smoothstep that require that a certain parameter is
   1408 	//		 greater than a certain other parameter.
   1409 	const int				m_modifyParamNdx;
   1410 	// \note m_useNearlyConstantInputs determines whether the inputs given to the function
   1411 	//		 should increase (w.r.t m_attribute) only by very small amounts. This is relevant
   1412 	//		 for functions like asin, which requires its inputs to be in a specific range.
   1413 	//		 In practice, this affects whether expressions used to increment the input
   1414 	//		 variables use division instead of multiplication; normally, multiplication is used,
   1415 	//		 but it's hard to keep the increments very small that way, and division shouldn't
   1416 	//		 be the default, since for many functions (probably not asin, luckily), division
   1417 	//		 is too heavy and dominates time-wise.
   1418 	const bool				m_useNearlyConstantInputs;
   1419 	const Vec4				m_attribute;
   1420 	const glu::Precision	m_precision;
   1421 };
   1422 
   1423 FunctionCase::FunctionCase (Context&							context,
   1424 							const char*							name,
   1425 							const char*							description,
   1426 							const char*							func,
   1427 							glu::DataType						returnType,
   1428 							const glu::DataType					paramTypes[MAX_PARAMS],
   1429 							const Vec4&							attribute,
   1430 							int									modifyParamNdx,
   1431 							bool								useNearlyConstantInputs,
   1432 							glu::Precision						precision,
   1433 							bool								isVertex,
   1434 							const InitialCalibrationStorage&	initialCalibration)
   1435 	: OperatorPerformanceCase	(context.getTestContext(), context.getRenderContext(), name, description,
   1436 								 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
   1437 	, m_func					(func)
   1438 	, m_returnType				(returnType)
   1439 	, m_modifyParamNdx			(modifyParamNdx)
   1440 	, m_useNearlyConstantInputs	(useNearlyConstantInputs)
   1441 	, m_attribute				(attribute)
   1442 	, m_precision				(precision)
   1443 {
   1444 	for (int i = 0; i < MAX_PARAMS; i++)
   1445 		m_paramTypes[i] = paramTypes[i];
   1446 }
   1447 
   1448 string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type)
   1449 {
   1450 	if (glu::isDataTypeBoolOrBVec(type))
   1451 	{
   1452 		if (type == glu::TYPE_BOOL)
   1453 			return "(" + aExpr + " == " + bExpr + ")";
   1454 		else
   1455 			return "equal(" + aExpr + ", " + bExpr + ")";
   1456 	}
   1457 	else
   1458 		return "(" + aExpr + " + " + bExpr + ")";
   1459 }
   1460 
   1461 string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide)
   1462 {
   1463 	const string mulOrDiv = divide ? "/" : "*";
   1464 
   1465 	return glu::isDataTypeBoolOrBVec(type)	? baseExpr
   1466 		 : glu::isDataTypeIntOrIVec(type)	? "(" + baseExpr + mulOrDiv + "(i+1))"
   1467 		 :									  "(" + baseExpr + mulOrDiv + "float(i+1))";
   1468 }
   1469 
   1470 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const
   1471 {
   1472 	const bool			isVertexCase			= m_caseType == CASETYPE_VERTEX;
   1473 	const char* const	precision				= glu::getPrecisionName(m_precision);
   1474 	const char* const	returnTypeName			= getDataTypeName(m_returnType);
   1475 	const string		returnPrecisionMaybe	= glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
   1476 	const char*			inputPrecision			= DE_NULL;
   1477 	const bool			isMatrixReturn			= isDataTypeMatrix(m_returnType);
   1478 	int					numParams				= 0;
   1479 	const char*			paramTypeNames[MAX_PARAMS];
   1480 	string				paramPrecisionsMaybe[MAX_PARAMS];
   1481 
   1482 	for (int i = 0; i < MAX_PARAMS; i++)
   1483 	{
   1484 		paramTypeNames[i]			= getDataTypeName(m_paramTypes[i]);
   1485 		paramPrecisionsMaybe[i]		= glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
   1486 
   1487 		if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
   1488 			inputPrecision = "mediump";
   1489 
   1490 		if (m_paramTypes[i] != TYPE_INVALID)
   1491 			numParams = i+1;
   1492 	}
   1493 
   1494 	DE_ASSERT(numParams > 0);
   1495 
   1496 	if (inputPrecision == DE_NULL)
   1497 		inputPrecision = precision;
   1498 
   1499 	int						numAttributes	= FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
   1500 	std::ostringstream		vtx;
   1501 	std::ostringstream		frag;
   1502 	std::ostringstream&		op				= isVertexCase ? vtx : frag;
   1503 
   1504 	vtx << "#version 300 es\n";
   1505 	frag << "#version 300 es\n"
   1506 		 << "layout (location = 0) out mediump vec4 o_color;\n";
   1507 
   1508 	// Attributes.
   1509 	vtx << "in highp vec4 a_position;\n";
   1510 	for (int i = 0; i < numAttributes; i++)
   1511 		vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
   1512 
   1513 	if (isVertexCase)
   1514 	{
   1515 		vtx << "out mediump vec4 v_color;\n";
   1516 		frag << "in mediump vec4 v_color;\n";
   1517 	}
   1518 	else
   1519 	{
   1520 		for (int i = 0; i < numAttributes; i++)
   1521 		{
   1522 			vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
   1523 			frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
   1524 		}
   1525 	}
   1526 
   1527 	op << "uniform mediump int u_numLoopIterations;\n";
   1528 	if (isVertexCase)
   1529 		op << "uniform mediump float u_zero;\n";
   1530 
   1531 	for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
   1532 		op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n";
   1533 
   1534 	vtx << "\n";
   1535 	vtx << "void main()\n";
   1536 	vtx << "{\n";
   1537 
   1538 	if (!isVertexCase)
   1539 		vtx << "\tgl_Position = a_position;\n";
   1540 
   1541 	frag << "\n";
   1542 	frag << "void main()\n";
   1543 	frag << "{\n";
   1544 
   1545 	// Function call input and return value accumulation variables.
   1546 	{
   1547 		const char* const inPrefix = isVertexCase ? "a_" : "v_";
   1548 
   1549 		for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
   1550 		{
   1551 			for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
   1552 			{
   1553 				const glu::DataType		paramType	= m_paramTypes[paramNdx];
   1554 				const bool				mustCast	= paramType != glu::TYPE_FLOAT_VEC4;
   1555 
   1556 				op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = ";
   1557 
   1558 				if (mustCast)
   1559 					op << paramTypeNames[paramNdx] << "(";
   1560 
   1561 				if (glu::isDataTypeMatrix(paramType))
   1562 				{
   1563 					static const char* const	swizzles[3]		= { "x", "xy", "xyz" };
   1564 					const int					numRows			= glu::getDataTypeMatrixNumRows(paramType);
   1565 					const int					numCols			= glu::getDataTypeMatrixNumColumns(paramType);
   1566 					const string				swizzle			= numRows < 4 ? string() + "." + swizzles[numRows-1] : "";
   1567 
   1568 					for (int i = 0; i < numCols; i++)
   1569 						op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle;
   1570 				}
   1571 				else
   1572 				{
   1573 					op << inPrefix << "in" << calcNdx+paramNdx;
   1574 
   1575 					if (paramNdx == m_modifyParamNdx)
   1576 					{
   1577 						DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
   1578 						op << " + 2.0";
   1579 					}
   1580 				}
   1581 
   1582 				if (mustCast)
   1583 					op << ")";
   1584 
   1585 				op << ";\n";
   1586 			}
   1587 
   1588 			op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n";
   1589 		}
   1590 	}
   1591 
   1592 	// Loop with expressions in it.
   1593 	op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
   1594 	op << "\t{\n";
   1595 	for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
   1596 	{
   1597 		if (calcNdx > 0)
   1598 			op << "\n";
   1599 
   1600 		op << "\t\t{\n";
   1601 
   1602 		for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
   1603 		{
   1604 			const string inputName	= "in" + de::toString(calcNdx) + (char)('a'+inputNdx);
   1605 			const string incName	= string() + "u_inc" + (char)('A'+inputNdx);
   1606 			const string incExpr	= incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
   1607 
   1608 			op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
   1609 		}
   1610 
   1611 		op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
   1612 
   1613 		if (programID == PROGRAM_WITH_FUNCTION_CALLS)
   1614 		{
   1615 			op << m_func << "(";
   1616 
   1617 			for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
   1618 			{
   1619 				if (paramNdx > 0)
   1620 					op << ", ";
   1621 
   1622 				op << "in" << calcNdx << (char)('a'+paramNdx);
   1623 			}
   1624 
   1625 			op << ")";
   1626 		}
   1627 		else
   1628 		{
   1629 			DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
   1630 			op << returnTypeName << "(1)";
   1631 		}
   1632 
   1633 		op << ";\n";
   1634 
   1635 		{
   1636 			const string resName	= "res" + de::toString(calcNdx);
   1637 			const string evalName	= "eval" + de::toString(calcNdx);
   1638 			const string incExpr	= incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
   1639 
   1640 			op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
   1641 		}
   1642 
   1643 		op << "\t\t}\n";
   1644 	}
   1645 	op << "\t}\n";
   1646 	op << "\n";
   1647 
   1648 	// Result variables.
   1649 	for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
   1650 	{
   1651 		op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = ";
   1652 		{
   1653 			string expr = string() + "in0" + (char)('a'+inputNdx);
   1654 			for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
   1655 				expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]);
   1656 			op << expr;
   1657 		}
   1658 		op << ";\n";
   1659 	}
   1660 
   1661 	op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
   1662 	{
   1663 		string expr = "res0";
   1664 		for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
   1665 			expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
   1666 		op << expr;
   1667 	}
   1668 	op << ";\n";
   1669 
   1670 	{
   1671 		glu::DataType finalResultDataType = glu::TYPE_LAST;
   1672 
   1673 		if (glu::isDataTypeMatrix(m_returnType))
   1674 		{
   1675 			finalResultDataType = m_returnType;
   1676 
   1677 			op << "\t" << precision << " " << returnTypeName << " finalRes = ";
   1678 
   1679 			for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
   1680 			{
   1681 				DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
   1682 				op << "sumIn" << (char)('A'+inputNdx) << " + ";
   1683 			}
   1684 			op << "sumRes;\n";
   1685 		}
   1686 		else
   1687 		{
   1688 			int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
   1689 			for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
   1690 				numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
   1691 
   1692 			finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
   1693 
   1694 			{
   1695 				const string finalResType = glu::getDataTypeName(finalResultDataType);
   1696 				op << "\t" << precision << " " << finalResType << " finalRes = ";
   1697 				for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
   1698 					op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + ";
   1699 				op << finalResType << "(sumRes);\n";
   1700 			}
   1701 		}
   1702 
   1703 		// Convert to color.
   1704 		op << "\tmediump vec4 color = ";
   1705 		if (finalResultDataType == TYPE_FLOAT_VEC4)
   1706 			op << "finalRes";
   1707 		else
   1708 		{
   1709 			int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType);
   1710 
   1711 			op << "vec4(";
   1712 
   1713 			if (isMatrixReturn)
   1714 			{
   1715 				for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
   1716 				{
   1717 					if (i > 0)
   1718 						op << " + ";
   1719 					op << "finalRes[" << i << "]";
   1720 				}
   1721 			}
   1722 			else
   1723 				op << "finalRes";
   1724 
   1725 			for (int i = size; i < 4; i++)
   1726 				op << ", " << (i == 3 ? "1.0" : "0.0");
   1727 
   1728 			op << ")";
   1729 		}
   1730 		op << ";\n";
   1731 		op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
   1732 
   1733 		if (isVertexCase)
   1734 		{
   1735 			vtx << "	gl_Position = a_position + u_zero*color;\n";
   1736 			frag << "	o_color = v_color;\n";
   1737 		}
   1738 		else
   1739 		{
   1740 			for (int i = 0; i < numAttributes; i++)
   1741 				vtx << "	v_in" << i << " = a_in" << i << ";\n";
   1742 		}
   1743 
   1744 		vtx << "}\n";
   1745 		frag << "}\n";
   1746 	}
   1747 
   1748 	{
   1749 		vector<AttribSpec> attributes;
   1750 		for (int i = 0; i < numAttributes; i++)
   1751 			attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
   1752 											m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
   1753 											m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4),
   1754 											m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4),
   1755 											m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4)));
   1756 
   1757 		{
   1758 			string description = "This is the program ";
   1759 
   1760 			description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS	? "without"
   1761 						 : programID == PROGRAM_WITH_FUNCTION_CALLS		? "with"
   1762 						 : DE_NULL;
   1763 
   1764 			description += " '" + m_func + "' function calls.\n"
   1765 						   "Note: workload size for this program means the number of loop iterations.";
   1766 
   1767 			return ProgramContext(vtx.str(), frag.str(), attributes, description);
   1768 		}
   1769 	}
   1770 }
   1771 
   1772 vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const
   1773 {
   1774 	vector<ProgramContext> progData;
   1775 	for (int i = 0; i < PROGRAM_LAST; i++)
   1776 		progData.push_back(generateSingleProgramData((ProgramID)i));
   1777 	return progData;
   1778 }
   1779 
   1780 void FunctionCase::setGeneralUniforms (deUint32 program) const
   1781 {
   1782 	const glw::Functions& gl = m_renderCtx.getFunctions();
   1783 
   1784 	gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
   1785 
   1786 	for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
   1787 	{
   1788 		if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
   1789 		{
   1790 			const glu::DataType		paramType	= m_paramTypes[paramNdx];
   1791 			const int				scalarSize	= glu::getDataTypeScalarSize(paramType);
   1792 			const int				location	= gl.getUniformLocation(program, (string() + "u_inc" + (char)('A'+paramNdx)).c_str());
   1793 
   1794 			if (glu::isDataTypeFloatOrVec(paramType))
   1795 			{
   1796 				float values[4];
   1797 				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
   1798 					values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary small values.
   1799 				uniformNfv(gl, scalarSize, location, 1, &values[0]);
   1800 			}
   1801 			else if (glu::isDataTypeIntOrIVec(paramType))
   1802 			{
   1803 				int values[4];
   1804 				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
   1805 					values[i] = paramNdx*100 + i; // Arbitrary values.
   1806 				uniformNiv(gl, scalarSize, location, 1, &values[0]);
   1807 			}
   1808 			else if (glu::isDataTypeBoolOrBVec(paramType))
   1809 			{
   1810 				int values[4];
   1811 				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
   1812 					values[i] = (paramNdx >> i) & 1; // Arbitrary values.
   1813 				uniformNiv(gl, scalarSize, location, 1, &values[0]);
   1814 			}
   1815 			else if (glu::isDataTypeMatrix(paramType))
   1816 			{
   1817 				const int size = glu::getDataTypeMatrixNumRows(paramType);
   1818 				DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType));
   1819 				float values[4*4];
   1820 				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
   1821 					values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary values.
   1822 				uniformMatrixNfv(gl, size, location, 1, &values[0]);
   1823 			}
   1824 			else
   1825 				DE_ASSERT(false);
   1826 		}
   1827 	}
   1828 }
   1829 
   1830 void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
   1831 {
   1832 	const glw::Functions&	gl		= m_renderCtx.getFunctions();
   1833 	const int				loc		= gl.getUniformLocation(program, "u_numLoopIterations");
   1834 
   1835 	gl.uniform1i(loc, numLoopIterations);
   1836 }
   1837 
   1838 float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
   1839 {
   1840 	DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
   1841 	const int		numFunctionCalls			= FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
   1842 	const float		programOperationCostDiff	= perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
   1843 
   1844 	return programOperationCostDiff / (float)numFunctionCalls;
   1845 }
   1846 
   1847 void FunctionCase::logSingleOperationCalculationInfo (void) const
   1848 {
   1849 	const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
   1850 
   1851 	m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
   1852 										   << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
   1853 										   << "cost of one operation is calculated as "
   1854 										   << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage;
   1855 }
   1856 
   1857 } // anonymous
   1858 
//! Creates the "operator" test group ("Operator Performance Tests").
//! \note No child cases are created here; the constructor body is empty.
ShaderOperatorTests::ShaderOperatorTests (Context& context)
	: TestCaseGroup(context, "operator", "Operator Performance Tests")
{
}
   1863 
//! Destructor; this class does no cleanup of its own here.
ShaderOperatorTests::~ShaderOperatorTests (void)
{
}
   1867 
   1868 void ShaderOperatorTests::init (void)
   1869 {
   1870 	// Binary operator cases
   1871 
   1872 	static const DataType binaryOpTypes[] =
   1873 	{
   1874 		TYPE_FLOAT,
   1875 		TYPE_FLOAT_VEC2,
   1876 		TYPE_FLOAT_VEC3,
   1877 		TYPE_FLOAT_VEC4,
   1878 		TYPE_INT,
   1879 		TYPE_INT_VEC2,
   1880 		TYPE_INT_VEC3,
   1881 		TYPE_INT_VEC4,
   1882 	};
   1883 	static const Precision precisions[] =
   1884 	{
   1885 		PRECISION_LOWP,
   1886 		PRECISION_MEDIUMP,
   1887 		PRECISION_HIGHP
   1888 	};
   1889 	static const struct
   1890 	{
   1891 		const char*		name;
   1892 		const char*		op;
   1893 		bool			swizzle;
   1894 	} binaryOps[] =
   1895 	{
   1896 		{ "add",		"+",		false	},
   1897 		{ "sub",		"-",		true	},
   1898 		{ "mul",		"*",		false	},
   1899 		{ "div",		"/",		true	}
   1900 	};
   1901 
   1902 	tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
   1903 	addChild(binaryOpsGroup);
   1904 
   1905 	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
   1906 	{
   1907 		tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
   1908 		binaryOpsGroup->addChild(opGroup);
   1909 
   1910 		for (int isFrag = 0; isFrag <= 1; isFrag++)
   1911 		{
   1912 			const BinaryOpCase::InitialCalibrationStorage	shaderGroupCalibrationStorage	(new BinaryOpCase::InitialCalibration);
   1913 			const bool										isVertex						= isFrag == 0;
   1914 			tcu::TestCaseGroup* const						shaderGroup						= new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
   1915 			opGroup->addChild(shaderGroup);
   1916 
   1917 			for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
   1918 			{
   1919 				for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
   1920 				{
   1921 					const DataType		type			= binaryOpTypes[typeNdx];
   1922 					const Precision		precision		= precisions[precNdx];
   1923 					const char* const	op				= binaryOps[opNdx].op;
   1924 					const bool			useSwizzle		= binaryOps[opNdx].swizzle;
   1925 					std::ostringstream	name;
   1926 
   1927 					name << getPrecisionName(precision) << "_" << getDataTypeName(type);
   1928 
   1929 					shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage));
   1930 				}
   1931 			}
   1932 		}
   1933 	}
   1934 
   1935 	// Built-in function cases.
   1936 
   1937 	// Non-specific (i.e. includes gentypes) parameter types for the functions.
   1938 	enum ValueType
   1939 	{
   1940 		VALUE_NONE			= 0,
   1941 		VALUE_FLOAT			= (1<<0),	// float scalar
   1942 		VALUE_FLOAT_VEC		= (1<<1),	// float vector
   1943 		VALUE_FLOAT_VEC34	= (1<<2),	// float vector of size 3 or 4
   1944 		VALUE_FLOAT_GENTYPE	= (1<<3),	// float scalar/vector
   1945 		VALUE_VEC3			= (1<<4),	// vec3 only
   1946 		VALUE_VEC4			= (1<<5),	// vec4 only
   1947 		VALUE_MATRIX		= (1<<6),	// matrix
   1948 		VALUE_BOOL			= (1<<7),	// boolean scalar
   1949 		VALUE_BOOL_VEC		= (1<<8),	// boolean vector
   1950 		VALUE_BOOL_VEC4		= (1<<9),	// bvec4 only
   1951 		VALUE_BOOL_GENTYPE	= (1<<10),	// boolean scalar/vector
   1952 		VALUE_INT			= (1<<11),	// int scalar
   1953 		VALUE_INT_VEC		= (1<<12),	// int vector
   1954 		VALUE_INT_VEC4		= (1<<13),	// ivec4 only
   1955 		VALUE_INT_GENTYPE	= (1<<14),	// int scalar/vector
   1956 
   1957 		// Shorthands.
   1958 		N				= VALUE_NONE,
   1959 		F				= VALUE_FLOAT,
   1960 		FV				= VALUE_FLOAT_VEC,
   1961 		VL				= VALUE_FLOAT_VEC34, // L for "large"
   1962 		GT				= VALUE_FLOAT_GENTYPE,
   1963 		V3				= VALUE_VEC3,
   1964 		V4				= VALUE_VEC4,
   1965 		M				= VALUE_MATRIX,
   1966 		B				= VALUE_BOOL,
   1967 		BV				= VALUE_BOOL_VEC,
   1968 		B4				= VALUE_BOOL_VEC4,
   1969 		BGT				= VALUE_BOOL_GENTYPE,
   1970 		I				= VALUE_INT,
   1971 		IV				= VALUE_INT_VEC,
   1972 		I4				= VALUE_INT_VEC4,
   1973 		IGT				= VALUE_INT_GENTYPE,
   1974 
   1975 		VALUE_ANY_FLOAT			= VALUE_FLOAT		|	VALUE_FLOAT_VEC		|	VALUE_FLOAT_GENTYPE		| VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
   1976 		VALUE_ANY_INT			= VALUE_INT			|	VALUE_INT_VEC		|	VALUE_INT_GENTYPE		| VALUE_INT_VEC4,
   1977 		VALUE_ANY_BOOL			= VALUE_BOOL		|	VALUE_BOOL_VEC		|	VALUE_BOOL_GENTYPE		| VALUE_BOOL_VEC4,
   1978 
   1979 		VALUE_ANY_GENTYPE		= VALUE_FLOAT_VEC	|	VALUE_FLOAT_GENTYPE	|	VALUE_FLOAT_VEC34	|
   1980 								  VALUE_BOOL_VEC	|	VALUE_BOOL_GENTYPE	|
   1981 								  VALUE_INT_VEC		|	VALUE_INT_GENTYPE	|
   1982 								  VALUE_MATRIX
   1983 	};
   1984 	enum PrecisionMask
   1985 	{
   1986 		PRECMASK_NA				= 0,						//!< Precision not applicable (booleans)
   1987 		PRECMASK_LOWP			= (1<<PRECISION_LOWP),
   1988 		PRECMASK_MEDIUMP		= (1<<PRECISION_MEDIUMP),
   1989 		PRECMASK_HIGHP			= (1<<PRECISION_HIGHP),
   1990 
   1991 		PRECMASK_MEDIUMP_HIGHP	= (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP),
   1992 		PRECMASK_ALL			= (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP)
   1993 	};
   1994 
   1995 	static const DataType floatTypes[] =
   1996 	{
   1997 		TYPE_FLOAT,
   1998 		TYPE_FLOAT_VEC2,
   1999 		TYPE_FLOAT_VEC3,
   2000 		TYPE_FLOAT_VEC4
   2001 	};
   2002 	static const DataType intTypes[] =
   2003 	{
   2004 		TYPE_INT,
   2005 		TYPE_INT_VEC2,
   2006 		TYPE_INT_VEC3,
   2007 		TYPE_INT_VEC4
   2008 	};
   2009 	static const DataType boolTypes[] =
   2010 	{
   2011 		TYPE_BOOL,
   2012 		TYPE_BOOL_VEC2,
   2013 		TYPE_BOOL_VEC3,
   2014 		TYPE_BOOL_VEC4
   2015 	};
   2016 	static const DataType matrixTypes[] =
   2017 	{
   2018 		TYPE_FLOAT_MAT2,
   2019 		TYPE_FLOAT_MAT3,
   2020 		TYPE_FLOAT_MAT4
   2021 	};
   2022 
   2023 	tcu::TestCaseGroup* const angleAndTrigonometryGroup		= new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry",	"Built-In Angle and Trigonometry Function Performance Tests");
   2024 	tcu::TestCaseGroup* const exponentialGroup				= new tcu::TestCaseGroup(m_testCtx, "exponential",				"Built-In Exponential Function Performance Tests");
   2025 	tcu::TestCaseGroup* const commonFunctionsGroup			= new tcu::TestCaseGroup(m_testCtx, "common_functions",			"Built-In Common Function Performance Tests");
   2026 	tcu::TestCaseGroup* const geometricFunctionsGroup		= new tcu::TestCaseGroup(m_testCtx, "geometric",				"Built-In Geometric Function Performance Tests");
   2027 	tcu::TestCaseGroup* const matrixFunctionsGroup			= new tcu::TestCaseGroup(m_testCtx, "matrix",					"Built-In Matrix Function Performance Tests");
   2028 	tcu::TestCaseGroup* const floatCompareGroup				= new tcu::TestCaseGroup(m_testCtx, "float_compare",			"Built-In Floating Point Comparison Function Performance Tests");
   2029 	tcu::TestCaseGroup* const intCompareGroup				= new tcu::TestCaseGroup(m_testCtx, "int_compare",				"Built-In Integer Comparison Function Performance Tests");
   2030 	tcu::TestCaseGroup* const boolCompareGroup				= new tcu::TestCaseGroup(m_testCtx, "bool_compare",				"Built-In Boolean Comparison Function Performance Tests");
   2031 
   2032 	addChild(angleAndTrigonometryGroup);
   2033 	addChild(exponentialGroup);
   2034 	addChild(commonFunctionsGroup);
   2035 	addChild(geometricFunctionsGroup);
   2036 	addChild(matrixFunctionsGroup);
   2037 	addChild(floatCompareGroup);
   2038 	addChild(intCompareGroup);
   2039 	addChild(boolCompareGroup);
   2040 
   2041 	// Some attributes to be used as parameters for the functions.
   2042 	const Vec4 attrPos		= Vec4( 2.3f,  1.9f,  0.8f,  0.7f);
   2043 	const Vec4 attrNegPos	= Vec4(-1.3f,  2.5f, -3.5f,	 4.3f);
   2044 	const Vec4 attrSmall	= Vec4(-0.9f,  0.8f, -0.4f,	 0.2f);
   2045 	const Vec4 attrBig		= Vec4( 1.3f,  2.4f,  3.0f,	 4.0f);
   2046 
   2047 	// \todo The following functions and variants are missing, and should be added in the future:
   2048 	//		 - modf (has an output parameter, not currently handled by test code)
   2049 	//		 - functions with uint/uvec* return or parameter types
   2050 	//		 - non-matrix <-> matrix functions (outerProduct etc.)
   2051 	// \note Remember to update test spec when these are added.
   2052 
   2053 	// Function name, return type and parameter type information; also, what attribute should be used in the test.
   2054 	// \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
   2055 	// \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
   2056 	static const struct
   2057 	{
   2058 		tcu::TestCaseGroup*					parentGroup;
   2059 		const char*							groupName;
   2060 		const char*							func;
   2061 		const ValueType						types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
   2062 		const Vec4&							attribute;
   2063 		int									modifyParamNdx;
   2064 		bool								useNearlyConstantInputs;
   2065 		bool								booleanCase;
   2066 		PrecisionMask						precMask;
   2067 	} functionCaseGroups[] =
   2068 	{
   2069 		{ angleAndTrigonometryGroup,	"radians",			"radians",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2070 		{ angleAndTrigonometryGroup,	"degrees",			"degrees",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2071 		{ angleAndTrigonometryGroup,	"sin",				"sin",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2072 		{ angleAndTrigonometryGroup,	"cos",				"cos",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2073 		{ angleAndTrigonometryGroup,	"tan",				"tan",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2074 		{ angleAndTrigonometryGroup,	"asin",				"asin",				{ F,  F,  N,  N  }, attrSmall,		-1, true,	false,	PRECMASK_ALL			},
   2075 		{ angleAndTrigonometryGroup,	"acos",				"acos",				{ F,  F,  N,  N  }, attrSmall,		-1, true,	false,	PRECMASK_ALL			},
   2076 		{ angleAndTrigonometryGroup,	"atan2",			"atan",				{ F,  F,  F,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2077 		{ angleAndTrigonometryGroup,	"atan",				"atan",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2078 		{ angleAndTrigonometryGroup,	"sinh",				"sinh",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2079 		{ angleAndTrigonometryGroup,	"cosh",				"cosh",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2080 		{ angleAndTrigonometryGroup,	"tanh",				"tanh",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2081 		{ angleAndTrigonometryGroup,	"asinh",			"asinh",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2082 		{ angleAndTrigonometryGroup,	"acosh",			"acosh",			{ F,  F,  N,  N  }, attrBig,		-1, false,	false,	PRECMASK_ALL			},
   2083 		{ angleAndTrigonometryGroup,	"atanh",			"atanh",			{ F,  F,  N,  N  }, attrSmall,		-1, true,	false,	PRECMASK_ALL			},
   2084 
   2085 		{ exponentialGroup,				"pow",				"pow",				{ F,  F,  F,  N  }, attrPos,		-1, false,	false,	PRECMASK_ALL			},
   2086 		{ exponentialGroup,				"exp",				"exp",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2087 		{ exponentialGroup,				"log",				"log",				{ F,  F,  N,  N  }, attrPos,		-1, false,	false,	PRECMASK_ALL			},
   2088 		{ exponentialGroup,				"exp2",				"exp2",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2089 		{ exponentialGroup,				"log2",				"log2",				{ F,  F,  N,  N  }, attrPos,		-1, false,	false,	PRECMASK_ALL			},
   2090 		{ exponentialGroup,				"sqrt",				"sqrt",				{ F,  F,  N,  N  }, attrPos,		-1, false,	false,	PRECMASK_ALL			},
   2091 		{ exponentialGroup,				"inversesqrt",		"inversesqrt",		{ F,  F,  N,  N  }, attrPos,		-1, false,	false,	PRECMASK_ALL			},
   2092 
   2093 		{ commonFunctionsGroup,			"abs",				"abs",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2094 		{ commonFunctionsGroup,			"abs",				"abs",				{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2095 		{ commonFunctionsGroup,			"sign",				"sign",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2096 		{ commonFunctionsGroup,			"sign",				"sign",				{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2097 		{ commonFunctionsGroup,			"floor",			"floor",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2098 		{ commonFunctionsGroup,			"floor",			"floor",			{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2099 		{ commonFunctionsGroup,			"trunc",			"trunc",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2100 		{ commonFunctionsGroup,			"trunc",			"trunc",			{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2101 		{ commonFunctionsGroup,			"round",			"round",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2102 		{ commonFunctionsGroup,			"round",			"round",			{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2103 		{ commonFunctionsGroup,			"roundEven",		"roundEven",		{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2104 		{ commonFunctionsGroup,			"roundEven",		"roundEven",		{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2105 		{ commonFunctionsGroup,			"ceil",				"ceil",				{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2106 		{ commonFunctionsGroup,			"ceil",				"ceil",				{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2107 		{ commonFunctionsGroup,			"fract",			"fract",			{ F,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2108 		{ commonFunctionsGroup,			"fract",			"fract",			{ V4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2109 		{ commonFunctionsGroup,			"mod",				"mod",				{ GT, GT, GT, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2110 		{ commonFunctionsGroup,			"min",				"min",				{ F,  F,  F,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2111 		{ commonFunctionsGroup,			"min",				"min",				{ V4, V4, V4, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2112 		{ commonFunctionsGroup,			"max",				"max",				{ F,  F,  F,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2113 		{ commonFunctionsGroup,			"max",				"max",				{ V4, V4, V4, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2114 		{ commonFunctionsGroup,			"clamp",			"clamp",			{ F,  F,  F,  F  }, attrSmall,		 2, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2115 		{ commonFunctionsGroup,			"clamp",			"clamp",			{ V4, V4, V4, V4 }, attrSmall,		 2, false,	false,	PRECMASK_ALL			},
   2116 		{ commonFunctionsGroup,			"mix",				"mix",				{ F,  F,  F,  F  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2117 		{ commonFunctionsGroup,			"mix",				"mix",				{ V4, V4, V4, V4 }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2118 		{ commonFunctionsGroup,			"mix",				"mix",				{ F,  F,  F,  B  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2119 		{ commonFunctionsGroup,			"mix",				"mix",				{ V4, V4, V4, B4 }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2120 		{ commonFunctionsGroup,			"step",				"step",				{ F,  F,  F,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2121 		{ commonFunctionsGroup,			"step",				"step",				{ V4, V4, V4, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2122 		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ F,  F,  F,  F  }, attrSmall,		 1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2123 		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ V4, V4, V4, V4 }, attrSmall,		 1, false,	false,	PRECMASK_ALL			},
   2124 		{ commonFunctionsGroup,			"isnan",			"isnan",			{ B,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2125 		{ commonFunctionsGroup,			"isnan",			"isnan",			{ B4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2126 		{ commonFunctionsGroup,			"isinf",			"isinf",			{ B,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2127 		{ commonFunctionsGroup,			"isinf",			"isinf",			{ B4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2128 		{ commonFunctionsGroup,			"floatBitsToInt",	"floatBitsToInt",	{ I,  F,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2129 		{ commonFunctionsGroup,			"floatBitsToInt",	"floatBitsToInt",	{ I4, V4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2130 		{ commonFunctionsGroup,			"intBitsToFloat",	"intBitsToFloat",	{ F,  I,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_MEDIUMP_HIGHP	},
   2131 		{ commonFunctionsGroup,			"intBitsToFloat",	"intBitsToFloat",	{ V4, I4, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2132 
   2133 		{ geometricFunctionsGroup,		"length",			"length",			{ F,  VL, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2134 		{ geometricFunctionsGroup,		"distance",			"distance",			{ F,  VL, VL, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2135 		{ geometricFunctionsGroup,		"dot",				"dot",				{ F,  VL, VL, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2136 		{ geometricFunctionsGroup,		"cross",			"cross",			{ V3, V3, V3, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2137 		{ geometricFunctionsGroup,		"normalize",		"normalize",		{ VL, VL, N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2138 		{ geometricFunctionsGroup,		"faceforward",		"faceforward",		{ VL, VL, VL, VL }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2139 		{ geometricFunctionsGroup,		"reflect",			"reflect",			{ VL, VL, VL, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2140 		{ geometricFunctionsGroup,		"refract",			"refract",			{ VL, VL, VL, F  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2141 
   2142 		{ matrixFunctionsGroup,			"matrixCompMult",	"matrixCompMult",	{ M,  M,  M,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2143 		{ matrixFunctionsGroup,			"transpose",		"transpose",		{ M,  M,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2144 		{ matrixFunctionsGroup,			"inverse",			"inverse",			{ M,  M,  N,  N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2145 
   2146 		{ floatCompareGroup,			"lessThan",			"lessThan",			{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2147 		{ floatCompareGroup,			"lessThanEqual",	"lessThanEqual",	{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2148 		{ floatCompareGroup,			"greaterThan",		"greaterThan",		{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2149 		{ floatCompareGroup,			"greaterThanEqual",	"greaterThanEqual",	{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2150 		{ floatCompareGroup,			"equal",			"equal",			{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2151 		{ floatCompareGroup,			"notEqual",			"notEqual",			{ BV, FV, FV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2152 
   2153 		{ intCompareGroup,				"lessThan",			"lessThan",			{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2154 		{ intCompareGroup,				"lessThanEqual",	"lessThanEqual",	{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2155 		{ intCompareGroup,				"greaterThan",		"greaterThan",		{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2156 		{ intCompareGroup,				"greaterThanEqual",	"greaterThanEqual",	{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2157 		{ intCompareGroup,				"equal",			"equal",			{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2158 		{ intCompareGroup,				"notEqual",			"notEqual",			{ BV, IV, IV, N  }, attrNegPos,		-1, false,	false,	PRECMASK_ALL			},
   2159 
   2160 		{ boolCompareGroup,				"equal",			"equal",			{ BV, BV, BV, N  }, attrNegPos,		-1, false,	true,	PRECMASK_MEDIUMP		},
   2161 		{ boolCompareGroup,				"notEqual",			"notEqual",			{ BV, BV, BV, N  }, attrNegPos,		-1, false,	true,	PRECMASK_MEDIUMP		},
   2162 		{ boolCompareGroup,				"any",				"any",				{ B,  BV, N,  N  }, attrNegPos,		-1, false,	true,	PRECMASK_MEDIUMP		},
   2163 		{ boolCompareGroup,				"all",				"all",				{ B,  BV, N,  N  }, attrNegPos,		-1, false,	true,	PRECMASK_MEDIUMP		},
   2164 		{ boolCompareGroup,				"not",				"not",				{ BV, BV, N,  N  }, attrNegPos,		-1, false,	true,	PRECMASK_MEDIUMP		}
   2165 	};
   2166 
   2167 	// vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
   2168 	// \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
   2169 	tcu::TestCaseGroup*							vertexSubGroup		= DE_NULL;
   2170 	tcu::TestCaseGroup*							fragmentSubGroup	= DE_NULL;
   2171 	FunctionCase::InitialCalibrationStorage		vertexSubGroupCalibrationStorage;
   2172 	FunctionCase::InitialCalibrationStorage		fragmentSubGroupCalibrationStorage;
   2173 	for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
   2174 	{
   2175 		tcu::TestCaseGroup* const	parentGroup					= functionCaseGroups[funcNdx].parentGroup;
   2176 		const char* const			groupName					= functionCaseGroups[funcNdx].groupName;
   2177 		const char* const			groupFunc					= functionCaseGroups[funcNdx].func;
   2178 		const ValueType* const		funcTypes					= functionCaseGroups[funcNdx].types;
   2179 		const Vec4&					groupAttribute				= functionCaseGroups[funcNdx].attribute;
   2180 		const int					modifyParamNdx				= functionCaseGroups[funcNdx].modifyParamNdx;
   2181 		const bool					useNearlyConstantInputs		= functionCaseGroups[funcNdx].useNearlyConstantInputs;
   2182 		const bool					booleanCase					= functionCaseGroups[funcNdx].booleanCase;
   2183 		const PrecisionMask			precMask					= functionCaseGroups[funcNdx].precMask;
   2184 
   2185 		// If this is a new function and not just a different version of the previously defined function, create a new group.
   2186 		if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName)
   2187 		{
   2188 			tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
   2189 			functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);
   2190 
   2191 			vertexSubGroup		= new tcu::TestCaseGroup(m_testCtx, "vertex", "");
   2192 			fragmentSubGroup	= new tcu::TestCaseGroup(m_testCtx, "fragment", "");
   2193 
   2194 			funcGroup->addChild(vertexSubGroup);
   2195 			funcGroup->addChild(fragmentSubGroup);
   2196 
   2197 			vertexSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
   2198 			fragmentSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
   2199 		}
   2200 
   2201 		DE_ASSERT(vertexSubGroup != DE_NULL);
   2202 		DE_ASSERT(fragmentSubGroup != DE_NULL);
   2203 
   2204 		// Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
   2205 		int genTypeFirstSize	= 1;
   2206 		int genTypeLastSize		= 1;
   2207 
   2208 		// Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
   2209 		// \note Assumes all gentypes in a signature share the same size range (only the first one found is used), e.g. no "genType func (vec param)"
   2210 		for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
   2211 		{
   2212 			switch (funcTypes[i])
   2213 			{
   2214 				case VALUE_FLOAT_VEC:
   2215 				case VALUE_BOOL_VEC:
   2216 				case VALUE_INT_VEC:			// \note Fall-through.
   2217 					genTypeFirstSize = 2;
   2218 					genTypeLastSize = 4;
   2219 					break;
   2220 				case VALUE_FLOAT_VEC34:
   2221 					genTypeFirstSize = 3;
   2222 					genTypeLastSize = 4;
   2223 					break;
   2224 				case VALUE_FLOAT_GENTYPE:
   2225 				case VALUE_BOOL_GENTYPE:
   2226 				case VALUE_INT_GENTYPE:		// \note Fall-through.
   2227 					genTypeFirstSize = 1;
   2228 					genTypeLastSize = 4;
   2229 					break;
   2230 				case VALUE_MATRIX:
   2231 					genTypeFirstSize = 2;
   2232 					genTypeLastSize = 4;
   2233 					break;
   2234 				// If none of the above, keep looping.
   2235 				default:
   2236 					break;
   2237 			}
   2238 		}
   2239 
   2240 		// Create a case for each possible size of the gentype.
   2241 		for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
   2242 		{
   2243 			// Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
   2244 			DataType types[FunctionCase::MAX_PARAMS + 1];
   2245 			for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
   2246 			{
   2247 				if (funcTypes[i] == VALUE_NONE)
   2248 					types[i] = TYPE_INVALID;
   2249 				else
   2250 				{
   2251 					int isFloat	= funcTypes[i] & VALUE_ANY_FLOAT;
   2252 					int isBool	= funcTypes[i] & VALUE_ANY_BOOL;
   2253 					int isInt	= funcTypes[i] & VALUE_ANY_INT;
   2254 					int isMat	= funcTypes[i] == VALUE_MATRIX;
   2255 					int inSize	= (funcTypes[i] & VALUE_ANY_GENTYPE)	? curSize
   2256 								: funcTypes[i] == VALUE_VEC3			? 3
   2257 								: funcTypes[i] == VALUE_VEC4			? 4
   2258 								: funcTypes[i] == VALUE_BOOL_VEC4		? 4
   2259 								: funcTypes[i] == VALUE_INT_VEC4		? 4
   2260 								: 1;
   2261 					int			typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.
   2262 
   2263 					types[i]	= isFloat	? floatTypes[typeArrayNdx]
   2264 								: isBool	? boolTypes[typeArrayNdx]
   2265 								: isInt		? intTypes[typeArrayNdx]
   2266 								: isMat		? matrixTypes[typeArrayNdx]
   2267 								: TYPE_LAST;
   2268 				}
   2269 
   2270 				DE_ASSERT(types[i] != TYPE_LAST);
   2271 			}
   2272 
   2273 			// Array for just the parameter types.
   2274 			DataType paramTypes[FunctionCase::MAX_PARAMS];
   2275 			for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
   2276 				paramTypes[i] = types[i+1];
   2277 
   2278 			for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
   2279 			{
   2280 				if ((precMask & (1 << prec)) == 0)
   2281 					continue;
   2282 
   2283 				const string		precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
   2284 				std::ostringstream	caseName;
   2285 
   2286 				caseName << precisionPrefix;
   2287 
   2288 				// Write the name of each distinct parameter data type into the test case name.
   2289 				for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
   2290 				{
   2291 					if (i == 1 || types[i] != types[i-1])
   2292 					{
   2293 						if (i > 1)
   2294 							caseName << "_";
   2295 
   2296 						caseName << getDataTypeName(types[i]);
   2297 					}
   2298 				}
   2299 
   2300 				for (int fragI = 0; fragI <= 1; fragI++)
   2301 				{
   2302 					const bool					vert	= fragI == 0;
   2303 					tcu::TestCaseGroup* const	group	= vert ? vertexSubGroup : fragmentSubGroup;
   2304 					group->addChild	(new FunctionCase(m_context,
   2305 													  caseName.str().c_str(), "",
   2306 													  groupFunc,
   2307 													  types[0], paramTypes,
   2308 													  groupAttribute, modifyParamNdx, useNearlyConstantInputs,
   2309 													  (Precision)prec, vert,
   2310 													  vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
   2311 				}
   2312 			}
   2313 		}
   2314 	}
   2315 }
   2316 
   2317 } // Performance
   2318 } // gles3
   2319 } // deqp
   2320