Home | History | Annotate | Download | only in ssbo
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2015 The Khronos Group Inc.
      6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
      7  * Copyright (c) 2016 The Android Open Source Project
      8  *
      9  * Licensed under the Apache License, Version 2.0 (the "License");
     10  * you may not use this file except in compliance with the License.
     11  * You may obtain a copy of the License at
     12  *
     13  *      http://www.apache.org/licenses/LICENSE-2.0
     14  *
     15  * Unless required by applicable law or agreed to in writing, software
     16  * distributed under the License is distributed on an "AS IS" BASIS,
     17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     18  * See the License for the specific language governing permissions and
     19  * limitations under the License.
     20  *
     21  *//*!
     22  * \file
     23  * \brief SSBO layout case.
     24  *//*--------------------------------------------------------------------*/
     25 
     26 #include "vktSSBOLayoutCase.hpp"
     27 #include "gluShaderProgram.hpp"
     28 #include "gluContextInfo.hpp"
     29 #include "gluShaderUtil.hpp"
     30 #include "gluVarType.hpp"
     31 #include "gluVarTypeUtil.hpp"
     32 #include "tcuTestLog.hpp"
     33 #include "deRandom.hpp"
     34 #include "deStringUtil.hpp"
     35 #include "deMemory.h"
     36 #include "deString.h"
     37 #include "deMath.h"
     38 #include "deSharedPtr.hpp"
     39 
     40 #include "vkBuilderUtil.hpp"
     41 #include "vkMemUtil.hpp"
     42 #include "vkPrograms.hpp"
     43 #include "vkQueryUtil.hpp"
     44 #include "vkRef.hpp"
     45 #include "vkRefUtil.hpp"
     46 #include "vkTypeUtil.hpp"
     47 
     48 namespace vkt
     49 {
     50 namespace ssbo
     51 {
     52 
     53 using tcu::TestLog;
     54 using std::string;
     55 using std::vector;
     56 using glu::VarType;
     57 using glu::StructType;
     58 using glu::StructMember;
     59 
//! Helper wrapper for pretty-printing a LAYOUT_* flag bitmask via operator<<.
struct LayoutFlagsFmt
{
	deUint32 flags;	// Combination of LAYOUT_* bits.
	LayoutFlagsFmt (deUint32 flags_) : flags(flags_) {}
};
     65 
     66 std::ostream& operator<< (std::ostream& str, const LayoutFlagsFmt& fmt)
     67 {
     68 	static const struct
     69 	{
     70 		deUint32	bit;
     71 		const char*	token;
     72 	} bitDesc[] =
     73 	{
     74 		{ LAYOUT_STD140,		"std140"		},
     75 		{ LAYOUT_STD430,		"std430"		},
     76 		{ LAYOUT_ROW_MAJOR,		"row_major"		},
     77 		{ LAYOUT_COLUMN_MAJOR,	"column_major"	}
     78 	};
     79 
     80 	deUint32 remBits = fmt.flags;
     81 	for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
     82 	{
     83 		if (remBits & bitDesc[descNdx].bit)
     84 		{
     85 			if (remBits != fmt.flags)
     86 				str << ", ";
     87 			str << bitDesc[descNdx].token;
     88 			remBits &= ~bitDesc[descNdx].bit;
     89 		}
     90 	}
     91 	DE_ASSERT(remBits == 0);
     92 	return str;
     93 }
     94 
     95 // BufferVar implementation.
     96 
//! Buffer member variable. The offset starts out invalid (~0u) and is filled
//! in later when the reference layout is computed (relaxed-layout blocks only;
//! see computeReferenceLayout()).
BufferVar::BufferVar (const char* name, const VarType& type, deUint32 flags)
	: m_name	(name)
	, m_type	(type)
	, m_flags	(flags)
	, m_offset	(~0u)
{
}
    104 
    105 // BufferBlock implementation.
    106 
//! Buffer block. m_arraySize is initialized to -1 and then immediately set to
//! 0 (non-array block) via setArraySize(), which also allocates the single
//! last-unsized-array-size slot.
BufferBlock::BufferBlock (const char* blockName)
	: m_blockName		(blockName)
	, m_arraySize		(-1)
	, m_flags			(0)
{
	setArraySize(0);
}
    114 
    115 void BufferBlock::setArraySize (int arraySize)
    116 {
    117 	DE_ASSERT(arraySize >= 0);
    118 	m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
    119 	m_arraySize = arraySize;
    120 }
    121 
    122 std::ostream& operator<< (std::ostream& stream, const BlockLayoutEntry& entry)
    123 {
    124 	stream << entry.name << " { name = " << entry.name
    125 		   << ", size = " << entry.size
    126 		   << ", activeVarIndices = [";
    127 
    128 	for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
    129 	{
    130 		if (i != entry.activeVarIndices.begin())
    131 			stream << ", ";
    132 		stream << *i;
    133 	}
    134 
    135 	stream << "] }";
    136 	return stream;
    137 }
    138 
    139 static bool isUnsizedArray (const BufferVarLayoutEntry& entry)
    140 {
    141 	DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
    142 	return entry.arraySize == 0 || entry.topLevelArraySize == 0;
    143 }
    144 
    145 std::ostream& operator<< (std::ostream& stream, const BufferVarLayoutEntry& entry)
    146 {
    147 	stream << entry.name << " { type = " << glu::getDataTypeName(entry.type)
    148 		   << ", blockNdx = " << entry.blockNdx
    149 		   << ", offset = " << entry.offset
    150 		   << ", arraySize = " << entry.arraySize
    151 		   << ", arrayStride = " << entry.arrayStride
    152 		   << ", matrixStride = " << entry.matrixStride
    153 		   << ", topLevelArraySize = " << entry.topLevelArraySize
    154 		   << ", topLevelArrayStride = " << entry.topLevelArrayStride
    155 		   << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false")
    156 		   << " }";
    157 	return stream;
    158 }
    159 
    160 // \todo [2012-01-24 pyry] Speed up lookups using hash.
    161 
    162 int BufferLayout::getVariableIndex (const string& name) const
    163 {
    164 	for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
    165 	{
    166 		if (bufferVars[ndx].name == name)
    167 			return ndx;
    168 	}
    169 	return -1;
    170 }
    171 
    172 int BufferLayout::getBlockIndex (const string& name) const
    173 {
    174 	for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
    175 	{
    176 		if (blocks[ndx].name == name)
    177 			return ndx;
    178 	}
    179 	return -1;
    180 }
    181 
    182 // ShaderInterface implementation.
    183 
ShaderInterface::ShaderInterface (void)
{
	// Nothing to do; structs and blocks are allocated on demand.
}
    187 
    188 ShaderInterface::~ShaderInterface (void)
    189 {
    190 	for (std::vector<StructType*>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
    191 		delete *i;
    192 
    193 	for (std::vector<BufferBlock*>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
    194 		delete *i;
    195 }
    196 
    197 StructType& ShaderInterface::allocStruct (const char* name)
    198 {
    199 	m_structs.reserve(m_structs.size()+1);
    200 	m_structs.push_back(new StructType(name));
    201 	return *m_structs.back();
    202 }
    203 
    204 struct StructNameEquals
    205 {
    206 	std::string name;
    207 
    208 	StructNameEquals (const char* name_) : name(name_) {}
    209 
    210 	bool operator() (const StructType* type) const
    211 	{
    212 		return type->getTypeName() && name == type->getTypeName();
    213 	}
    214 };
    215 
    216 const StructType* ShaderInterface::findStruct (const char* name) const
    217 {
    218 	std::vector<StructType*>::const_iterator pos = std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
    219 	return pos != m_structs.end() ? *pos : DE_NULL;
    220 }
    221 
    222 void ShaderInterface::getNamedStructs (std::vector<const StructType*>& structs) const
    223 {
    224 	for (std::vector<StructType*>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
    225 	{
    226 		if ((*i)->getTypeName() != DE_NULL)
    227 			structs.push_back(*i);
    228 	}
    229 }
    230 
    231 BufferBlock& ShaderInterface::allocBlock (const char* name)
    232 {
    233 	m_bufferBlocks.reserve(m_bufferBlocks.size()+1);
    234 	m_bufferBlocks.push_back(new BufferBlock(name));
    235 	return *m_bufferBlocks.back();
    236 }
    237 
    238 namespace // Utilities
    239 {
    240 // Layout computation.
    241 
    242 int getDataTypeByteSize (glu::DataType type)
    243 {
    244 	return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint32);
    245 }
    246 
//! Returns the base alignment of a scalar or vector type in bytes.
//! Per the std140/std430 rules, 3-component vectors align like 4-component ones.
//! Matrices and aggregates are not handled here (caller decomposes them first).
int getDataTypeByteAlignment (glu::DataType type)
{
	switch (type)
	{
		case glu::TYPE_FLOAT:
		case glu::TYPE_INT:
		case glu::TYPE_UINT:
		case glu::TYPE_BOOL:		return 1*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC2:
		case glu::TYPE_INT_VEC2:
		case glu::TYPE_UINT_VEC2:
		case glu::TYPE_BOOL_VEC2:	return 2*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC3:
		case glu::TYPE_INT_VEC3:
		case glu::TYPE_UINT_VEC3:
		case glu::TYPE_BOOL_VEC3:	// Fall-through to vec4

		case glu::TYPE_FLOAT_VEC4:
		case glu::TYPE_INT_VEC4:
		case glu::TYPE_UINT_VEC4:
		case glu::TYPE_BOOL_VEC4:	return 4*(int)sizeof(deUint32);

		default:
			DE_ASSERT(false);
			return 0;
	}
}
    276 
    277 static inline int deRoundUp32 (int a, int b)
    278 {
    279 	int d = a/b;
    280 	return d*b == a ? a : (d+1)*b;
    281 }
    282 
//! Computes the std140 base alignment of a type in bytes.
//! Matrices are treated as arrays of column (or row, when row_major) vectors,
//! and array/struct alignments are rounded up to the alignment of vec4.
int computeStd140BaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	const int vec4Alignment = (int)sizeof(deUint32)*4;

	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		if (glu::isDataTypeMatrix(basicType))
		{
			// Matrix aligns like an array of its minor vectors (rules 5 & 7),
			// i.e. the vector alignment rounded up to vec4.
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);

			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
	{
		int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);

		// Round up to alignment of vec4 (std140 rule 4).
		return deAlign32(elemAlignment, vec4Alignment);
	}
	else
	{
		DE_ASSERT(type.isStructType());

		// Struct alignment is the maximum member alignment, rounded up to vec4
		// (std140 rule 9).
		int maxBaseAlignment = 0;

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));

		return deAlign32(maxBaseAlignment, vec4Alignment);
	}
}
    322 
//! Computes the std430 base alignment of a type in bytes.
int computeStd430BaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	// Otherwise identical to std140 except that alignment of structures and arrays
	// are not rounded up to alignment of vec4.

	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		if (glu::isDataTypeMatrix(basicType))
		{
			// Matrix aligns like its minor vector (column vector, or row vector
			// when row_major).
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
	{
		// Array aligns like its element type (no vec4 round-up in std430).
		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
	}
	else
	{
		DE_ASSERT(type.isStructType());

		// Struct alignment is the maximum member alignment.
		int maxBaseAlignment = 0;

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));

		return maxBaseAlignment;
	}
}
    359 
//! Computes the base alignment of a type under relaxed block layout rules,
//! where vectors may be aligned on 4-byte (scalar) boundaries.
int computeRelaxedBlockBaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		// Relaxed layout: vectors only need scalar (4-byte) alignment.
		if (glu::isDataTypeVector(basicType))
			return 4;

		if (glu::isDataTypeMatrix(basicType))
		{
			// Matrix still aligns like its minor vector under std430 rules.
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
		// NOTE(review): arrays intentionally fall back to std430 element
		// alignment here rather than recursing with relaxed rules — confirm
		// against the relaxed-block-layout rules if modifying.
		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
	else
	{
		DE_ASSERT(type.isStructType());

		// Struct alignment is the maximum member alignment.
		int maxBaseAlignment = 0;
		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));

		return maxBaseAlignment;
	}
}
    393 
    394 inline deUint32 mergeLayoutFlags (deUint32 prevFlags, deUint32 newFlags)
    395 {
    396 	const deUint32	packingMask		= LAYOUT_STD430|LAYOUT_STD140|LAYOUT_RELAXED;
    397 	const deUint32	matrixMask		= LAYOUT_ROW_MAJOR|LAYOUT_COLUMN_MAJOR;
    398 
    399 	deUint32 mergedFlags = 0;
    400 
    401 	mergedFlags |= ((newFlags & packingMask)	? newFlags : prevFlags) & packingMask;
    402 	mergedFlags |= ((newFlags & matrixMask)		? newFlags : prevFlags) & matrixMask;
    403 
    404 	return mergedFlags;
    405 }
    406 
    407 template <class T>
    408 bool isPow2(T powerOf2)
    409 {
    410 	if (powerOf2 <= 0)
    411 		return false;
    412 	return (powerOf2 & (powerOf2 - (T)1)) == (T)0;
    413 }
    414 
    415 template <class T>
    416 T roundToPow2(T number, int powerOf2)
    417 {
    418 	DE_ASSERT(isPow2(powerOf2));
    419 	return (number + (T)powerOf2 - (T)1) & (T)(~(powerOf2 - 1));
    420 }
    421 
    422 //! Appends all child elements to layout, returns value that should be appended to offset.
    423 int computeReferenceLayout (
    424 	BufferLayout&		layout,
    425 	int					curBlockNdx,
    426 	int					baseOffset,
    427 	const std::string&	curPrefix,
    428 	const VarType&		type,
    429 	deUint32			layoutFlags)
    430 {
    431 	// Reference layout uses std430 rules by default. std140 rules are
    432 	// choosen only for blocks that have std140 layout.
    433 	const int	baseAlignment		= (layoutFlags & LAYOUT_STD140)  != 0 ? computeStd140BaseAlignment(type, layoutFlags)		:
    434 									  (layoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(type, layoutFlags)	:
    435 									  computeStd430BaseAlignment(type, layoutFlags);
    436 	int			curOffset			= deAlign32(baseOffset, baseAlignment);
    437 	const int	topLevelArraySize	= 1; // Default values
    438 	const int	topLevelArrayStride	= 0;
    439 
    440 	if (type.isBasicType())
    441 	{
    442 		const glu::DataType		basicType	= type.getBasicType();
    443 		BufferVarLayoutEntry	entry;
    444 
    445 		entry.name					= curPrefix;
    446 		entry.type					= basicType;
    447 		entry.arraySize				= 1;
    448 		entry.arrayStride			= 0;
    449 		entry.matrixStride			= 0;
    450 		entry.topLevelArraySize		= topLevelArraySize;
    451 		entry.topLevelArrayStride	= topLevelArrayStride;
    452 		entry.blockNdx				= curBlockNdx;
    453 
    454 		if (glu::isDataTypeMatrix(basicType))
    455 		{
    456 			// Array of vectors as specified in rules 5 & 7.
    457 			const bool	isRowMajor			= !!(layoutFlags & LAYOUT_ROW_MAJOR);
    458 			const int	numVecs				= isRowMajor ? glu::getDataTypeMatrixNumRows(basicType)
    459 														 : glu::getDataTypeMatrixNumColumns(basicType);
    460 
    461 			entry.offset		= curOffset;
    462 			entry.matrixStride	= baseAlignment;
    463 			entry.isRowMajor	= isRowMajor;
    464 
    465 			curOffset += numVecs*baseAlignment;
    466 		}
    467 		else
    468 		{
    469 			if (glu::isDataTypeVector(basicType) && (getDataTypeByteSize(basicType) <= 16 ? curOffset / 16 != (curOffset +  getDataTypeByteSize(basicType) - 1) / 16 : curOffset % 16 != 0) && (layoutFlags & LAYOUT_RELAXED))
    470 				curOffset = roundToPow2(curOffset, 16);
    471 
    472 			// Scalar or vector.
    473 			entry.offset = curOffset;
    474 
    475 			curOffset += getDataTypeByteSize(basicType);
    476 		}
    477 
    478 		layout.bufferVars.push_back(entry);
    479 	}
    480 	else if (type.isArrayType())
    481 	{
    482 		const VarType&	elemType	= type.getElementType();
    483 
    484 		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
    485 		{
    486 			// Array of scalars or vectors.
    487 			const glu::DataType		elemBasicType	= elemType.getBasicType();
    488 			const int				stride			= baseAlignment;
    489 			BufferVarLayoutEntry	entry;
    490 
    491 			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
    492 			entry.type					= elemBasicType;
    493 			entry.blockNdx				= curBlockNdx;
    494 			entry.offset				= curOffset;
    495 			entry.arraySize				= type.getArraySize();
    496 			entry.arrayStride			= stride;
    497 			entry.matrixStride			= 0;
    498 			entry.topLevelArraySize		= topLevelArraySize;
    499 			entry.topLevelArrayStride	= topLevelArrayStride;
    500 
    501 			curOffset += stride*type.getArraySize();
    502 
    503 			layout.bufferVars.push_back(entry);
    504 		}
    505 		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
    506 		{
    507 			// Array of matrices.
    508 			const glu::DataType			elemBasicType	= elemType.getBasicType();
    509 			const bool					isRowMajor		= !!(layoutFlags & LAYOUT_ROW_MAJOR);
    510 			const int					numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
    511 																	 : glu::getDataTypeMatrixNumColumns(elemBasicType);
    512 			const int					vecStride		= baseAlignment;
    513 			BufferVarLayoutEntry		entry;
    514 
    515 			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
    516 			entry.type					= elemBasicType;
    517 			entry.blockNdx				= curBlockNdx;
    518 			entry.offset				= curOffset;
    519 			entry.arraySize				= type.getArraySize();
    520 			entry.arrayStride			= vecStride*numVecs;
    521 			entry.matrixStride			= vecStride;
    522 			entry.isRowMajor			= isRowMajor;
    523 			entry.topLevelArraySize		= topLevelArraySize;
    524 			entry.topLevelArrayStride	= topLevelArrayStride;
    525 
    526 			curOffset += numVecs*vecStride*type.getArraySize();
    527 
    528 			layout.bufferVars.push_back(entry);
    529 		}
    530 		else
    531 		{
    532 			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
    533 
    534 			for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
    535 				curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "[" + de::toString(elemNdx) + "]", type.getElementType(), layoutFlags);
    536 		}
    537 	}
    538 	else
    539 	{
    540 		DE_ASSERT(type.isStructType());
    541 
    542 		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
    543 			curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(), memberIter->getType(), layoutFlags);
    544 
    545 		curOffset = deAlign32(curOffset, baseAlignment);
    546 	}
    547 
    548 	return curOffset-baseOffset;
    549 }
    550 
    551 //! Appends all child elements to layout, returns offset increment.
    552 int computeReferenceLayout (BufferLayout& layout, int curBlockNdx, const std::string& blockPrefix, int baseOffset, const BufferVar& bufVar, deUint32 blockLayoutFlags)
    553 {
    554 	const VarType&	varType			= bufVar.getType();
    555 	const deUint32	combinedFlags	= mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());
    556 
    557 	if (varType.isArrayType())
    558 	{
    559 		// Top-level arrays need special care.
    560 		const int		topLevelArraySize	= varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
    561 		const string	prefix				= blockPrefix + bufVar.getName() + "[0]";
    562 		const bool		isStd140			= (blockLayoutFlags & LAYOUT_STD140) != 0;
    563 		const int		vec4Align			= (int)sizeof(deUint32)*4;
    564 		const int		baseAlignment		= isStd140									? computeStd140BaseAlignment(varType, combinedFlags)		:
    565 											(blockLayoutFlags & LAYOUT_RELAXED) != 0	? computeRelaxedBlockBaseAlignment(varType, combinedFlags)	:
    566 											computeStd430BaseAlignment(varType, combinedFlags);
    567 		int				curOffset			= deAlign32(baseOffset, baseAlignment);
    568 		const VarType&	elemType			= varType.getElementType();
    569 
    570 		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
    571 		{
    572 			// Array of scalars or vectors.
    573 			const glu::DataType		elemBasicType	= elemType.getBasicType();
    574 			const int				elemBaseAlign	= getDataTypeByteAlignment(elemBasicType);
    575 			const int				stride			= isStd140 ? deAlign32(elemBaseAlign, vec4Align) : elemBaseAlign;
    576 			BufferVarLayoutEntry	entry;
    577 
    578 			entry.name					= prefix;
    579 			entry.topLevelArraySize		= 1;
    580 			entry.topLevelArrayStride	= 0;
    581 			entry.type					= elemBasicType;
    582 			entry.blockNdx				= curBlockNdx;
    583 			entry.offset				= curOffset;
    584 			entry.arraySize				= topLevelArraySize;
    585 			entry.arrayStride			= stride;
    586 			entry.matrixStride			= 0;
    587 
    588 			layout.bufferVars.push_back(entry);
    589 
    590 			curOffset += stride*topLevelArraySize;
    591 		}
    592 		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
    593 		{
    594 			// Array of matrices.
    595 			const glu::DataType		elemBasicType	= elemType.getBasicType();
    596 			const bool				isRowMajor		= !!(combinedFlags & LAYOUT_ROW_MAJOR);
    597 			const int				vecSize			= isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
    598 																 : glu::getDataTypeMatrixNumRows(elemBasicType);
    599 			const int				numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
    600 																 : glu::getDataTypeMatrixNumColumns(elemBasicType);
    601 			const glu::DataType		vecType			= glu::getDataTypeFloatVec(vecSize);
    602 			const int				vecBaseAlign	= getDataTypeByteAlignment(vecType);
    603 			const int				stride			= isStd140 ? deAlign32(vecBaseAlign, vec4Align) : vecBaseAlign;
    604 			BufferVarLayoutEntry	entry;
    605 
    606 			entry.name					= prefix;
    607 			entry.topLevelArraySize		= 1;
    608 			entry.topLevelArrayStride	= 0;
    609 			entry.type					= elemBasicType;
    610 			entry.blockNdx				= curBlockNdx;
    611 			entry.offset				= curOffset;
    612 			entry.arraySize				= topLevelArraySize;
    613 			entry.arrayStride			= stride*numVecs;
    614 			entry.matrixStride			= stride;
    615 			entry.isRowMajor			= isRowMajor;
    616 
    617 			layout.bufferVars.push_back(entry);
    618 
    619 			curOffset += stride*numVecs*topLevelArraySize;
    620 		}
    621 		else
    622 		{
    623 			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
    624 
    625 			// Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
    626 			// was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
    627 			// before struct. Padding after struct will be added as it should.
    628 			//
    629 			// Stride could be computed prior to creating child elements, but it would essentially require running
    630 			// the layout computation twice. Instead we fix stride to child elements afterwards.
    631 
    632 			const int	firstChildNdx	= (int)layout.bufferVars.size();
    633 			const int	stride			= computeReferenceLayout(layout, curBlockNdx, curOffset, prefix, varType.getElementType(), combinedFlags);
    634 
    635 			for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
    636 			{
    637 				layout.bufferVars[childNdx].topLevelArraySize	= topLevelArraySize;
    638 				layout.bufferVars[childNdx].topLevelArrayStride	= stride;
    639 			}
    640 
    641 			curOffset += stride*topLevelArraySize;
    642 		}
    643 
    644 		return curOffset-baseOffset;
    645 	}
    646 	else
    647 		return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType, combinedFlags);
    648 }
    649 
//! Computes the reference layout for every block in the interface, filling
//! layout.bufferVars and one layout.blocks entry per block instance.
void computeReferenceLayout (BufferLayout& layout, ShaderInterface& interface)
{
	int numBlocks = interface.getNumBlocks();

	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
	{
		BufferBlock&		block			= interface.getBlock(blockNdx);
		bool				hasInstanceName	= block.getInstanceName() != DE_NULL;
		// Members of named-instance blocks are prefixed with "BlockName.".
		std::string			blockPrefix		= hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
		int					curOffset		= 0;
		int					activeBlockNdx	= (int)layout.blocks.size();
		int					firstVarNdx		= (int)layout.bufferVars.size();

		size_t oldSize	= layout.bufferVars.size();
		for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
		{
			BufferVar& bufVar = *varIter;
			curOffset += computeReferenceLayout(layout, activeBlockNdx,  blockPrefix, curOffset, bufVar, block.getFlags());
			// For relaxed-layout blocks the computed offset is stored back on
			// the BufferVar (first entry produced for it) so shader generation
			// can emit explicit offsets.
			if (block.getFlags() & LAYOUT_RELAXED)
			{
				DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
				bufVar.setOffset(layout.bufferVars[oldSize].offset);
			}
			oldSize	= layout.bufferVars.size();
		}

		int	varIndicesEnd	= (int)layout.bufferVars.size();
		int	blockSize		= curOffset;
		int	numInstances	= block.isArray() ? block.getArraySize() : 1;

		// Create block layout entries for each instance.
		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
		{
			// Allocate entry for instance.
			layout.blocks.push_back(BlockLayoutEntry());
			BlockLayoutEntry& blockEntry = layout.blocks.back();

			blockEntry.name = block.getBlockName();
			blockEntry.size = blockSize;

			// Compute active variable set for block. All instances share the
			// same variable entries.
			for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
				blockEntry.activeVarIndices.push_back(varNdx);

			if (block.isArray())
				blockEntry.name += "[" + de::toString(instanceNdx) + "]";
		}
	}
}
    699 
    700 // Value generator.
    701 
//! Fills the storage of a single buffer variable with deterministic random
//! values, walking top-level array x inner array x matrix vectors x components
//! using the strides recorded in the layout entry. 'unsizedArraySize' supplies
//! the runtime size for dimensions recorded as 0 (unsized).
void generateValue (const BufferVarLayoutEntry& entry, int unsizedArraySize, void* basePtr, de::Random& rnd)
{
	const glu::DataType	scalarType		= glu::getDataTypeScalarType(entry.type);
	const int			scalarSize		= glu::getDataTypeScalarSize(entry.type);
	const int			arraySize		= entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
	const int			arrayStride		= entry.arrayStride;
	const int			topLevelSize	= entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
	const int			topLevelStride	= entry.topLevelArrayStride;
	const bool			isMatrix		= glu::isDataTypeMatrix(entry.type);
	const int			numVecs			= isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) : glu::getDataTypeMatrixNumColumns(entry.type)) : 1;
	const int			vecSize			= scalarSize / numVecs;
	const int			compSize		= sizeof(deUint32);	// All basic types use 32-bit components.

	DE_ASSERT(scalarSize%numVecs == 0);
	DE_ASSERT(topLevelSize >= 0);
	DE_ASSERT(arraySize >= 0);

	for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
	{
		deUint8* const topElemPtr = (deUint8*)basePtr + entry.offset + topElemNdx*topLevelStride;

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
		{
			deUint8* const elemPtr = topElemPtr + elemNdx*arrayStride;

			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				// Matrix minor vectors are matrixStride apart; non-matrix
				// types have a single vector at offset 0.
				deUint8* const vecPtr = elemPtr + (isMatrix ? vecNdx*entry.matrixStride : 0);

				for (int compNdx = 0; compNdx < vecSize; compNdx++)
				{
					deUint8* const compPtr = vecPtr + compSize*compNdx;

					// Small value ranges keep float comparisons exact in the
					// generated compare shaders.
					switch (scalarType)
					{
						case glu::TYPE_FLOAT:	*((float*)compPtr)		= (float)rnd.getInt(-9, 9);						break;
						case glu::TYPE_INT:		*((int*)compPtr)		= rnd.getInt(-9, 9);							break;
						case glu::TYPE_UINT:	*((deUint32*)compPtr)	= (deUint32)rnd.getInt(0, 9);					break;
						// \note Random bit pattern is used for true values. Spec states that all non-zero values are
						//       interpreted as true but some implementations fail this.
						case glu::TYPE_BOOL:	*((deUint32*)compPtr)	= rnd.getBool() ? rnd.getUint32()|1u : 0u;		break;
						default:
							DE_ASSERT(false);
					}
				}
			}
		}
	}
}
    751 
    752 void generateValues (const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, deUint32 seed)
    753 {
    754 	de::Random	rnd			(seed);
    755 	const int	numBlocks	= (int)layout.blocks.size();
    756 
    757 	DE_ASSERT(numBlocks == (int)blockPointers.size());
    758 
    759 	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
    760 	{
    761 		const BlockLayoutEntry&	blockLayout	= layout.blocks[blockNdx];
    762 		const BlockDataPtr&		blockPtr	= blockPointers[blockNdx];
    763 		const int				numEntries	= (int)layout.blocks[blockNdx].activeVarIndices.size();
    764 
    765 		for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
    766 		{
    767 			const int					varNdx		= blockLayout.activeVarIndices[entryNdx];
    768 			const BufferVarLayoutEntry&	varEntry	= layout.bufferVars[varNdx];
    769 
    770 			generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
    771 		}
    772 	}
    773 }
    774 
    775 // Shader generator.
    776 
// Returns GLSL source for the comparison helper ("compare_<type>") of the
// given basic type, or asserts for unsupported types.
//
// Float comparisons use a fixed absolute tolerance of 0.05; int, uint and
// bool comparisons are exact. Float vector and matrix variants are built on
// compare_float / compare_vecN, so those dependencies must also be emitted
// (see getCompareDependencies and generateCompareFuncs).
const char* getCompareFuncForType (glu::DataType type)
{
	switch (type)
	{
		case glu::TYPE_FLOAT:			return "bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }\n";
		case glu::TYPE_FLOAT_VEC2:		return "bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }\n";
		case glu::TYPE_FLOAT_VEC3:		return "bool compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }\n";
		case glu::TYPE_FLOAT_VEC4:		return "bool compare_vec4     (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }\n";
		case glu::TYPE_FLOAT_MAT2:		return "bool compare_mat2     (highp mat2 a, highp mat2 b)    { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT2X3:	return "bool compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT2X4:	return "bool compare_mat2x4   (highp mat2x4 a, highp mat2x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1]); }\n";
		case glu::TYPE_FLOAT_MAT3X2:	return "bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT3:		return "bool compare_mat3     (highp mat3 a, highp mat3 b)    { return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT3X4:	return "bool compare_mat3x4   (highp mat3x4 a, highp mat3x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2]); }\n";
		case glu::TYPE_FLOAT_MAT4X2:	return "bool compare_mat4x2   (highp mat4x2 a, highp mat4x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2])&&compare_vec2(a[3], b[3]); }\n";
		case glu::TYPE_FLOAT_MAT4X3:	return "bool compare_mat4x3   (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }\n";
		case glu::TYPE_FLOAT_MAT4:		return "bool compare_mat4     (highp mat4 a, highp mat4 b)    { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); }\n";
		case glu::TYPE_INT:				return "bool compare_int      (highp int a, highp int b)      { return a == b; }\n";
		case glu::TYPE_INT_VEC2:		return "bool compare_ivec2    (highp ivec2 a, highp ivec2 b)  { return a == b; }\n";
		case glu::TYPE_INT_VEC3:		return "bool compare_ivec3    (highp ivec3 a, highp ivec3 b)  { return a == b; }\n";
		case glu::TYPE_INT_VEC4:		return "bool compare_ivec4    (highp ivec4 a, highp ivec4 b)  { return a == b; }\n";
		case glu::TYPE_UINT:			return "bool compare_uint     (highp uint a, highp uint b)    { return a == b; }\n";
		case glu::TYPE_UINT_VEC2:		return "bool compare_uvec2    (highp uvec2 a, highp uvec2 b)  { return a == b; }\n";
		case glu::TYPE_UINT_VEC3:		return "bool compare_uvec3    (highp uvec3 a, highp uvec3 b)  { return a == b; }\n";
		case glu::TYPE_UINT_VEC4:		return "bool compare_uvec4    (highp uvec4 a, highp uvec4 b)  { return a == b; }\n";
		case glu::TYPE_BOOL:			return "bool compare_bool     (bool a, bool b)                { return a == b; }\n";
		case glu::TYPE_BOOL_VEC2:		return "bool compare_bvec2    (bvec2 a, bvec2 b)              { return a == b; }\n";
		case glu::TYPE_BOOL_VEC3:		return "bool compare_bvec3    (bvec3 a, bvec3 b)              { return a == b; }\n";
		case glu::TYPE_BOOL_VEC4:		return "bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }\n";
		default:
			// No comparison helper is defined for other types (e.g. doubles, samplers).
			DE_ASSERT(false);
			return DE_NULL;
	}
}
    811 
    812 void getCompareDependencies (std::set<glu::DataType>& compareFuncs, glu::DataType basicType)
    813 {
    814 	switch (basicType)
    815 	{
    816 		case glu::TYPE_FLOAT_VEC2:
    817 		case glu::TYPE_FLOAT_VEC3:
    818 		case glu::TYPE_FLOAT_VEC4:
    819 			compareFuncs.insert(glu::TYPE_FLOAT);
    820 			compareFuncs.insert(basicType);
    821 			break;
    822 
    823 		case glu::TYPE_FLOAT_MAT2:
    824 		case glu::TYPE_FLOAT_MAT2X3:
    825 		case glu::TYPE_FLOAT_MAT2X4:
    826 		case glu::TYPE_FLOAT_MAT3X2:
    827 		case glu::TYPE_FLOAT_MAT3:
    828 		case glu::TYPE_FLOAT_MAT3X4:
    829 		case glu::TYPE_FLOAT_MAT4X2:
    830 		case glu::TYPE_FLOAT_MAT4X3:
    831 		case glu::TYPE_FLOAT_MAT4:
    832 			compareFuncs.insert(glu::TYPE_FLOAT);
    833 			compareFuncs.insert(glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType)));
    834 			compareFuncs.insert(basicType);
    835 			break;
    836 
    837 		default:
    838 			compareFuncs.insert(basicType);
    839 			break;
    840 	}
    841 }
    842 
    843 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const VarType& type)
    844 {
    845 	if (type.isStructType())
    846 	{
    847 		for (StructType::ConstIterator iter = type.getStructPtr()->begin(); iter != type.getStructPtr()->end(); ++iter)
    848 			collectUniqueBasicTypes(basicTypes, iter->getType());
    849 	}
    850 	else if (type.isArrayType())
    851 		collectUniqueBasicTypes(basicTypes, type.getElementType());
    852 	else
    853 	{
    854 		DE_ASSERT(type.isBasicType());
    855 		basicTypes.insert(type.getBasicType());
    856 	}
    857 }
    858 
    859 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const BufferBlock& bufferBlock)
    860 {
    861 	for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
    862 		collectUniqueBasicTypes(basicTypes, iter->getType());
    863 }
    864 
    865 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const ShaderInterface& interface)
    866 {
    867 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    868 		collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
    869 }
    870 
    871 void generateCompareFuncs (std::ostream& str, const ShaderInterface& interface)
    872 {
    873 	std::set<glu::DataType> types;
    874 	std::set<glu::DataType> compareFuncs;
    875 
    876 	// Collect unique basic types
    877 	collectUniqueBasicTypes(types, interface);
    878 
    879 	// Set of compare functions required
    880 	for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
    881 	{
    882 		getCompareDependencies(compareFuncs, *iter);
    883 	}
    884 
    885 	for (int type = 0; type < glu::TYPE_LAST; ++type)
    886 	{
    887 		if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
    888 			str << getCompareFuncForType(glu::DataType(type));
    889 	}
    890 }
    891 
    892 bool usesRelaxedLayout (const ShaderInterface& interface)
    893 {
    894 	//If any of blocks has LAYOUT_RELAXED flag
    895 	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    896 	{
    897 		if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
    898 			return true;
    899 	}
    900 	return false;
    901 }
    902 
    903 struct Indent
    904 {
    905 	int level;
    906 	Indent (int level_) : level(level_) {}
    907 };
    908 
    909 std::ostream& operator<< (std::ostream& str, const Indent& indent)
    910 {
    911 	for (int i = 0; i < indent.level; i++)
    912 		str << "\t";
    913 	return str;
    914 }
    915 
    916 void generateDeclaration (std::ostream& src, const BufferVar& bufferVar, int indentLevel)
    917 {
    918 	// \todo [pyry] Qualifiers
    919 	if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
    920 		src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
    921 	else if (bufferVar.getOffset()!= ~0u)
    922 		src << "layout(offset = "<<bufferVar.getOffset()<<") ";
    923 
    924 	src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
    925 }
    926 
    927 void generateDeclaration (std::ostream& src, const BufferBlock& block, int bindingPoint)
    928 {
    929 	src << "layout(";
    930 	if ((block.getFlags() & LAYOUT_MASK) != 0)
    931 		src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";
    932 
    933 	src << "binding = " << bindingPoint;
    934 
    935 	src << ") ";
    936 
    937 	src << "buffer " << block.getBlockName();
    938 	src << "\n{\n";
    939 
    940 	for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
    941 	{
    942 		src << Indent(1);
    943 
    944 		generateDeclaration(src, *varIter, 1 /* indent level */);
    945 		src << ";\n";
    946 	}
    947 
    948 	src << "}";
    949 
    950 	if (block.getInstanceName() != DE_NULL)
    951 	{
    952 		src << " " << block.getInstanceName();
    953 		if (block.isArray())
    954 			src << "[" << block.getArraySize() << "]";
    955 	}
    956 	else
    957 		DE_ASSERT(!block.isArray());
    958 
    959 	src << ";\n";
    960 }
    961 
    962 void generateImmMatrixSrc (std::ostream& src, glu::DataType basicType, int matrixStride, bool isRowMajor, const void* valuePtr)
    963 {
    964 	DE_ASSERT(glu::isDataTypeMatrix(basicType));
    965 
    966 	const int		compSize		= sizeof(deUint32);
    967 	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
    968 	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);
    969 
    970 	src << glu::getDataTypeName(basicType) << "(";
    971 
    972 	// Constructed in column-wise order.
    973 	for (int colNdx = 0; colNdx < numCols; colNdx++)
    974 	{
    975 		for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
    976 		{
    977 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
    978 																				: colNdx*matrixStride + rowNdx*compSize);
    979 
    980 			if (colNdx > 0 || rowNdx > 0)
    981 				src << ", ";
    982 
    983 			src << de::floatToString(*((const float*)compPtr), 1);
    984 		}
    985 	}
    986 
    987 	src << ")";
    988 }
    989 
    990 void generateImmMatrixSrc (std::ostream& src,
    991 						   glu::DataType basicType,
    992 						   int matrixStride,
    993 						   bool isRowMajor,
    994 						   const void* valuePtr,
    995 						   const char* resultVar,
    996 						   const char* typeName,
    997 						   const string shaderName)
    998 {
    999 	const int		compSize		= sizeof(deUint32);
   1000 	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
   1001 	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);
   1002 
   1003 	typeName = "float";
   1004 	for (int colNdex = 0; colNdex < numCols; colNdex++)
   1005 	{
   1006 		for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
   1007 		{
   1008 			src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << "[" << colNdex << "][" << rowNdex << "], ";
   1009 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdex*matrixStride + colNdex*compSize
   1010 																						: colNdex*matrixStride + rowNdex*compSize);
   1011 
   1012 			src << de::floatToString(*((const float*)compPtr), 1);
   1013 			src << ");\n";
   1014 		}
   1015 	}
   1016 
   1017 	typeName = "vec";
   1018 	for (int colNdex = 0; colNdex < numCols; colNdex++)
   1019 	{
   1020 		src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], " << typeName << numRows << "(";
   1021 		for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
   1022 		{
   1023 			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize)
   1024 																  : (colNdex * matrixStride + rowNdex * compSize));
   1025 			src << de::floatToString(*((const float*)compPtr), 1);
   1026 
   1027 			if (rowNdex < numRows-1)
   1028 				src << ", ";
   1029 		}
   1030 		src << "));\n";
   1031 	}
   1032 }
   1033 
   1034 void generateImmScalarVectorSrc (std::ostream& src, glu::DataType basicType, const void* valuePtr)
   1035 {
   1036 	DE_ASSERT(glu::isDataTypeFloatOrVec(basicType)	||
   1037 			  glu::isDataTypeIntOrIVec(basicType)	||
   1038 			  glu::isDataTypeUintOrUVec(basicType)	||
   1039 			  glu::isDataTypeBoolOrBVec(basicType));
   1040 
   1041 	const glu::DataType		scalarType		= glu::getDataTypeScalarType(basicType);
   1042 	const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
   1043 	const int				compSize		= sizeof(deUint32);
   1044 
   1045 	if (scalarSize > 1)
   1046 		src << glu::getDataTypeName(basicType) << "(";
   1047 
   1048 	for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
   1049 	{
   1050 		const deUint8* compPtr = (const deUint8*)valuePtr + scalarNdx*compSize;
   1051 
   1052 		if (scalarNdx > 0)
   1053 			src << ", ";
   1054 
   1055 		switch (scalarType)
   1056 		{
   1057 			case glu::TYPE_FLOAT:	src << de::floatToString(*((const float*)compPtr), 1);			break;
   1058 			case glu::TYPE_INT:		src << *((const int*)compPtr);									break;
   1059 			case glu::TYPE_UINT:	src << *((const deUint32*)compPtr) << "u";						break;
   1060 			case glu::TYPE_BOOL:	src << (*((const deUint32*)compPtr) != 0u ? "true" : "false");	break;
   1061 			default:
   1062 				DE_ASSERT(false);
   1063 		}
   1064 	}
   1065 
   1066 	if (scalarSize > 1)
   1067 		src << ")";
   1068 }
   1069 
   1070 string getAPIName (const BufferBlock& block, const BufferVar& var, const glu::TypeComponentVector& accessPath)
   1071 {
   1072 	std::ostringstream name;
   1073 
   1074 	if (block.getInstanceName())
   1075 		name << block.getBlockName() << ".";
   1076 
   1077 	name << var.getName();
   1078 
   1079 	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
   1080 	{
   1081 		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
   1082 		{
   1083 			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
   1084 			const StructType*	structPtr	= curType.getStructPtr();
   1085 
   1086 			name << "." << structPtr->getMember(pathComp->index).getName();
   1087 		}
   1088 		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
   1089 		{
   1090 			if (pathComp == accessPath.begin() || (pathComp+1) == accessPath.end())
   1091 				name << "[0]"; // Top- / bottom-level array
   1092 			else
   1093 				name << "[" << pathComp->index << "]";
   1094 		}
   1095 		else
   1096 			DE_ASSERT(false);
   1097 	}
   1098 
   1099 	return name.str();
   1100 }
   1101 
   1102 string getShaderName (const BufferBlock& block, int instanceNdx, const BufferVar& var, const glu::TypeComponentVector& accessPath)
   1103 {
   1104 	std::ostringstream name;
   1105 
   1106 	if (block.getInstanceName())
   1107 	{
   1108 		name << block.getInstanceName();
   1109 
   1110 		if (block.isArray())
   1111 			name << "[" << instanceNdx << "]";
   1112 
   1113 		name << ".";
   1114 	}
   1115 	else
   1116 		DE_ASSERT(instanceNdx == 0);
   1117 
   1118 	name << var.getName();
   1119 
   1120 	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
   1121 	{
   1122 		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
   1123 		{
   1124 			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
   1125 			const StructType*	structPtr	= curType.getStructPtr();
   1126 
   1127 			name << "." << structPtr->getMember(pathComp->index).getName();
   1128 		}
   1129 		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
   1130 			name << "[" << pathComp->index << "]";
   1131 		else
   1132 			DE_ASSERT(false);
   1133 	}
   1134 
   1135 	return name.str();
   1136 }
   1137 
   1138 int computeOffset (const BufferVarLayoutEntry& varLayout, const glu::TypeComponentVector& accessPath)
   1139 {
   1140 	const int	topLevelNdx		= (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.front().index : 0;
   1141 	const int	bottomLevelNdx	= (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.back().index : 0;
   1142 
   1143 	return varLayout.offset + varLayout.topLevelArrayStride*topLevelNdx + varLayout.arrayStride*bottomLevelNdx;
   1144 }
   1145 
// Recursively emits shader statements that compare a buffer variable (or a
// sub-element of it, selected by accessPath) against the reference values
// stored at blockPtr, AND-ing each result into resultVar.
//
// Note: matrixLoadFlag only takes effect at the level it is passed in with;
// recursion into array elements and struct members always uses
// LOAD_FULL_MATRIX (see the recursive calls below).
void generateCompareSrc (
	std::ostream&				src,
	const char*					resultVar,
	const BufferLayout&			bufferLayout,
	const BufferBlock&			block,
	int							instanceNdx,
	const BlockDataPtr&			blockPtr,
	const BufferVar&			bufVar,
	const glu::SubTypeAccess&	accessPath,
	MatrixLoadFlags				matrixLoadFlag)
{
	const VarType curType = accessPath.getType();

	if (curType.isArrayType())
	{
		// Unsized arrays compare as many elements as were actually allocated.
		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), LOAD_FULL_MATRIX);
	}
	else if (curType.isStructType())
	{
		const int numMembers = curType.getStructPtr()->getNumMembers();

		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), LOAD_FULL_MATRIX);
	}
	else
	{
		// Leaf case: a basic (scalar/vector/matrix) type.
		DE_ASSERT(curType.isBasicType());

		// The layout entry is looked up by the API-side name of this path.
		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
		const int		varNdx	= bufferLayout.getVariableIndex(apiName);

		DE_ASSERT(varNdx >= 0);
		{
			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
			const glu::DataType			basicType		= curType.getBasicType();
			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
			const char*					typeName		= glu::getDataTypeName(basicType);
			// Reference value location inside the backing storage.
			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());


			if (isMatrix)
			{
				// Either compare element-by-element (LOAD_MATRIX_COMPONENTS)
				// or load the whole matrix and compare it in one call.
				if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
					generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr, resultVar, typeName, shaderName);
				else
				{
					src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
					generateImmMatrixSrc (src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
					src << ");\n";
				}
			}
			else
			{
				src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
				generateImmScalarVectorSrc(src, basicType, valuePtr);
				src << ");\n";
			}
		}
	}
}
   1210 
   1211 void generateCompareSrc (std::ostream& src, const char* resultVar, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixLoadFlags matrixLoadFlag)
   1212 {
   1213 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
   1214 	{
   1215 		const BufferBlock&	block			= interface.getBlock(declNdx);
   1216 		const bool			isArray			= block.isArray();
   1217 		const int			numInstances	= isArray ? block.getArraySize() : 1;
   1218 
   1219 		DE_ASSERT(!isArray || block.getInstanceName());
   1220 
   1221 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
   1222 		{
   1223 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
   1224 			const int			blockNdx		= layout.getBlockIndex(instanceName);
   1225 			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];
   1226 
   1227 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
   1228 			{
   1229 				const BufferVar& bufVar = *varIter;
   1230 
   1231 				if ((bufVar.getFlags() & ACCESS_READ) == 0)
   1232 					continue; // Don't read from that variable.
   1233 
   1234 				generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag);
   1235 			}
   1236 		}
   1237 	}
   1238 }
   1239 
   1240 // \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?
   1241 
   1242 void generateWriteSrc (
   1243 	std::ostream&				src,
   1244 	const BufferLayout&			bufferLayout,
   1245 	const BufferBlock&			block,
   1246 	int							instanceNdx,
   1247 	const BlockDataPtr&			blockPtr,
   1248 	const BufferVar&			bufVar,
   1249 	const glu::SubTypeAccess&	accessPath)
   1250 {
   1251 	const VarType curType = accessPath.getType();
   1252 
   1253 	if (curType.isArrayType())
   1254 	{
   1255 		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
   1256 
   1257 		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
   1258 			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx));
   1259 	}
   1260 	else if (curType.isStructType())
   1261 	{
   1262 		const int numMembers = curType.getStructPtr()->getNumMembers();
   1263 
   1264 		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
   1265 			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx));
   1266 	}
   1267 	else
   1268 	{
   1269 		DE_ASSERT(curType.isBasicType());
   1270 
   1271 		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
   1272 		const int		varNdx	= bufferLayout.getVariableIndex(apiName);
   1273 
   1274 		DE_ASSERT(varNdx >= 0);
   1275 		{
   1276 			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
   1277 			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
   1278 			const glu::DataType			basicType		= curType.getBasicType();
   1279 			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
   1280 			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
   1281 
   1282 			src << "\t" << shaderName << " = ";
   1283 
   1284 			if (isMatrix)
   1285 				generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
   1286 			else
   1287 				generateImmScalarVectorSrc(src, basicType, valuePtr);
   1288 
   1289 			src << ";\n";
   1290 		}
   1291 	}
   1292 }
   1293 
   1294 void generateWriteSrc (std::ostream& src, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers)
   1295 {
   1296 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
   1297 	{
   1298 		const BufferBlock&	block			= interface.getBlock(declNdx);
   1299 		const bool			isArray			= block.isArray();
   1300 		const int			numInstances	= isArray ? block.getArraySize() : 1;
   1301 
   1302 		DE_ASSERT(!isArray || block.getInstanceName());
   1303 
   1304 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
   1305 		{
   1306 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
   1307 			const int			blockNdx		= layout.getBlockIndex(instanceName);
   1308 			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];
   1309 
   1310 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
   1311 			{
   1312 				const BufferVar& bufVar = *varIter;
   1313 
   1314 				if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
   1315 					continue; // Don't write to that variable.
   1316 
   1317 				generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
   1318 			}
   1319 		}
   1320 	}
   1321 }
   1322 
// Builds the complete GLSL compute shader source for the test: it declares
// all buffer blocks, compares their contents against the reference values in
// comparePtrs, increments the pass counter if everything matched, and then
// writes the values from writePtrs into the writable variables.
string generateComputeShader (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& comparePtrs, const vector<BlockDataPtr>& writePtrs, MatrixLoadFlags matrixLoadFlag)
{
	std::ostringstream src;

	// Relaxed block layout needs desktop GLSL 450; everything else sticks to
	// GLSL ES 3.1.
	if (usesRelaxedLayout(interface))
		src << "#version 450\n";
	else
		src << "#version 310 es\n";

	src << "layout(local_size_x = 1) in;\n";
	src << "\n";

	// Atomic counter for counting passed invocations.
	src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";

	// Named struct types must be declared before the blocks that use them.
	std::vector<const StructType*> namedStructs;
	interface.getNamedStructs(namedStructs);
	for (std::vector<const StructType*>::const_iterator structIter = namedStructs.begin(); structIter != namedStructs.end(); structIter++)
		src << glu::declare(*structIter) << ";\n";

	{
		// Block bindings start at 1; binding 0 is taken by AcBlock above.
		for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
		{
			const BufferBlock& block = interface.getBlock(blockNdx);
			generateDeclaration(src, block, 1 + blockNdx);
		}
	}

	// Comparison utilities.
	src << "\n";
	generateCompareFuncs(src, interface);

	src << "\n"
		   "void main (void)\n"
		   "{\n"
		   "	bool allOk = true;\n";

	// Value compare.
	generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);

	src << "	if (allOk)\n"
		<< "		ac_numPassed++;\n"
		<< "\n";

	// Value write.
	generateWriteSrc(src, interface, layout, writePtrs);

	src << "}\n";

	return src.str();
}
   1374 
// Copies the data of a single buffer variable from the source block storage
// to the destination block storage, honoring each side's own array strides
// and matrix layout (row- vs column-major). The destination's (top-level)
// array sizes must not exceed the source's; only the destination-sized
// portion is copied.
void copyBufferVarData (const BufferVarLayoutEntry& dstEntry, const BlockDataPtr& dstBlockPtr, const BufferVarLayoutEntry& srcEntry, const BlockDataPtr& srcBlockPtr)
{
	DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
	DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
	DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
	DE_ASSERT(dstEntry.type == srcEntry.type);

	deUint8* const			dstBasePtr			= (deUint8*)dstBlockPtr.ptr + dstEntry.offset;
	const deUint8* const	srcBasePtr			= (const deUint8*)srcBlockPtr.ptr + srcEntry.offset;
	const int				scalarSize			= glu::getDataTypeScalarSize(dstEntry.type);
	const bool				isMatrix			= glu::isDataTypeMatrix(dstEntry.type);
	// Each scalar component occupies one 32-bit slot.
	const int				compSize			= sizeof(deUint32);
	// An arraySize / topLevelArraySize of 0 denotes an unsized array; the
	// actual element count comes from the block's lastUnsizedArraySize.
	const int				dstArraySize		= dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
	const int				dstArrayStride		= dstEntry.arrayStride;
	const int				dstTopLevelSize		= dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
	const int				dstTopLevelStride	= dstEntry.topLevelArrayStride;
	const int				srcArraySize		= srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
	const int				srcArrayStride		= srcEntry.arrayStride;
	const int				srcTopLevelSize		= srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
	const int				srcTopLevelStride	= srcEntry.topLevelArrayStride;

	DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
	DE_UNREF(srcArraySize && srcTopLevelSize);

	// Iterate destination-side sizes; the source is at least as large.
	for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
	{
		deUint8* const			dstTopPtr	= dstBasePtr + topElemNdx*dstTopLevelStride;
		const deUint8* const	srcTopPtr	= srcBasePtr + topElemNdx*srcTopLevelStride;

		for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
		{
			deUint8* const			dstElemPtr	= dstTopPtr + elementNdx*dstArrayStride;
			const deUint8* const	srcElemPtr	= srcTopPtr + elementNdx*srcArrayStride;

			if (isMatrix)
			{
				// Matrices are copied component by component since source and
				// destination may differ in matrix stride and majorness.
				const int	numRows	= glu::getDataTypeMatrixNumRows(dstEntry.type);
				const int	numCols	= glu::getDataTypeMatrixNumColumns(dstEntry.type);

				for (int colNdx = 0; colNdx < numCols; colNdx++)
				{
					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
					{
						deUint8*		dstCompPtr	= dstElemPtr + (dstEntry.isRowMajor ? rowNdx*dstEntry.matrixStride + colNdx*compSize
																						: colNdx*dstEntry.matrixStride + rowNdx*compSize);
						const deUint8*	srcCompPtr	= srcElemPtr + (srcEntry.isRowMajor ? rowNdx*srcEntry.matrixStride + colNdx*compSize
																						: colNdx*srcEntry.matrixStride + rowNdx*compSize);

						// Guard against writing/reading past the block allocations.
						DE_ASSERT((deIntptr)(srcCompPtr + compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
						DE_ASSERT((deIntptr)(dstCompPtr + compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
						deMemcpy(dstCompPtr, srcCompPtr, compSize);
					}
				}
			}
			else
			{
				// Scalars and vectors are tightly packed and copied in one go.
				DE_ASSERT((deIntptr)(srcElemPtr + scalarSize*compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
				DE_ASSERT((deIntptr)(dstElemPtr + scalarSize*compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
				deMemcpy(dstElemPtr, srcElemPtr, scalarSize*compSize);
			}
		}
	}
}
   1438 
   1439 void copyData (const BufferLayout& dstLayout, const vector<BlockDataPtr>& dstBlockPointers, const BufferLayout& srcLayout, const vector<BlockDataPtr>& srcBlockPointers)
   1440 {
   1441 	// \note Src layout is used as reference in case of activeVarIndices happens to be incorrect in dstLayout blocks.
   1442 	int numBlocks = (int)srcLayout.blocks.size();
   1443 
   1444 	for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
   1445 	{
   1446 		const BlockLayoutEntry&		srcBlock	= srcLayout.blocks[srcBlockNdx];
   1447 		const BlockDataPtr&			srcBlockPtr	= srcBlockPointers[srcBlockNdx];
   1448 		int							dstBlockNdx	= dstLayout.getBlockIndex(srcBlock.name.c_str());
   1449 
   1450 		if (dstBlockNdx >= 0)
   1451 		{
   1452 			DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
   1453 
   1454 			const BlockDataPtr& dstBlockPtr = dstBlockPointers[dstBlockNdx];
   1455 
   1456 			for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin(); srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
   1457 			{
   1458 				const BufferVarLayoutEntry&	srcEntry	= srcLayout.bufferVars[*srcVarNdxIter];
   1459 				int							dstVarNdx	= dstLayout.getVariableIndex(srcEntry.name.c_str());
   1460 
   1461 				if (dstVarNdx >= 0)
   1462 					copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
   1463 			}
   1464 		}
   1465 	}
   1466 }
   1467 
   1468 void copyNonWrittenData (
   1469 	const BufferLayout&			layout,
   1470 	const BufferBlock&			block,
   1471 	int							instanceNdx,
   1472 	const BlockDataPtr&			srcBlockPtr,
   1473 	const BlockDataPtr&			dstBlockPtr,
   1474 	const BufferVar&			bufVar,
   1475 	const glu::SubTypeAccess&	accessPath)
   1476 {
   1477 	const VarType curType = accessPath.getType();
   1478 
   1479 	if (curType.isArrayType())
   1480 	{
   1481 		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
   1482 
   1483 		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
   1484 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.element(elemNdx));
   1485 	}
   1486 	else if (curType.isStructType())
   1487 	{
   1488 		const int numMembers = curType.getStructPtr()->getNumMembers();
   1489 
   1490 		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
   1491 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.member(memberNdx));
   1492 	}
   1493 	else
   1494 	{
   1495 		DE_ASSERT(curType.isBasicType());
   1496 
   1497 		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
   1498 		const int		varNdx	= layout.getVariableIndex(apiName);
   1499 
   1500 		DE_ASSERT(varNdx >= 0);
   1501 		{
   1502 			const BufferVarLayoutEntry& varLayout = layout.bufferVars[varNdx];
   1503 			copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
   1504 		}
   1505 	}
   1506 }
   1507 
   1508 void copyNonWrittenData (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& srcPtrs, const vector<BlockDataPtr>& dstPtrs)
   1509 {
   1510 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
   1511 	{
   1512 		const BufferBlock&	block			= interface.getBlock(declNdx);
   1513 		const bool			isArray			= block.isArray();
   1514 		const int			numInstances	= isArray ? block.getArraySize() : 1;
   1515 
   1516 		DE_ASSERT(!isArray || block.getInstanceName());
   1517 
   1518 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
   1519 		{
   1520 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
   1521 			const int			blockNdx		= layout.getBlockIndex(instanceName);
   1522 			const BlockDataPtr&	srcBlockPtr		= srcPtrs[blockNdx];
   1523 			const BlockDataPtr&	dstBlockPtr		= dstPtrs[blockNdx];
   1524 
   1525 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
   1526 			{
   1527 				const BufferVar& bufVar = *varIter;
   1528 
   1529 				if (bufVar.getFlags() & ACCESS_WRITE)
   1530 					continue;
   1531 
   1532 				copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
   1533 			}
   1534 		}
   1535 	}
   1536 }
   1537 
   1538 bool compareComponents (glu::DataType scalarType, const void* ref, const void* res, int numComps)
   1539 {
   1540 	if (scalarType == glu::TYPE_FLOAT)
   1541 	{
   1542 		const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.
   1543 
   1544 		for (int ndx = 0; ndx < numComps; ndx++)
   1545 		{
   1546 			const float		refVal		= *((const float*)ref + ndx);
   1547 			const float		resVal		= *((const float*)res + ndx);
   1548 
   1549 			if (deFloatAbs(resVal - refVal) >= threshold)
   1550 				return false;
   1551 		}
   1552 	}
   1553 	else if (scalarType == glu::TYPE_BOOL)
   1554 	{
   1555 		for (int ndx = 0; ndx < numComps; ndx++)
   1556 		{
   1557 			const deUint32	refVal		= *((const deUint32*)ref + ndx);
   1558 			const deUint32	resVal		= *((const deUint32*)res + ndx);
   1559 
   1560 			if ((refVal != 0) != (resVal != 0))
   1561 				return false;
   1562 		}
   1563 	}
   1564 	else
   1565 	{
   1566 		DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
   1567 
   1568 		for (int ndx = 0; ndx < numComps; ndx++)
   1569 		{
   1570 			const deUint32	refVal		= *((const deUint32*)ref + ndx);
   1571 			const deUint32	resVal		= *((const deUint32*)res + ndx);
   1572 
   1573 			if (refVal != resVal)
   1574 				return false;
   1575 		}
   1576 	}
   1577 
   1578 	return true;
   1579 }
   1580 
// Compares a single buffer variable between reference and result block data.
// Handles top-level (array-of-array) and bottom-level array dimensions as well
// as row-/column-major matrices; logs up to maxPrints mismatches per variable.
// Returns true iff all compared elements match.
bool compareBufferVarData (tcu::TestLog& log, const BufferVarLayoutEntry& refEntry, const BlockDataPtr& refBlockPtr, const BufferVarLayoutEntry& resEntry, const BlockDataPtr& resBlockPtr)
{
	// Result may cover fewer elements than the reference (e.g. a smaller
	// unsized array), but the element type must be identical.
	DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
	DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
	DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
	DE_ASSERT(resEntry.type == refEntry.type);

	deUint8* const			resBasePtr			= (deUint8*)resBlockPtr.ptr + resEntry.offset;
	const deUint8* const	refBasePtr			= (const deUint8*)refBlockPtr.ptr + refEntry.offset;
	const glu::DataType		scalarType			= glu::getDataTypeScalarType(refEntry.type);
	const int				scalarSize			= glu::getDataTypeScalarSize(resEntry.type);
	const bool				isMatrix			= glu::isDataTypeMatrix(resEntry.type);
	const int				compSize			= sizeof(deUint32);	// All handled scalar types are 32-bit.
	const int				maxPrints			= 3;				// Cap on per-variable mismatch log messages.
	int						numFailed			= 0;

	// An arraySize/topLevelArraySize of 0 denotes an unsized array; the actual
	// element count then comes from the block data pointer.
	const int				resArraySize		= resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
	const int				resArrayStride		= resEntry.arrayStride;
	const int				resTopLevelSize		= resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
	const int				resTopLevelStride	= resEntry.topLevelArrayStride;
	const int				refArraySize		= refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
	const int				refArrayStride		= refEntry.arrayStride;
	const int				refTopLevelSize		= refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
	const int				refTopLevelStride	= refEntry.topLevelArrayStride;

	DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
	// Silence unused-variable warnings in release builds (asserts compile out).
	DE_UNREF(refArraySize && refTopLevelSize);

	// Iterate only over the elements present in the result data.
	for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
	{
		deUint8* const			resTopPtr	= resBasePtr + topElemNdx*resTopLevelStride;
		const deUint8* const	refTopPtr	= refBasePtr + topElemNdx*refTopLevelStride;

		for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
		{
			deUint8* const			resElemPtr	= resTopPtr + elementNdx*resArrayStride;
			const deUint8* const	refElemPtr	= refTopPtr + elementNdx*refArrayStride;

			if (isMatrix)
			{
				const int	numRows	= glu::getDataTypeMatrixNumRows(resEntry.type);
				const int	numCols	= glu::getDataTypeMatrixNumColumns(resEntry.type);
				bool		isOk	= true;

				// Compare component-by-component; ref and res may use different
				// matrix orders (row- vs column-major) and strides.
				for (int colNdx = 0; colNdx < numCols; colNdx++)
				{
					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
					{
						deUint8*		resCompPtr	= resElemPtr + (resEntry.isRowMajor ? rowNdx*resEntry.matrixStride + colNdx*compSize
																						: colNdx*resEntry.matrixStride + rowNdx*compSize);
						const deUint8*	refCompPtr	= refElemPtr + (refEntry.isRowMajor ? rowNdx*refEntry.matrixStride + colNdx*compSize
																						: colNdx*refEntry.matrixStride + rowNdx*compSize);

						// Guard against reading/writing past the block data.
						DE_ASSERT((deIntptr)(refCompPtr + compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
						DE_ASSERT((deIntptr)(resCompPtr + compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);

						isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
					}
				}

				if (!isOk)
				{
					numFailed += 1;
					if (numFailed < maxPrints)
					{
						std::ostringstream expected, got;
						generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, refElemPtr);
						generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, resElemPtr);
						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
												<< "  expected " << expected.str() << "\n"
												<< "  got " << got.str()
							<< TestLog::EndMessage;
					}
				}
			}
			else
			{
				// Scalar or vector: compare all components at once.
				DE_ASSERT((deIntptr)(refElemPtr + scalarSize*compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
				DE_ASSERT((deIntptr)(resElemPtr + scalarSize*compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);

				const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);

				if (!isOk)
				{
					numFailed += 1;
					if (numFailed < maxPrints)
					{
						std::ostringstream expected, got;
						generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
						generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
												<< "  expected " << expected.str() << "\n"
												<< "  got " << got.str()
							<< TestLog::EndMessage;
					}
				}
			}
		}
	}

	if (numFailed >= maxPrints)
		log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)" << TestLog::EndMessage;

	return numFailed == 0;
}
   1686 
   1687 bool compareData (tcu::TestLog& log, const BufferLayout& refLayout, const vector<BlockDataPtr>& refBlockPointers, const BufferLayout& resLayout, const vector<BlockDataPtr>& resBlockPointers)
   1688 {
   1689 	const int	numBlocks	= (int)refLayout.blocks.size();
   1690 	bool		allOk		= true;
   1691 
   1692 	for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
   1693 	{
   1694 		const BlockLayoutEntry&		refBlock	= refLayout.blocks[refBlockNdx];
   1695 		const BlockDataPtr&			refBlockPtr	= refBlockPointers[refBlockNdx];
   1696 		int							resBlockNdx	= resLayout.getBlockIndex(refBlock.name.c_str());
   1697 
   1698 		if (resBlockNdx >= 0)
   1699 		{
   1700 			DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
   1701 
   1702 			const BlockDataPtr& resBlockPtr = resBlockPointers[resBlockNdx];
   1703 
   1704 			for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin(); refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
   1705 			{
   1706 				const BufferVarLayoutEntry&	refEntry	= refLayout.bufferVars[*refVarNdxIter];
   1707 				int							resVarNdx	= resLayout.getVariableIndex(refEntry.name.c_str());
   1708 
   1709 				if (resVarNdx >= 0)
   1710 				{
   1711 					const BufferVarLayoutEntry& resEntry = resLayout.bufferVars[resVarNdx];
   1712 					allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
   1713 				}
   1714 			}
   1715 		}
   1716 	}
   1717 
   1718 	return allOk;
   1719 }
   1720 
   1721 string getBlockAPIName (const BufferBlock& block, int instanceNdx)
   1722 {
   1723 	DE_ASSERT(block.isArray() || instanceNdx == 0);
   1724 	return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
   1725 }
   1726 
// \note Some implementations don't report block members in the order they are declared.
//		 For checking whether the size has to be adjusted by the actual size of some
//		 top-level array, we only need to know a) whether there is an unsized top-level
//		 array, and b) what the stride of that array is.
   1731 
   1732 static bool hasUnsizedArray (const BufferLayout& layout, const BlockLayoutEntry& entry)
   1733 {
   1734 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
   1735 	{
   1736 		if (isUnsizedArray(layout.bufferVars[*varNdx]))
   1737 			return true;
   1738 	}
   1739 
   1740 	return false;
   1741 }
   1742 
   1743 static int getUnsizedArrayStride (const BufferLayout& layout, const BlockLayoutEntry& entry)
   1744 {
   1745 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
   1746 	{
   1747 		const BufferVarLayoutEntry& varEntry = layout.bufferVars[*varNdx];
   1748 
   1749 		if (varEntry.arraySize == 0)
   1750 			return varEntry.arrayStride;
   1751 		else if (varEntry.topLevelArraySize == 0)
   1752 			return varEntry.topLevelArrayStride;
   1753 	}
   1754 
   1755 	return 0;
   1756 }
   1757 
   1758 vector<int> computeBufferSizes (const ShaderInterface& interface, const BufferLayout& layout)
   1759 {
   1760 	vector<int> sizes(layout.blocks.size());
   1761 
   1762 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
   1763 	{
   1764 		const BufferBlock&	block			= interface.getBlock(declNdx);
   1765 		const bool			isArray			= block.isArray();
   1766 		const int			numInstances	= isArray ? block.getArraySize() : 1;
   1767 
   1768 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
   1769 		{
   1770 			const string	apiName		= getBlockAPIName(block, instanceNdx);
   1771 			const int		blockNdx	= layout.getBlockIndex(apiName);
   1772 
   1773 			if (blockNdx >= 0)
   1774 			{
   1775 				const BlockLayoutEntry&		blockLayout		= layout.blocks[blockNdx];
   1776 				const int					baseSize		= blockLayout.size;
   1777 				const bool					isLastUnsized	= hasUnsizedArray(layout, blockLayout);
   1778 				const int					lastArraySize	= isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
   1779 				const int					stride			= isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
   1780 
   1781 				sizes[blockNdx] = baseSize + lastArraySize*stride;
   1782 			}
   1783 		}
   1784 	}
   1785 
   1786 	return sizes;
   1787 }
   1788 
   1789 BlockDataPtr getBlockDataPtr (const BufferLayout& layout, const BlockLayoutEntry& blockLayout, void* ptr, int bufferSize)
   1790 {
   1791 	const bool	isLastUnsized	= hasUnsizedArray(layout, blockLayout);
   1792 	const int	baseSize		= blockLayout.size;
   1793 
   1794 	if (isLastUnsized)
   1795 	{
   1796 		const int		lastArrayStride	= getUnsizedArrayStride(layout, blockLayout);
   1797 		const int		lastArraySize	= (bufferSize-baseSize) / (lastArrayStride ? lastArrayStride : 1);
   1798 
   1799 		DE_ASSERT(baseSize + lastArraySize*lastArrayStride == bufferSize);
   1800 
   1801 		return BlockDataPtr(ptr, bufferSize, lastArraySize);
   1802 	}
   1803 	else
   1804 		return BlockDataPtr(ptr, bufferSize, 0);
   1805 }
   1806 
// Simple aggregate pairing a buffer object identifier with its size in bytes.
// \note Not referenced in the visible portion of this file — possibly a
//		 leftover from the GLES version of this test; confirm before removing.
struct Buffer
{
	deUint32				buffer;	// Buffer object identifier.
	int						size;	// Size in bytes.

	Buffer (deUint32 buffer_, int size_) : buffer(buffer_), size(size_) {}
	Buffer (void) : buffer(0), size(0) {}
};
   1815 
// Location of one block's data within the set of backing buffers: which
// buffer it lives in, and the byte offset and size of its range there.
struct BlockLocation
{
	int						index;	// Index into the buffer list.
	int						offset;	// Byte offset of the block within that buffer.
	int						size;	// Size of the block's data in bytes.

	BlockLocation (int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_) {}
	BlockLocation (void) : index(0), offset(0), size(0) {}
};
   1825 
// Allocates reference-data storage for all blocks as one contiguous byte
// vector and fills in a BlockDataPtr per block pointing into that vector.
// Storage must be empty on entry.
void initRefDataStorage (const ShaderInterface& interface, const BufferLayout& layout, RefDataStorage& storage)
{
	DE_ASSERT(storage.data.empty() && storage.pointers.empty());

	const vector<int>	bufferSizes = computeBufferSizes(interface, layout);
	int					totalSize	= 0;

	for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
		totalSize += *sizeIter;

	storage.data.resize(totalSize);

	// Pointers for each block.
	{
		// basePtr is DE_NULL only when totalSize is 0, in which case no
		// pointer arithmetic result is ever dereferenced.
		deUint8*	basePtr		= storage.data.empty() ? DE_NULL : &storage.data[0];
		int			curOffset	= 0;

		DE_ASSERT(bufferSizes.size() == layout.blocks.size());
		DE_ASSERT(totalSize == 0 || basePtr);

		storage.pointers.resize(layout.blocks.size());

		// Blocks are laid out back-to-back in the storage vector.
		for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
		{
			const BlockLayoutEntry&	blockLayout		= layout.blocks[blockNdx];
			const int				bufferSize		= bufferSizes[blockNdx];

			storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);

			curOffset += bufferSize;
		}
	}
}
   1859 
   1860 
   1861 vector<BlockDataPtr> blockLocationsToPtrs (const BufferLayout& layout, const vector<BlockLocation>& blockLocations, const vector<void*>& bufPtrs)
   1862 {
   1863 	vector<BlockDataPtr> blockPtrs(blockLocations.size());
   1864 
   1865 	DE_ASSERT(layout.blocks.size() == blockLocations.size());
   1866 
   1867 	for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
   1868 	{
   1869 		const BlockLayoutEntry&	blockLayout		= layout.blocks[blockNdx];
   1870 		const BlockLocation&	location		= blockLocations[blockNdx];
   1871 
   1872 		blockPtrs[blockNdx] = getBlockDataPtr(layout, blockLayout, (deUint8*)bufPtrs[location.index] + location.offset, location.size);
   1873 	}
   1874 
   1875 	return blockPtrs;
   1876 }
   1877 
   1878 } // anonymous (utilities)
   1879 
   1880 de::MovePtr<vk::Allocation> allocateAndBindMemory (Context& context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
   1881 {
   1882 	const vk::DeviceInterface&		vkd		= context.getDeviceInterface();
   1883 	const vk::VkMemoryRequirements	bufReqs	= vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
   1884 	de::MovePtr<vk::Allocation>		memory	= context.getDefaultAllocator().allocate(bufReqs, memReqs);
   1885 
   1886 	vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
   1887 
   1888 	return memory;
   1889 }
   1890 
// Creates an exclusive-sharing-mode buffer of the given size and usage,
// associated with the universal queue family. Ownership passes to the caller.
vk::Move<vk::VkBuffer> createBuffer (Context& context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
{
	const vk::VkDevice			vkDevice			= context.getDevice();
	const vk::DeviceInterface&	vk					= context.getDeviceInterface();
	const deUint32			queueFamilyIndex	= context.getUniversalQueueFamilyIndex();

	const vk::VkBufferCreateInfo	bufferInfo		=
	{
		vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
		DE_NULL,									// const void*			pNext;
		0u,											// VkBufferCreateFlags	flags;
		bufferSize,									// VkDeviceSize			size;
		usageFlags,									// VkBufferUsageFlags	usage;
		vk::VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
		1u,											// deUint32				queueFamilyIndexCount;
		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
	};

	return vk::createBuffer(vk, vkDevice, &bufferInfo);
}
   1911 
   1912 // SSBOLayoutCaseInstance
   1913 
// Test instance for a single SSBO layout case: uploads initial block data,
// dispatches the generated compute shader, and checks the results in iterate().
class SSBOLayoutCaseInstance : public TestInstance
{
public:
								SSBOLayoutCaseInstance	(Context&					context,
														SSBOLayoutCase::BufferMode	bufferMode,
														const ShaderInterface&		interface,
														const BufferLayout&			refLayout,
														const RefDataStorage&		initialData,
														const RefDataStorage&		writeData);
	virtual						~SSBOLayoutCaseInstance	(void);
	virtual tcu::TestStatus		iterate						(void);

private:
	SSBOLayoutCase::BufferMode	m_bufferMode;	// One buffer per block vs. a single shared buffer.
	const ShaderInterface&		m_interface;	// Block/variable declarations (owned by the case).
	const BufferLayout&			m_refLayout;	// Reference (computed) layout.
	const RefDataStorage&		m_initialData;	// Initial data stored in buffer.
	const RefDataStorage&		m_writeData;	// Data written by compute shader.


	typedef de::SharedPtr<vk::Unique<vk::VkBuffer> >	VkBufferSp;
	typedef de::SharedPtr<vk::Allocation>				AllocationSp;

	// Backing buffers and their memory allocations, created in iterate().
	std::vector<VkBufferSp>		m_uniformBuffers;
	std::vector<AllocationSp>	m_uniformAllocs;
};
   1940 
// Stores references to case-owned data; no Vulkan objects are created here —
// all resource setup happens in iterate().
SSBOLayoutCaseInstance::SSBOLayoutCaseInstance (Context&					context,
												SSBOLayoutCase::BufferMode	bufferMode,
												const ShaderInterface&		interface,
												const BufferLayout&			refLayout,
												const RefDataStorage&		initialData,
												const RefDataStorage&		writeData)
	: TestInstance	(context)
	, m_bufferMode	(bufferMode)
	, m_interface	(interface)
	, m_refLayout	(refLayout)
	, m_initialData	(initialData)
	, m_writeData	(writeData)
{
}
   1955 
// Nothing to do explicitly; buffer and allocation members are released by
// their smart-pointer wrappers.
SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance (void)
{
}
   1959 
   1960 tcu::TestStatus SSBOLayoutCaseInstance::iterate (void)
   1961 {
   1962 	// todo: add compute stage availability check
   1963 	const vk::DeviceInterface&	vk					= m_context.getDeviceInterface();
   1964 	const vk::VkDevice			device				= m_context.getDevice();
   1965 	const vk::VkQueue			queue				= m_context.getUniversalQueue();
   1966 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
   1967 
   1968 	// Create descriptor set
   1969 	const deUint32 acBufferSize = 1024;
   1970 	vk::Move<vk::VkBuffer> acBuffer (createBuffer(m_context, acBufferSize, vk:: VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
   1971 	de::UniquePtr<vk::Allocation> acBufferAlloc (allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
   1972 
   1973 	deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
   1974 	flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
   1975 
   1976 	vk::DescriptorSetLayoutBuilder setLayoutBuilder;
   1977 	vk::DescriptorPoolBuilder poolBuilder;
   1978 
   1979 	setLayoutBuilder
   1980 		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
   1981 
   1982 	int numBlocks = 0;
   1983 	const int numBindings = m_interface.getNumBlocks();
   1984 	for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
   1985 	{
   1986 		const BufferBlock& block = m_interface.getBlock(bindingNdx);
   1987 		if (block.isArray())
   1988 		{
   1989 			setLayoutBuilder
   1990 				.addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(), vk::VK_SHADER_STAGE_COMPUTE_BIT);
   1991 			numBlocks += block.getArraySize();
   1992 		}
   1993 		else
   1994 		{
   1995 			setLayoutBuilder
   1996 				.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
   1997 			numBlocks += 1;
   1998 		}
   1999 	}
   2000 
   2001 	poolBuilder
   2002 		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)(1 + numBlocks));
   2003 
   2004 	const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
   2005 	const vk::Unique<vk::VkDescriptorPool> descriptorPool(poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2006 
   2007 	const vk::VkDescriptorSetAllocateInfo allocInfo =
   2008 	{
   2009 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
   2010 		DE_NULL,
   2011 		*descriptorPool,
   2012 		1u,
   2013 		&descriptorSetLayout.get(),
   2014 	};
   2015 
   2016 	const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
   2017 	const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
   2018 
   2019 	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
   2020 	std::vector<vk::VkDescriptorBufferInfo>	descriptors(numBlocks);
   2021 
   2022 	setUpdateBuilder
   2023 		.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
   2024 
   2025 	vector<BlockDataPtr>  mappedBlockPtrs;
   2026 
   2027 	// Upload base buffers
   2028 	const std::vector<int> bufferSizes	= computeBufferSizes(m_interface, m_refLayout);
   2029 	{
   2030 		std::vector<void*>				mapPtrs;
   2031 		std::vector<BlockLocation>		blockLocations	(numBlocks);
   2032 
   2033 		DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
   2034 
   2035 		if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
   2036 		{
   2037 			mapPtrs.resize(numBlocks);
   2038 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   2039 			{
   2040 				const deUint32 bufferSize = bufferSizes[blockNdx];
   2041 				DE_ASSERT(bufferSize > 0);
   2042 
   2043 				blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
   2044 
   2045 				vk::Move<vk::VkBuffer>				buffer		= createBuffer(m_context, bufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
   2046 				de::MovePtr<vk::Allocation>			alloc		= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
   2047 
   2048 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
   2049 
   2050 				mapPtrs[blockNdx] = alloc->getHostPtr();
   2051 
   2052 				m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
   2053 				m_uniformAllocs.push_back(AllocationSp(alloc.release()));
   2054 			}
   2055 		}
   2056 		else
   2057 		{
   2058 			DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
   2059 
   2060 			vk::VkPhysicalDeviceProperties properties;
   2061 			m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
   2062 			const int	bindingAlignment	= (int)properties.limits.minStorageBufferOffsetAlignment;
   2063 			int			curOffset			= 0;
   2064 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   2065 			{
   2066 				const int bufferSize = bufferSizes[blockNdx];
   2067 				DE_ASSERT(bufferSize > 0);
   2068 
   2069 				if (bindingAlignment > 0)
   2070 					curOffset = deRoundUp32(curOffset, bindingAlignment);
   2071 
   2072 				blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
   2073 				curOffset += bufferSize;
   2074 			}
   2075 
   2076 			const int						totalBufferSize = curOffset;
   2077 			vk::Move<vk::VkBuffer>			buffer			= createBuffer(m_context, totalBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
   2078 			de::MovePtr<vk::Allocation>		alloc			= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
   2079 
   2080 			mapPtrs.push_back(alloc->getHostPtr());
   2081 
   2082 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   2083 			{
   2084 				const deUint32						bufferSize	= bufferSizes[blockNdx];
   2085 				const deUint32						offset		= blockLocations[blockNdx].offset;
   2086 
   2087 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
   2088 			}
   2089 
   2090 			m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
   2091 			m_uniformAllocs.push_back(AllocationSp(alloc.release()));
   2092 		}
   2093 
   2094 		// Update remaining bindings
   2095 		{
   2096 			int blockNdx = 0;
   2097 			for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
   2098 			{
   2099 				const BufferBlock&	block				= m_interface.getBlock(bindingNdx);
   2100 				const int			numBlocksInBinding	= (block.isArray() ? block.getArraySize() : 1);
   2101 
   2102 				setUpdateBuilder.writeArray(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
   2103 					vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
   2104 
   2105 				blockNdx += numBlocksInBinding;
   2106 			}
   2107 		}
   2108 
   2109 		// Copy the initial data to the storage buffers
   2110 		{
   2111 			mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
   2112 			copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
   2113 
   2114 			for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
   2115 			{
   2116 				vk::Allocation* alloc = m_uniformAllocs[allocNdx].get();
   2117 				flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
   2118 			}
   2119 		}
   2120 	}
   2121 
   2122 	setUpdateBuilder.update(vk, device);
   2123 
   2124 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams =
   2125 	{
   2126 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType				sType;
   2127 		DE_NULL,											// const void*					pNext;
   2128 		(vk::VkPipelineLayoutCreateFlags)0,
   2129 		1u,													// deUint32						descriptorSetCount;
   2130 		&*descriptorSetLayout,								// const VkDescriptorSetLayout*	pSetLayouts;
   2131 		0u,													// deUint32						pushConstantRangeCount;
   2132 		DE_NULL,											// const VkPushConstantRange*	pPushConstantRanges;
   2133 	};
   2134 	vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
   2135 
   2136 	vk::Move<vk::VkShaderModule> shaderModule (createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
   2137 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
   2138 	{
   2139 		vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,// VkStructureType				sType;
   2140 		DE_NULL,												// const void*					pNext;
   2141 		(vk::VkPipelineShaderStageCreateFlags)0,
   2142 		vk::VK_SHADER_STAGE_COMPUTE_BIT,						// VkShaderStage				stage;
   2143 		*shaderModule,											// VkShader						shader;
   2144 		"main",													//
   2145 		DE_NULL,												// const VkSpecializationInfo*	pSpecializationInfo;
   2146 	};
   2147 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
   2148 	{
   2149 		vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType					sType;
   2150 		DE_NULL,											// const void*						pNext;
   2151 		0,													// VkPipelineCreateFlags			flags;
   2152 		pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
   2153 		*pipelineLayout,									// VkPipelineLayout					layout;
   2154 		DE_NULL,											// VkPipeline						basePipelineHandle;
   2155 		0,													// deInt32							basePipelineIndex;
   2156 	};
   2157 	vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
   2158 
   2159 	vk::Move<vk::VkCommandPool> cmdPool (createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
   2160 	vk::Move<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
   2161 
   2162 	const vk::VkCommandBufferBeginInfo cmdBufBeginParams =
   2163 	{
   2164 		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,	//	VkStructureType				sType;
   2165 		DE_NULL,											//	const void*					pNext;
   2166 		0u,													//	VkCmdBufferOptimizeFlags	flags;
   2167 		(const vk::VkCommandBufferInheritanceInfo*)DE_NULL,
   2168 	};
   2169 	VK_CHECK(vk.beginCommandBuffer(*cmdBuffer, &cmdBufBeginParams));
   2170 
   2171 	vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
   2172 	vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
   2173 
   2174 	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
   2175 
   2176 	// Add barriers for shader writes to storage buffers before host access
   2177 	std::vector<vk::VkBufferMemoryBarrier> barriers;
   2178 	if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
   2179 	{
   2180 		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   2181 		{
   2182 			const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
   2183 
   2184 			const vk::VkBufferMemoryBarrier barrier	=
   2185 			{
   2186 				vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
   2187 				DE_NULL,
   2188 				vk::VK_ACCESS_SHADER_WRITE_BIT,
   2189 				vk::VK_ACCESS_HOST_READ_BIT,
   2190 				VK_QUEUE_FAMILY_IGNORED,
   2191 				VK_QUEUE_FAMILY_IGNORED,
   2192 				uniformBuffer,
   2193 				0u,
   2194 				static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
   2195 			};
   2196 			barriers.push_back(barrier);
   2197 		}
   2198 	}
   2199 	else
   2200 	{
   2201 		const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
   2202 
   2203 		vk::VkDeviceSize totalSize	= 0;
   2204 		for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
   2205 			totalSize += bufferSizes[bufferNdx];
   2206 
   2207 		const vk::VkBufferMemoryBarrier barrier	=
   2208 		{
   2209 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
   2210 			DE_NULL,
   2211 			vk::VK_ACCESS_SHADER_WRITE_BIT,
   2212 			vk::VK_ACCESS_HOST_READ_BIT,
   2213 			VK_QUEUE_FAMILY_IGNORED,
   2214 			VK_QUEUE_FAMILY_IGNORED,
   2215 			uniformBuffer,
   2216 			0u,
   2217 			totalSize
   2218 		};
   2219 		barriers.push_back(barrier);
   2220 	}
   2221 	vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
   2222 						  0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
   2223 
   2224 	VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
   2225 
   2226 	vk::Move<vk::VkFence> fence (createFence(vk, device));
   2227 
   2228 	const vk::VkSubmitInfo  submitInfo  =
   2229 	{
   2230 		vk::VK_STRUCTURE_TYPE_SUBMIT_INFO,
   2231 		DE_NULL,
   2232 		0u,
   2233 		(const vk::VkSemaphore*)DE_NULL,
   2234 		(const vk::VkPipelineStageFlags*)DE_NULL,
   2235 		1u,
   2236 		&cmdBuffer.get(),
   2237 		0u,
   2238 		(const vk::VkSemaphore*)DE_NULL,
   2239 	};
   2240 
   2241 	VK_CHECK(vk.queueSubmit(queue, 1u, &submitInfo, *fence));
   2242 	VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
   2243 
   2244 	// Read back ac_numPassed data
   2245 	bool counterOk;
   2246 	{
   2247 		const int refCount = 1;
   2248 		int resCount = 0;
   2249 
   2250 		invalidateMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
   2251 
   2252 		resCount = *((const int*)acBufferAlloc->getHostPtr());
   2253 
   2254 		counterOk = (refCount == resCount);
   2255 		if (!counterOk)
   2256 		{
   2257 			m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount << ", expected " << refCount << TestLog::EndMessage;
   2258 		}
   2259 	}
   2260 
   2261 	for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
   2262 	{
   2263 		vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
   2264 		invalidateMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
   2265 	}
   2266 
   2267 	// Validate result
   2268 	const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers, m_refLayout, mappedBlockPtrs);
   2269 
   2270 	if (compareOk && counterOk)
   2271 		return tcu::TestStatus::pass("Result comparison and counter values are OK");
   2272 	else if (!compareOk && counterOk)
   2273 		return tcu::TestStatus::fail("Result comparison failed");
   2274 	else if (compareOk && !counterOk)
   2275 		return tcu::TestStatus::fail("Counter value incorrect");
   2276 	else
   2277 		return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
   2278 }
   2279 
   2280 // SSBOLayoutCase.
   2281 
// Case constructor: stores the configuration; heavy setup (reference layout,
// data generation, shader source) is deferred to init().
SSBOLayoutCase::SSBOLayoutCase (tcu::TestContext& testCtx, const char* name, const char* description, BufferMode bufferMode, MatrixLoadFlags matrixLoadFlag)
	: TestCase			(testCtx, name, description)
	, m_bufferMode		(bufferMode)		// PER_BLOCK: one VkBuffer per block; otherwise all blocks share a single buffer
	, m_matrixLoadFlag	(matrixLoadFlag)	// forwarded to generateComputeShader(); presumably selects matrix access style — confirm there
{
}
   2288 
SSBOLayoutCase::~SSBOLayoutCase (void)
{
	// Nothing to release explicitly; members are cleaned up by their own destructors.
}
   2292 
   2293 void SSBOLayoutCase::initPrograms (vk::SourceCollections& programCollection) const
   2294 {
   2295 	DE_ASSERT(!m_computeShaderSrc.empty());
   2296 
   2297 	if (usesRelaxedLayout(m_interface))
   2298 	{
   2299 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
   2300 			<< vk::GlslBuildOptions(vk::SPIRV_VERSION_1_0, vk::GlslBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
   2301 	}
   2302 	else
   2303 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
   2304 }
   2305 
   2306 TestInstance* SSBOLayoutCase::createInstance (Context& context) const
   2307 {
   2308 	if (!de::contains(context.getDeviceExtensions().begin(), context.getDeviceExtensions().end(), "VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
   2309 		TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
   2310 	return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData);
   2311 }
   2312 
// One-time case setup. The steps below are order-dependent: the reference
// layout must exist before storage is allocated, both data sets must be
// allocated before values are generated, and both value sets must be filled
// before copyNonWrittenData runs.
void SSBOLayoutCase::init ()
{
	computeReferenceLayout	(m_refLayout, m_interface);			// compute reference buffer layout from the shader interface
	initRefDataStorage		(m_interface, m_refLayout, m_initialData);	// allocate host-side storage for initial buffer contents
	initRefDataStorage		(m_interface, m_refLayout, m_writeData);	// ...and for the expected post-dispatch contents
	// Seeds are derived from the case name so each case gets distinct but
	// reproducible data; the two XOR constants keep the data sets independent.
	generateValues			(m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
	generateValues			(m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
	copyNonWrittenData		(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);	// presumably members the shader never writes keep their initial values — confirm in helper

	m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers, m_matrixLoadFlag);
}
   2324 
   2325 } // ssbo
   2326 } // vkt
   2327