Home | History | Annotate | Download | only in gl
      1 /*-------------------------------------------------------------------------
      2  * OpenGL Conformance Test Suite
      3  * -----------------------------
      4  *
      5  * Copyright (c) 2014-2016 The Khronos Group Inc.
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  */ /*!
     20  * \file
     21  * \brief
     22  */ /*-------------------------------------------------------------------*/
     23 
     24 #include "gl4cComputeShaderTests.hpp"
     25 #include "glwEnums.hpp"
     26 #include "glwFunctions.hpp"
     27 #include "tcuMatrix.hpp"
     28 #include "tcuMatrixUtil.hpp"
     29 #include "tcuRenderTarget.hpp"
     30 #include <cmath>
     31 #include <cstdarg>
     32 #include <sstream>
     33 
     34 namespace gl4cts
     35 {
     36 
     37 using namespace glw;
     38 using tcu::Vec2;
     39 using tcu::Vec3;
     40 using tcu::Vec4;
     41 using tcu::UVec4;
     42 using tcu::UVec3;
     43 using tcu::Mat4;
     44 
     45 namespace
     46 {
     47 
     48 typedef Vec3  vec2;
     49 typedef Vec3  vec3;
     50 typedef Vec4  vec4;
     51 typedef UVec3 uvec3;
     52 typedef UVec4 uvec4;
     53 typedef Mat4  mat4;
     54 
     55 const char* const kGLSLVer = "#version 430 core\n";
     56 
     57 class ComputeShaderBase : public deqp::SubcaseBase
     58 {
     59 
     60 public:
     61 	virtual ~ComputeShaderBase()
     62 	{
     63 	}
     64 
     65 	ComputeShaderBase()
     66 		: renderTarget(m_context.getRenderContext().getRenderTarget()), pixelFormat(renderTarget.getPixelFormat())
     67 	{
     68 		float epsilon_zero = 1.f / (1 << 13);
     69 		if (pixelFormat.redBits != 0 && pixelFormat.greenBits != 0 && pixelFormat.blueBits != 0 &&
     70 			pixelFormat.alphaBits != 0)
     71 		{
     72 			g_color_eps = vec4(1.f / ((float)(1 << pixelFormat.redBits) - 1.0f),
     73 							   1.f / ((float)(1 << pixelFormat.greenBits) - 1.0f),
     74 							   1.f / ((float)(1 << pixelFormat.blueBits) - 1.0f),
     75 							   1.f / ((float)(1 << pixelFormat.alphaBits) - 1.0f)) +
     76 						  vec4(epsilon_zero);
     77 		}
     78 		else if (pixelFormat.redBits != 0 && pixelFormat.greenBits != 0 && pixelFormat.blueBits != 0)
     79 		{
     80 			g_color_eps = vec4(1.f / ((float)(1 << pixelFormat.redBits) - 1.0f),
     81 							   1.f / ((float)(1 << pixelFormat.greenBits) - 1.0f),
     82 							   1.f / ((float)(1 << pixelFormat.blueBits) - 1.0f), 1.f) +
     83 						  vec4(epsilon_zero);
     84 		}
     85 		else
     86 		{
     87 			g_color_eps = vec4(epsilon_zero);
     88 		}
     89 	}
     90 
     91 	const tcu::RenderTarget& renderTarget;
     92 	const tcu::PixelFormat&  pixelFormat;
     93 	vec4					 g_color_eps;
     94 
     95 	uvec3 IndexTo3DCoord(GLuint idx, GLuint max_x, GLuint max_y)
     96 	{
     97 		const GLuint x = idx % max_x;
     98 		idx /= max_x;
     99 		const GLuint y = idx % max_y;
    100 		idx /= max_y;
    101 		const GLuint z = idx;
    102 		return uvec3(x, y, z);
    103 	}
    104 
    105 	bool CheckProgram(GLuint program, bool* compile_error = NULL)
    106 	{
    107 		GLint compile_status = GL_TRUE;
    108 		GLint status		 = GL_TRUE;
    109 		glGetProgramiv(program, GL_LINK_STATUS, &status);
    110 
    111 		if (status == GL_FALSE)
    112 		{
    113 			GLint attached_shaders;
    114 			glGetProgramiv(program, GL_ATTACHED_SHADERS, &attached_shaders);
    115 
    116 			if (attached_shaders > 0)
    117 			{
    118 				std::vector<GLuint> shaders(attached_shaders);
    119 				glGetAttachedShaders(program, attached_shaders, NULL, &shaders[0]);
    120 
    121 				for (GLint i = 0; i < attached_shaders; ++i)
    122 				{
    123 					GLenum type;
    124 					glGetShaderiv(shaders[i], GL_SHADER_TYPE, reinterpret_cast<GLint*>(&type));
    125 					switch (type)
    126 					{
    127 					case GL_VERTEX_SHADER:
    128 						m_context.getTestContext().getLog()
    129 							<< tcu::TestLog::Message << "*** Vertex Shader ***" << tcu::TestLog::EndMessage;
    130 						break;
    131 					case GL_TESS_CONTROL_SHADER:
    132 						m_context.getTestContext().getLog()
    133 							<< tcu::TestLog::Message << "*** Tessellation Control Shader ***"
    134 							<< tcu::TestLog::EndMessage;
    135 						break;
    136 					case GL_TESS_EVALUATION_SHADER:
    137 						m_context.getTestContext().getLog()
    138 							<< tcu::TestLog::Message << "*** Tessellation Evaluation Shader ***"
    139 							<< tcu::TestLog::EndMessage;
    140 						break;
    141 					case GL_GEOMETRY_SHADER:
    142 						m_context.getTestContext().getLog()
    143 							<< tcu::TestLog::Message << "*** Geometry Shader ***" << tcu::TestLog::EndMessage;
    144 						break;
    145 					case GL_FRAGMENT_SHADER:
    146 						m_context.getTestContext().getLog()
    147 							<< tcu::TestLog::Message << "*** Fragment Shader ***" << tcu::TestLog::EndMessage;
    148 						break;
    149 					case GL_COMPUTE_SHADER:
    150 						m_context.getTestContext().getLog()
    151 							<< tcu::TestLog::Message << "*** Compute Shader ***" << tcu::TestLog::EndMessage;
    152 						break;
    153 					default:
    154 						m_context.getTestContext().getLog()
    155 							<< tcu::TestLog::Message << "*** Unknown Shader ***" << tcu::TestLog::EndMessage;
    156 						break;
    157 					}
    158 
    159 					GLint res;
    160 					glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &res);
    161 					if (res != GL_TRUE)
    162 						compile_status = res;
    163 
    164 					GLint length;
    165 					glGetShaderiv(shaders[i], GL_SHADER_SOURCE_LENGTH, &length);
    166 					if (length > 0)
    167 					{
    168 						std::vector<GLchar> source(length);
    169 						glGetShaderSource(shaders[i], length, NULL, &source[0]);
    170 						m_context.getTestContext().getLog()
    171 							<< tcu::TestLog::Message << &source[0] << tcu::TestLog::EndMessage;
    172 					}
    173 
    174 					glGetShaderiv(shaders[i], GL_INFO_LOG_LENGTH, &length);
    175 					if (length > 0)
    176 					{
    177 						std::vector<GLchar> log(length);
    178 						glGetShaderInfoLog(shaders[i], length, NULL, &log[0]);
    179 						m_context.getTestContext().getLog()
    180 							<< tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
    181 					}
    182 				}
    183 			}
    184 
    185 			GLint length;
    186 			glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
    187 			if (length > 0)
    188 			{
    189 				std::vector<GLchar> log(length);
    190 				glGetProgramInfoLog(program, length, NULL, &log[0]);
    191 				m_context.getTestContext().getLog() << tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
    192 			}
    193 		}
    194 
    195 		if (compile_error)
    196 			*compile_error = (compile_status == GL_TRUE ? false : true);
    197 		if (compile_status != GL_TRUE)
    198 			return false;
    199 		return status == GL_TRUE ? true : false;
    200 	}
    201 
    202 	GLuint CreateComputeProgram(const std::string& cs)
    203 	{
    204 		const GLuint p = glCreateProgram();
    205 
    206 		if (!cs.empty())
    207 		{
    208 			const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
    209 			glAttachShader(p, sh);
    210 			glDeleteShader(sh);
    211 			const char* const src[2] = { kGLSLVer, cs.c_str() };
    212 			glShaderSource(sh, 2, src, NULL);
    213 			glCompileShader(sh);
    214 		}
    215 
    216 		return p;
    217 	}
    218 
    219 	GLuint CreateProgram(const std::string& vs, const std::string& fs)
    220 	{
    221 		const GLuint p = glCreateProgram();
    222 
    223 		if (!vs.empty())
    224 		{
    225 			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
    226 			glAttachShader(p, sh);
    227 			glDeleteShader(sh);
    228 			const char* const src[2] = { kGLSLVer, vs.c_str() };
    229 			glShaderSource(sh, 2, src, NULL);
    230 			glCompileShader(sh);
    231 		}
    232 		if (!fs.empty())
    233 		{
    234 			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
    235 			glAttachShader(p, sh);
    236 			glDeleteShader(sh);
    237 			const char* const src[2] = { kGLSLVer, fs.c_str() };
    238 			glShaderSource(sh, 2, src, NULL);
    239 			glCompileShader(sh);
    240 		}
    241 
    242 		return p;
    243 	}
    244 
    245 	GLuint BuildShaderProgram(GLenum type, const std::string& source)
    246 	{
    247 		const char* const src[2] = { kGLSLVer, source.c_str() };
    248 		return glCreateShaderProgramv(type, 2, src);
    249 	}
    250 
    251 	GLfloat distance(GLfloat p0, GLfloat p1)
    252 	{
    253 		return de::abs(p0 - p1);
    254 	}
    255 
    256 	inline bool ColorEqual(const vec4& c0, const vec4& c1, const vec4& epsilon)
    257 	{
    258 		if (distance(c0.x(), c1.x()) > epsilon.x())
    259 			return false;
    260 		if (distance(c0.y(), c1.y()) > epsilon.y())
    261 			return false;
    262 		if (distance(c0.z(), c1.z()) > epsilon.z())
    263 			return false;
    264 		if (distance(c0.w(), c1.w()) > epsilon.w())
    265 			return false;
    266 		return true;
    267 	}
    268 
    269 	inline bool ColorEqual(const vec3& c0, const vec3& c1, const vec4& epsilon)
    270 	{
    271 		if (distance(c0.x(), c1.x()) > epsilon.x())
    272 			return false;
    273 		if (distance(c0.y(), c1.y()) > epsilon.y())
    274 			return false;
    275 		if (distance(c0.z(), c1.z()) > epsilon.z())
    276 			return false;
    277 		return true;
    278 	}
    279 
    280 	bool ValidateReadBuffer(int x, int y, int w, int h, const vec4& expected)
    281 	{
    282 		std::vector<vec4> display(w * h);
    283 		glReadPixels(x, y, w, h, GL_RGBA, GL_FLOAT, &display[0]);
    284 
    285 		for (int j = 0; j < h; ++j)
    286 		{
    287 			for (int i = 0; i < w; ++i)
    288 			{
    289 				if (!ColorEqual(display[j * w + i], expected, g_color_eps))
    290 				{
    291 					m_context.getTestContext().getLog()
    292 						<< tcu::TestLog::Message << "Color at (" << (x + i) << ", " << (y + j) << ") is ["
    293 						<< display[j * w + i].x() << ", " << display[j * w + i].y() << ", " << display[j * w + i].z()
    294 						<< ", " << display[j * w + i].w() << "] should be [" << expected.x() << ", " << expected.y()
    295 						<< ", " << expected.z() << ", " << expected.w() << "]." << tcu::TestLog::EndMessage;
    296 					return false;
    297 				}
    298 			}
    299 		}
    300 
    301 		return true;
    302 	}
    303 
    304 	bool ValidateReadBufferCenteredQuad(int width, int height, const vec3& expected)
    305 	{
    306 		bool			  result = true;
    307 		std::vector<vec3> fb(width * height);
    308 		glReadPixels(0, 0, width, height, GL_RGB, GL_FLOAT, &fb[0]);
    309 
    310 		int startx = int(((float)width * 0.1f) + 1);
    311 		int starty = int(((float)height * 0.1f) + 1);
    312 		int endx   = int((float)width - 2 * (((float)width * 0.1f) + 1) - 1);
    313 		int endy   = int((float)height - 2 * (((float)height * 0.1f) + 1) - 1);
    314 
    315 		for (int y = starty; y < endy; ++y)
    316 		{
    317 			for (int x = startx; x < endx; ++x)
    318 			{
    319 				const int idx = y * width + x;
    320 				if (!ColorEqual(fb[idx], expected, g_color_eps))
    321 				{
    322 					return false;
    323 				}
    324 			}
    325 		}
    326 
    327 		if (!ColorEqual(fb[2 * width + 2], vec3(0), g_color_eps))
    328 		{
    329 			result = false;
    330 		}
    331 		if (!ColorEqual(fb[2 * width + (width - 3)], vec3(0), g_color_eps))
    332 		{
    333 			result = false;
    334 		}
    335 		if (!ColorEqual(fb[(height - 3) * width + (width - 3)], vec3(0), g_color_eps))
    336 		{
    337 			result = false;
    338 		}
    339 		if (!ColorEqual(fb[(height - 3) * width + 2], vec3(0), g_color_eps))
    340 		{
    341 			result = false;
    342 		}
    343 
    344 		return result;
    345 	}
    346 
    347 	int getWindowWidth()
    348 	{
    349 		return renderTarget.getWidth();
    350 	}
    351 
    352 	int getWindowHeight()
    353 	{
    354 		return renderTarget.getHeight();
    355 	}
    356 
    357 	bool ValidateWindow4Quads(const vec3& lb, const vec3& rb, const vec3& rt, const vec3& lt)
    358 	{
    359 		int				  width  = 100;
    360 		int				  height = 100;
    361 		std::vector<vec3> fb(width * height);
    362 		glReadPixels(0, 0, width, height, GL_RGB, GL_FLOAT, &fb[0]);
    363 
    364 		bool status = true;
    365 
    366 		// left-bottom quad
    367 		for (int y = 10; y < height / 2 - 10; ++y)
    368 		{
    369 			for (int x = 10; x < width / 2 - 10; ++x)
    370 			{
    371 				const int idx = y * width + x;
    372 				if (!ColorEqual(fb[idx], lb, g_color_eps))
    373 				{
    374 					m_context.getTestContext().getLog()
    375 						<< tcu::TestLog::Message << "First bad color (" << x << ", " << y << "): " << fb[idx].x() << " "
    376 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    377 					status = false;
    378 				}
    379 			}
    380 		}
    381 		// right-bottom quad
    382 		for (int y = 10; y < height / 2 - 10; ++y)
    383 		{
    384 			for (int x = width / 2 + 10; x < width - 10; ++x)
    385 			{
    386 				const int idx = y * width + x;
    387 				if (!ColorEqual(fb[idx], rb, g_color_eps))
    388 				{
    389 					m_context.getTestContext().getLog()
    390 						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
    391 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    392 					status = false;
    393 				}
    394 			}
    395 		}
    396 		// right-top quad
    397 		for (int y = height / 2 + 10; y < height - 10; ++y)
    398 		{
    399 			for (int x = width / 2 + 10; x < width - 10; ++x)
    400 			{
    401 				const int idx = y * width + x;
    402 				if (!ColorEqual(fb[idx], rt, g_color_eps))
    403 				{
    404 					m_context.getTestContext().getLog()
    405 						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
    406 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    407 					status = false;
    408 				}
    409 			}
    410 		}
    411 		// left-top quad
    412 		for (int y = height / 2 + 10; y < height - 10; ++y)
    413 		{
    414 			for (int x = 10; x < width / 2 - 10; ++x)
    415 			{
    416 				const int idx = y * width + x;
    417 				if (!ColorEqual(fb[idx], lt, g_color_eps))
    418 				{
    419 					m_context.getTestContext().getLog()
    420 						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
    421 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    422 					status = false;
    423 				}
    424 			}
    425 		}
    426 		// middle horizontal line should be black
    427 		for (int y = height / 2 - 2; y < height / 2 + 2; ++y)
    428 		{
    429 			for (int x = 0; x < width; ++x)
    430 			{
    431 				const int idx = y * width + x;
    432 				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
    433 				{
    434 					m_context.getTestContext().getLog()
    435 						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
    436 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    437 					status = false;
    438 				}
    439 			}
    440 		}
    441 		// middle vertical line should be black
    442 		for (int y = 0; y < height; ++y)
    443 		{
    444 			for (int x = width / 2 - 2; x < width / 2 + 2; ++x)
    445 			{
    446 				const int idx = y * width + x;
    447 				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
    448 				{
    449 					m_context.getTestContext().getLog()
    450 						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
    451 						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
    452 					status = false;
    453 				}
    454 			}
    455 		}
    456 
    457 		return status;
    458 	}
    459 
    460 	bool IsEqual(vec4 a, vec4 b)
    461 	{
    462 		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
    463 	}
    464 
    465 	bool IsEqual(uvec4 a, uvec4 b)
    466 	{
    467 		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
    468 	}
    469 };
    470 
    471 class SimpleCompute : public ComputeShaderBase
    472 {
    473 
    474 	virtual std::string Title()
    475 	{
    476 		return "Simplest possible Compute Shader";
    477 	}
    478 
    479 	virtual std::string Purpose()
    480 	{
    481 		return "1. Verify that CS can be created, compiled and linked.\n"
    482 			   "2. Verify that local work size can be queried with GetProgramiv command.\n"
    483 			   "3. Verify that CS can be dispatched with DispatchCompute command.\n"
    484 			   "4. Verify that CS can write to SSBO.";
    485 	}
    486 
    487 	virtual std::string Method()
    488 	{
    489 		return "Create and dispatch CS. Verify SSBO content.";
    490 	}
    491 
    492 	virtual std::string PassCriteria()
    493 	{
    494 		return "Everything works as expected.";
    495 	}
    496 
    497 	GLuint m_program;
    498 	GLuint m_buffer;
    499 
    500 	virtual long Setup()
    501 	{
    502 
    503 		const char* const glsl_cs =
    504 			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
    505 			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
    506 		m_program = CreateComputeProgram(glsl_cs);
    507 		glLinkProgram(m_program);
    508 		if (!CheckProgram(m_program))
    509 			return ERROR;
    510 
    511 		GLint v[3];
    512 		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
    513 		if (v[0] != 1 || v[1] != 1 || v[2] != 1)
    514 		{
    515 			m_context.getTestContext().getLog()
    516 				<< tcu::TestLog::Message << "Got " << v[0] << ", " << v[1] << ", " << v[2]
    517 				<< ", expected: 1, 1, 1 in GL_COMPUTE_WORK_GROUP_SIZE check" << tcu::TestLog::EndMessage;
    518 			return ERROR;
    519 		}
    520 
    521 		glGenBuffers(1, &m_buffer);
    522 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
    523 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), NULL, GL_DYNAMIC_DRAW);
    524 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    525 
    526 		return NO_ERROR;
    527 	}
    528 
    529 	virtual long Run()
    530 	{
    531 		glUseProgram(m_program);
    532 		glDispatchCompute(1, 1, 1);
    533 
    534 		vec4* data;
    535 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer);
    536 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    537 		data	   = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
    538 		long error = NO_ERROR;
    539 		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
    540 		{
    541 			error = ERROR;
    542 		}
    543 		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
    544 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    545 		return error;
    546 	}
    547 
    548 	virtual long Cleanup()
    549 	{
    550 		glUseProgram(0);
    551 		glDeleteProgram(m_program);
    552 		glDeleteBuffers(1, &m_buffer);
    553 		return NO_ERROR;
    554 	}
    555 };
    556 
    557 class BasicOneWorkGroup : public ComputeShaderBase
    558 {
    559 
    560 	virtual std::string Title()
    561 	{
    562 		return "One work group with various local sizes";
    563 	}
    564 
    565 	virtual std::string Purpose()
    566 	{
    567 		return NL "1. Verify that declared local work size has correct effect." NL
    568 				  "2. Verify that the number of shader invocations is correct." NL
    569 				  "3. Verify that the built-in variables: gl_WorkGroupSize, gl_WorkGroupID, gl_GlobalInvocationID," NL
    570 				  "    gl_LocalInvocationID and gl_LocalInvocationIndex has correct values." NL
    571 				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands work as expected.";
    572 	}
    573 
    574 	virtual std::string Method()
    575 	{
    576 		return NL "1. Create several CS with various local sizes." NL
    577 				  "2. Dispatch each CS with DispatchCompute and DispatchComputeIndirect commands." NL
    578 				  "3. Verify SSBO content.";
    579 	}
    580 
    581 	virtual std::string PassCriteria()
    582 	{
    583 		return "Everything works as expected.";
    584 	}
    585 
    586 	GLuint m_program;
    587 	GLuint m_storage_buffer;
    588 	GLuint m_dispatch_buffer;
    589 
    590 	std::string GenSource(int x, int y, int z, GLuint binding)
    591 	{
    592 		std::stringstream ss;
    593 		ss << NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
    594 		   << ") in;" NL "layout(std430, binding = " << binding
    595 		   << ") buffer Output {" NL "  uvec4 local_id[];" NL "} g_out;" NL "void main() {" NL
    596 			  "  if (gl_WorkGroupSize == uvec3("
    597 		   << x << ", " << y << ", " << z
    598 		   << ") && gl_WorkGroupID == uvec3(0) &&" NL "      gl_GlobalInvocationID == gl_LocalInvocationID) {" NL
    599 			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(gl_LocalInvocationID, 0);" NL "  } else {" NL
    600 			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(0xffff);" NL "  }" NL "}";
    601 		return ss.str();
    602 	}
    603 
    604 	bool RunIteration(int local_size_x, int local_size_y, int local_size_z, GLuint binding, bool dispatch_indirect)
    605 	{
    606 		if (m_program != 0)
    607 			glDeleteProgram(m_program);
    608 		m_program = CreateComputeProgram(GenSource(local_size_x, local_size_y, local_size_z, binding));
    609 		glLinkProgram(m_program);
    610 		if (!CheckProgram(m_program))
    611 			return false;
    612 
    613 		GLint v[3];
    614 		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
    615 		if (v[0] != local_size_x || v[1] != local_size_y || v[2] != local_size_z)
    616 		{
    617 			m_context.getTestContext().getLog()
    618 				<< tcu::TestLog::Message << "GL_COMPUTE_LOCAL_WORK_SIZE is (" << v[0] << " " << v[1] << " " << v[2]
    619 				<< ") should be (" << local_size_x << " " << local_size_y << " " << local_size_z << ")"
    620 				<< tcu::TestLog::EndMessage;
    621 			return false;
    622 		}
    623 
    624 		const int kSize = local_size_x * local_size_y * local_size_z;
    625 
    626 		if (m_storage_buffer == 0)
    627 			glGenBuffers(1, &m_storage_buffer);
    628 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, m_storage_buffer);
    629 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kSize, NULL, GL_DYNAMIC_DRAW);
    630 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    631 
    632 		glUseProgram(m_program);
    633 		if (dispatch_indirect)
    634 		{
    635 			const GLuint num_groups[3] = { 1, 1, 1 };
    636 			if (m_dispatch_buffer == 0)
    637 				glGenBuffers(1, &m_dispatch_buffer);
    638 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
    639 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_DRAW);
    640 			glDispatchComputeIndirect(0);
    641 		}
    642 		else
    643 		{
    644 			glDispatchCompute(1, 1, 1);
    645 		}
    646 
    647 		uvec4* data;
    648 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
    649 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    650 		data =
    651 			static_cast<uvec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kSize * sizeof(uvec4), GL_MAP_READ_BIT));
    652 
    653 		bool ret = true;
    654 
    655 		for (int z = 0; z < local_size_z; ++z)
    656 		{
    657 			for (int y = 0; y < local_size_y; ++y)
    658 			{
    659 				for (int x = 0; x < local_size_x; ++x)
    660 				{
    661 					const int index = z * local_size_x * local_size_y + y * local_size_x + x;
    662 					if (!IsEqual(data[index], uvec4(x, y, z, 0)))
    663 					{
    664 						m_context.getTestContext().getLog()
    665 							<< tcu::TestLog::Message << "Invalid data at offset " << index << tcu::TestLog::EndMessage;
    666 						ret = false;
    667 					}
    668 				}
    669 			}
    670 		}
    671 		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
    672 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    673 		return ret;
    674 	}
    675 
    676 	virtual long Setup()
    677 	{
    678 		m_program		  = 0;
    679 		m_storage_buffer  = 0;
    680 		m_dispatch_buffer = 0;
    681 		return NO_ERROR;
    682 	}
    683 
    684 	virtual long Run()
    685 	{
    686 		if (!RunIteration(16, 1, 1, 0, true))
    687 			return ERROR;
    688 		if (!RunIteration(8, 8, 1, 1, false))
    689 			return ERROR;
    690 		if (!RunIteration(4, 4, 4, 2, true))
    691 			return ERROR;
    692 		if (!RunIteration(1, 2, 3, 3, false))
    693 			return ERROR;
    694 		if (!RunIteration(1024, 1, 1, 3, true))
    695 			return ERROR;
    696 		if (!RunIteration(16, 8, 8, 3, false))
    697 			return ERROR;
    698 		if (!RunIteration(32, 1, 32, 7, true))
    699 			return ERROR;
    700 		return NO_ERROR;
    701 	}
    702 
    703 	virtual long Cleanup()
    704 	{
    705 		glUseProgram(0);
    706 		glDeleteProgram(m_program);
    707 		glDeleteBuffers(1, &m_storage_buffer);
    708 		glDeleteBuffers(1, &m_dispatch_buffer);
    709 		return NO_ERROR;
    710 	}
    711 };
    712 
    713 class BasicResourceUBO : public ComputeShaderBase
    714 {
    715 
    716 	virtual std::string Title()
    717 	{
    718 		return "Compute Shader resources - UBOs";
    719 	}
    720 
    721 	virtual std::string Purpose()
    722 	{
    723 		return "Verify that CS is able to read data from UBOs and write it to SSBO.";
    724 	}
    725 
    726 	virtual std::string Method()
    727 	{
    728 		return NL "1. Create CS which uses array of UBOs." NL
    729 				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
    730 				  "3. Read data from each UBO and write it to SSBO." NL "4. Verify SSBO content." NL
    731 				  "5. Repeat for different buffer and CS work sizes.";
    732 	}
    733 
    734 	virtual std::string PassCriteria()
    735 	{
    736 		return "Everything works as expected.";
    737 	}
    738 
    739 	GLuint m_program;
    740 	GLuint m_storage_buffer;
    741 	GLuint m_uniform_buffer[12];
    742 	GLuint m_dispatch_buffer;
    743 
    744 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
    745 	{
    746 		const uvec3		  global_size = local_size * num_groups;
    747 		std::stringstream ss;
    748 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
    749 		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
    750 		   << ", " << global_size.y() << ", " << global_size.z()
    751 		   << ");" NL "layout(std140) uniform InputBuffer {" NL "  vec4 data["
    752 		   << global_size.x() * global_size.y() * global_size.z()
    753 		   << "];" NL "} g_in_buffer[12];" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
    754 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
    755 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
    756 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
    757 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
    758 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
    759 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
    760 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
    761 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
    762 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data9["
    763 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data10["
    764 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data11["
    765 		   << global_size.x() * global_size.y() * global_size.z()
    766 		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  const uint global_index = gl_GlobalInvocationID.x +" NL
    767 			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
    768 			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
    769 			  "  g_out_buffer.data0[global_index] = g_in_buffer[0].data[global_index];" NL
    770 			  "  g_out_buffer.data1[global_index] = g_in_buffer[1].data[global_index];" NL
    771 			  "  g_out_buffer.data2[global_index] = g_in_buffer[2].data[global_index];" NL
    772 			  "  g_out_buffer.data3[global_index] = g_in_buffer[3].data[global_index];" NL
    773 			  "  g_out_buffer.data4[global_index] = g_in_buffer[4].data[global_index];" NL
    774 			  "  g_out_buffer.data5[global_index] = g_in_buffer[5].data[global_index];" NL
    775 			  "  g_out_buffer.data6[global_index] = g_in_buffer[6].data[global_index];" NL
    776 			  "  g_out_buffer.data7[global_index] = g_in_buffer[7].data[global_index];" NL
    777 			  "  g_out_buffer.data8[global_index] = g_in_buffer[8].data[global_index];" NL
    778 			  "  g_out_buffer.data9[global_index] = g_in_buffer[9].data[global_index];" NL
    779 			  "  g_out_buffer.data10[global_index] = g_in_buffer[10].data[global_index];" NL
    780 			  "  g_out_buffer.data11[global_index] = g_in_buffer[11].data[global_index];" NL "}";
    781 		return ss.str();
    782 	}
    783 
    784 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
    785 	{
    786 		if (m_program != 0)
    787 			glDeleteProgram(m_program);
    788 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
    789 		glLinkProgram(m_program);
    790 		if (!CheckProgram(m_program))
    791 			return false;
    792 
    793 		for (GLuint i = 0; i < 12; ++i)
    794 		{
    795 			char name[32];
    796 			sprintf(name, "InputBuffer[%u]", i);
    797 			const GLuint index = glGetUniformBlockIndex(m_program, name);
    798 			glUniformBlockBinding(m_program, index, i);
    799 			GLint p = 0;
    800 			glGetActiveUniformBlockiv(m_program, index, GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER, &p);
    801 			if (p == GL_FALSE)
    802 			{
    803 				m_context.getTestContext().getLog()
    804 					<< tcu::TestLog::Message << "UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER should be TRUE."
    805 					<< tcu::TestLog::EndMessage;
    806 				return false;
    807 			}
    808 		}
    809 
    810 		const GLuint kBufferSize =
    811 			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
    812 
    813 		if (m_storage_buffer == 0)
    814 			glGenBuffers(1, &m_storage_buffer);
    815 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
    816 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 12, NULL, GL_DYNAMIC_DRAW);
    817 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    818 
    819 		if (m_uniform_buffer[0] == 0)
    820 			glGenBuffers(12, m_uniform_buffer);
    821 		for (GLuint i = 0; i < 12; ++i)
    822 		{
    823 			std::vector<vec4> data(kBufferSize);
    824 			for (GLuint j = 0; j < kBufferSize; ++j)
    825 			{
    826 				data[j] = vec4(static_cast<float>(i * kBufferSize + j));
    827 			}
    828 			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
    829 			glBufferData(GL_UNIFORM_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
    830 		}
    831 		glBindBuffer(GL_UNIFORM_BUFFER, 0);
    832 
    833 		glUseProgram(m_program);
    834 		if (dispatch_indirect)
    835 		{
    836 			if (m_dispatch_buffer == 0)
    837 				glGenBuffers(1, &m_dispatch_buffer);
    838 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
    839 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
    840 			glDispatchComputeIndirect(0);
    841 		}
    842 		else
    843 		{
    844 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
    845 		}
    846 
    847 		std::vector<vec4> data(kBufferSize * 12);
    848 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
    849 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    850 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize * 12, &data[0]);
    851 
    852 		for (GLuint z = 0; z < local_size.z() * num_groups.z(); ++z)
    853 		{
    854 			for (GLuint y = 0; y < local_size.y() * num_groups.y(); ++y)
    855 			{
    856 				for (GLuint x = 0; x < local_size.x() * num_groups.x(); ++x)
    857 				{
    858 					const GLuint index = z * local_size.x() * num_groups.x() * local_size.y() * num_groups.y() +
    859 										 y * local_size.x() * num_groups.x() + x;
    860 					for (int i = 0; i < 1; ++i)
    861 					{
    862 						if (!IsEqual(data[index * 12 + i], vec4(static_cast<float>(index * 12 + i))))
    863 						{
    864 							m_context.getTestContext().getLog() << tcu::TestLog::Message << "Incorrect data at offset "
    865 																<< index * 12 + i << "." << tcu::TestLog::EndMessage;
    866 							return false;
    867 						}
    868 					}
    869 				}
    870 			}
    871 		}
    872 		return true;
    873 	}
    874 
    875 	virtual long Setup()
    876 	{
    877 		m_program		 = 0;
    878 		m_storage_buffer = 0;
    879 		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
    880 		m_dispatch_buffer = 0;
    881 		return NO_ERROR;
    882 	}
    883 
    884 	virtual long Run()
    885 	{
    886 		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
    887 			return ERROR;
    888 		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), true))
    889 			return ERROR;
    890 		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
    891 			return ERROR;
    892 		return NO_ERROR;
    893 	}
    894 
    895 	virtual long Cleanup()
    896 	{
    897 		glUseProgram(0);
    898 		glDeleteProgram(m_program);
    899 		glDeleteBuffers(1, &m_storage_buffer);
    900 		glDeleteBuffers(12, m_uniform_buffer);
    901 		glDeleteBuffers(1, &m_dispatch_buffer);
    902 		return NO_ERROR;
    903 	}
    904 };
    905 
    906 class BasicResourceTexture : public ComputeShaderBase
    907 {
    908 
    909 	virtual std::string Title()
    910 	{
    911 		return NL "Compute Shader resources - Textures";
    912 	}
    913 
    914 	virtual std::string Purpose()
    915 	{
    916 		return NL "Verify that texture access works correctly in CS.";
    917 	}
    918 
    919 	virtual std::string Method()
    920 	{
    921 		return NL "1. Create CS which uses all sampler types (sampler1D, sampler2D, sampler3D, sampler2DRect," NL
    922 				  "    sampler1DArray, sampler2DArray, samplerBuffer, sampler2DMS, sampler2DMSArray)." NL
    923 				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
    924 				  "3. Sample each texture and write sampled value to SSBO." NL "4. Verify SSBO content." NL
    925 				  "5. Repeat for different texture and CS work sizes.";
    926 	}
    927 
    928 	virtual std::string PassCriteria()
    929 	{
    930 		return NL "Everything works as expected.";
    931 	}
    932 
    933 	GLuint m_program;
    934 	GLuint m_storage_buffer;
    935 	GLuint m_texture[9];
    936 	GLuint m_texture_buffer;
    937 	GLuint m_dispatch_buffer;
    938 
    939 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
    940 	{
    941 		const uvec3		  global_size = local_size * num_groups;
    942 		std::stringstream ss;
    943 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
    944 		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
    945 		   << ", " << global_size.y() << ", " << global_size.z()
    946 		   << ");" NL "uniform sampler1D g_sampler0;" NL "uniform sampler2D g_sampler1;" NL
    947 			  "uniform sampler3D g_sampler2;" NL "uniform sampler2DRect g_sampler3;" NL
    948 			  "uniform sampler1DArray g_sampler4;" NL "uniform sampler2DArray g_sampler5;" NL
    949 			  "uniform samplerBuffer g_sampler6;" NL "uniform sampler2DMS g_sampler7;" NL
    950 			  "uniform sampler2DMSArray g_sampler8;" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
    951 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
    952 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
    953 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
    954 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
    955 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
    956 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
    957 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
    958 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
    959 		   << global_size.x() * global_size.y() * global_size.z()
    960 		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  const uint global_index = gl_GlobalInvocationID.x +" NL
    961 			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
    962 			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
    963 			  "  g_out_buffer.data0[global_index] = texelFetch(g_sampler0, int(gl_GlobalInvocationID), 0);" NL
    964 			  "  g_out_buffer.data1[global_index] = texture(g_sampler1, vec2(gl_GlobalInvocationID) / "
    965 			  "vec2(kGlobalSize));" NL "  g_out_buffer.data2[global_index] = textureProj(g_sampler2, "
    966 			  "vec4(vec3(gl_GlobalInvocationID) / vec3(kGlobalSize), 1.0));" NL
    967 			  "  g_out_buffer.data3[global_index] = textureProjOffset(g_sampler3, vec3(vec2(gl_GlobalInvocationID), "
    968 			  "1.0), ivec2(0));" NL "  g_out_buffer.data4[global_index] = textureLodOffset(g_sampler4, "
    969 			  "vec2(gl_GlobalInvocationID.x / kGlobalSize.x, gl_GlobalInvocationID.y), 0.0, "
    970 			  "0);" NL "  g_out_buffer.data5[global_index] = texelFetchOffset(g_sampler5, "
    971 			  "ivec3(gl_GlobalInvocationID), 0, ivec2(0));" NL
    972 			  "  g_out_buffer.data6[global_index] = texelFetch(g_sampler6, int(global_index));" NL
    973 			  "  g_out_buffer.data7[global_index] = texelFetch(g_sampler7, ivec2(gl_GlobalInvocationID), 1);" NL
    974 			  "  g_out_buffer.data8[global_index] = texelFetch(g_sampler8, ivec3(gl_GlobalInvocationID), 2);" NL "}";
    975 		return ss.str();
    976 	}
    977 
    978 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
    979 	{
    980 		if (m_program != 0)
    981 			glDeleteProgram(m_program);
    982 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
    983 		glLinkProgram(m_program);
    984 		if (!CheckProgram(m_program))
    985 			return false;
    986 
    987 		glUseProgram(m_program);
    988 		for (int i = 0; i < 9; ++i)
    989 		{
    990 			char name[32];
    991 			sprintf(name, "g_sampler%d", i);
    992 			glUniform1i(glGetUniformLocation(m_program, name), i);
    993 		}
    994 		glUseProgram(0);
    995 
    996 		const GLuint kBufferSize =
    997 			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
    998 		const GLint kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
    999 		const GLint kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
   1000 		const GLint kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
   1001 
   1002 		std::vector<vec4> buffer_data(kBufferSize * 9);
   1003 		if (m_storage_buffer == 0)
   1004 			glGenBuffers(1, &m_storage_buffer);
   1005 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   1006 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 9, &buffer_data[0], GL_DYNAMIC_DRAW);
   1007 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   1008 
   1009 		std::vector<vec4> texture_data(kBufferSize, vec4(123.0f));
   1010 		if (m_texture[0] == 0)
   1011 			glGenTextures(9, m_texture);
   1012 		if (m_texture_buffer == 0)
   1013 			glGenBuffers(1, &m_texture_buffer);
   1014 
   1015 		glActiveTexture(GL_TEXTURE0);
   1016 		glBindTexture(GL_TEXTURE_1D, m_texture[0]);
   1017 		glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1018 		glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1019 		glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, kWidth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
   1020 
   1021 		glActiveTexture(GL_TEXTURE1);
   1022 		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
   1023 		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1024 		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1025 		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
   1026 
   1027 		glActiveTexture(GL_TEXTURE2);
   1028 		glBindTexture(GL_TEXTURE_3D, m_texture[2]);
   1029 		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1030 		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1031 		glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
   1032 
   1033 		glActiveTexture(GL_TEXTURE3);
   1034 		glBindTexture(GL_TEXTURE_RECTANGLE, m_texture[3]);
   1035 		glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1036 		glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1037 		glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
   1038 
   1039 		glActiveTexture(GL_TEXTURE4);
   1040 		glBindTexture(GL_TEXTURE_1D_ARRAY, m_texture[4]);
   1041 		glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1042 		glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1043 		glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
   1044 
   1045 		glActiveTexture(GL_TEXTURE5);
   1046 		glBindTexture(GL_TEXTURE_2D_ARRAY, m_texture[5]);
   1047 		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1048 		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1049 		glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT,
   1050 					 &texture_data[0]);
   1051 
   1052 		glActiveTexture(GL_TEXTURE6);
   1053 		glBindBuffer(GL_TEXTURE_BUFFER, m_texture_buffer);
   1054 		glBufferData(GL_TEXTURE_BUFFER, kBufferSize * sizeof(vec4), &texture_data[0], GL_DYNAMIC_DRAW);
   1055 		glBindBuffer(GL_TEXTURE_BUFFER, 0);
   1056 		glBindTexture(GL_TEXTURE_BUFFER, m_texture[6]);
   1057 		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, m_texture_buffer);
   1058 
   1059 		glActiveTexture(GL_TEXTURE7);
   1060 		glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, m_texture[7]);
   1061 		glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, 4, GL_RGBA32F, kWidth, kHeight, GL_FALSE);
   1062 
   1063 		glActiveTexture(GL_TEXTURE8);
   1064 		glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, m_texture[8]);
   1065 		glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, 4, GL_RGBA32F, kWidth, kHeight, kDepth, GL_FALSE);
   1066 
   1067 		// clear MS textures
   1068 		GLuint fbo;
   1069 		glGenFramebuffers(1, &fbo);
   1070 		glBindFramebuffer(GL_FRAMEBUFFER, fbo);
   1071 		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texture[7], 0);
   1072 		glClearBufferfv(GL_COLOR, 0, &vec4(123.0f)[0]);
   1073 		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texture[8], 0);
   1074 		glClearBufferfv(GL_COLOR, 0, &vec4(123.0f)[0]);
   1075 		glDeleteFramebuffers(1, &fbo);
   1076 
   1077 		glUseProgram(m_program);
   1078 		if (dispatch_indirect)
   1079 		{
   1080 			if (m_dispatch_buffer == 0)
   1081 				glGenBuffers(1, &m_dispatch_buffer);
   1082 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   1083 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   1084 			glDispatchComputeIndirect(0);
   1085 		}
   1086 		else
   1087 		{
   1088 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
   1089 		}
   1090 
   1091 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   1092 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   1093 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize * 9, &buffer_data[0]);
   1094 		for (GLuint index = 0; index < kBufferSize * 9; ++index)
   1095 		{
   1096 			if (!IsEqual(buffer_data[index], vec4(123.0f)))
   1097 			{
   1098 				m_context.getTestContext().getLog()
   1099 					<< tcu::TestLog::Message << "Incorrect data at index " << index << "." << tcu::TestLog::EndMessage;
   1100 				return false;
   1101 			}
   1102 		}
   1103 		return true;
   1104 	}
   1105 
   1106 	virtual long Setup()
   1107 	{
   1108 		m_program		 = 0;
   1109 		m_storage_buffer = 0;
   1110 		memset(m_texture, 0, sizeof(m_texture));
   1111 		m_texture_buffer  = 0;
   1112 		m_dispatch_buffer = 0;
   1113 		return NO_ERROR;
   1114 	}
   1115 
   1116 	virtual long Run()
   1117 	{
   1118 		if (!RunIteration(uvec3(4, 4, 4), uvec3(8, 1, 1), false))
   1119 			return ERROR;
   1120 		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), true))
   1121 			return ERROR;
   1122 		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), false))
   1123 			return ERROR;
   1124 		return NO_ERROR;
   1125 	}
   1126 
   1127 	virtual long Cleanup()
   1128 	{
   1129 		glActiveTexture(GL_TEXTURE0);
   1130 		glUseProgram(0);
   1131 		glDeleteProgram(m_program);
   1132 		glDeleteBuffers(1, &m_storage_buffer);
   1133 		glDeleteTextures(9, m_texture);
   1134 		glDeleteBuffers(1, &m_texture_buffer);
   1135 		glDeleteBuffers(1, &m_dispatch_buffer);
   1136 		return NO_ERROR;
   1137 	}
   1138 };
   1139 
   1140 class BasicResourceImage : public ComputeShaderBase
   1141 {
   1142 
   1143 	virtual std::string Title()
   1144 	{
   1145 		return NL "Compute Shader resources - Images";
   1146 	}
   1147 
   1148 	virtual std::string Purpose()
   1149 	{
   1150 		return NL "Verify that reading/writing GPU memory via image variables work as expected.";
   1151 	}
   1152 
   1153 	virtual std::string Method()
   1154 	{
   1155 		return NL "1. Create CS which uses two image2D variables to read and write underlying GPU memory." NL
   1156 				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   1157 				  "3. Verify memory content." NL "4. Repeat for different texture and CS work sizes.";
   1158 	}
   1159 
   1160 	virtual std::string PassCriteria()
   1161 	{
   1162 		return NL "Everything works as expected.";
   1163 	}
   1164 
   1165 	GLuint m_program;
   1166 	GLuint m_draw_program;
   1167 	GLuint m_texture[2];
   1168 	GLuint m_dispatch_buffer;
   1169 	GLuint m_vertex_array;
   1170 
   1171 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
   1172 	{
   1173 		const uvec3		  global_size = local_size * num_groups;
   1174 		std::stringstream ss;
   1175 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
   1176 		   << ", local_size_z = " << local_size.z()
   1177 		   << ") in;" NL "layout(rgba32f) coherent uniform image2D g_image1;" NL
   1178 			  "layout(rgba32f) uniform image2D g_image2;" NL "const uvec3 kGlobalSize = uvec3("
   1179 		   << global_size.x() << ", " << global_size.y() << ", " << global_size.z()
   1180 		   << ");" NL "void main() {" NL
   1181 			  "  if (gl_GlobalInvocationID.x >= kGlobalSize.x || gl_GlobalInvocationID.y >= kGlobalSize.y) return;" NL
   1182 			  "  vec4 color = vec4(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y) / 255.0;" NL
   1183 			  "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), color);" NL
   1184 			  "  vec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
   1185 			  "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
   1186 		return ss.str();
   1187 	}
   1188 
   1189 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
   1190 	{
   1191 		if (m_program != 0)
   1192 			glDeleteProgram(m_program);
   1193 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
   1194 		glLinkProgram(m_program);
   1195 		if (!CheckProgram(m_program))
   1196 			return false;
   1197 
   1198 		glUseProgram(m_program);
   1199 		glUniform1i(glGetUniformLocation(m_program, "g_image1"), 0);
   1200 		glUniform1i(glGetUniformLocation(m_program, "g_image2"), 1);
   1201 		glUseProgram(0);
   1202 
   1203 		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
   1204 		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
   1205 		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
   1206 		const GLuint kSize   = kWidth * kHeight * kDepth;
   1207 
   1208 		std::vector<vec4> data(kSize);
   1209 		if (m_texture[0] == 0)
   1210 			glGenTextures(2, m_texture);
   1211 
   1212 		for (int i = 0; i < 2; ++i)
   1213 		{
   1214 			glBindTexture(GL_TEXTURE_2D, m_texture[i]);
   1215 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   1216 			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &data[0]);
   1217 		}
   1218 		glBindTexture(GL_TEXTURE_2D, 0);
   1219 
   1220 		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
   1221 		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
   1222 		glUseProgram(m_program);
   1223 		if (dispatch_indirect)
   1224 		{
   1225 			if (m_dispatch_buffer == 0)
   1226 				glGenBuffers(1, &m_dispatch_buffer);
   1227 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   1228 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   1229 			glDispatchComputeIndirect(0);
   1230 		}
   1231 		else
   1232 		{
   1233 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
   1234 		}
   1235 		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   1236 
   1237 		glClear(GL_COLOR_BUFFER_BIT);
   1238 		glActiveTexture(GL_TEXTURE0);
   1239 		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
   1240 		glActiveTexture(GL_TEXTURE1);
   1241 		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
   1242 		glUseProgram(m_draw_program);
   1243 		glBindVertexArray(m_vertex_array);
   1244 		glViewport(0, 0, kWidth, kHeight);
   1245 		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
   1246 
   1247 		std::vector<vec4> display(kWidth * kHeight);
   1248 		glReadPixels(0, 0, kWidth, kHeight, GL_RGBA, GL_FLOAT, &display[0]);
   1249 
   1250 		for (int y = 0; y < kHeight; ++y)
   1251 		{
   1252 			for (int x = 0; x < kWidth; ++x)
   1253 			{
   1254 				if (y >= getWindowHeight() || x >= getWindowWidth())
   1255 				{
   1256 					continue;
   1257 				}
   1258 				const vec4 c = vec4(float(y + x) / 255.0f);
   1259 				if (!ColorEqual(display[y * kWidth + x], c, g_color_eps))
   1260 				{
   1261 					m_context.getTestContext().getLog()
   1262 						<< tcu::TestLog::Message << "Got " << display[y * kWidth + x].x() << ", "
   1263 						<< display[y * kWidth + x].y() << ", " << display[y * kWidth + x].z() << ", "
   1264 						<< display[y * kWidth + x].w() << ", expected " << c.x() << ", " << c.y() << ", " << c.z()
   1265 						<< ", " << c.w() << " at " << x << ", " << y << tcu::TestLog::EndMessage;
   1266 					return false;
   1267 				}
   1268 			}
   1269 		}
   1270 
   1271 		return true;
   1272 	}
   1273 
   1274 	virtual long Setup()
   1275 	{
   1276 		m_program = 0;
   1277 		memset(m_texture, 0, sizeof(m_texture));
   1278 		m_dispatch_buffer = 0;
   1279 		return NO_ERROR;
   1280 	}
   1281 
   1282 	virtual long Run()
   1283 	{
   1284 
   1285 		const char* const glsl_vs =
   1286 			NL "out StageData {" NL "  vec2 texcoord;" NL "} vs_out;" NL
   1287 			   "const vec2 g_quad[] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL "void main() {" NL
   1288 			   "  gl_Position = vec4(g_quad[gl_VertexID], 0, 1);" NL
   1289 			   "  vs_out.texcoord = 0.5 + 0.5 * g_quad[gl_VertexID];" NL "}";
   1290 
   1291 		const char* glsl_fs =
   1292 			NL "in StageData {" NL "  vec2 texcoord;" NL "} fs_in;" NL "layout(location = 0) out vec4 o_color;" NL
   1293 			   "uniform sampler2D g_image1;" NL "uniform sampler2D g_image2;" NL "void main() {" NL
   1294 			   "  vec4 c1 = texture(g_image1, fs_in.texcoord);" NL "  vec4 c2 = texture(g_image2, fs_in.texcoord);" NL
   1295 			   "  if (c1 == c2) o_color = c1;" NL "  else o_color = vec4(1, 0, 0, 1);" NL "}";
   1296 
   1297 		m_draw_program = CreateProgram(glsl_vs, glsl_fs);
   1298 		glLinkProgram(m_draw_program);
   1299 		if (!CheckProgram(m_draw_program))
   1300 			return ERROR;
   1301 
   1302 		glUseProgram(m_draw_program);
   1303 		glUniform1i(glGetUniformLocation(m_draw_program, "g_image1"), 0);
   1304 		glUniform1i(glGetUniformLocation(m_draw_program, "g_image2"), 1);
   1305 		glUseProgram(0);
   1306 
   1307 		glGenVertexArrays(1, &m_vertex_array);
   1308 
   1309 		if (!pixelFormat.alphaBits)
   1310 		{
   1311 			m_context.getTestContext().getLog()
   1312 				<< tcu::TestLog::Message << "Test requires default framebuffer alpha bits" << tcu::TestLog::EndMessage;
   1313 			return NO_ERROR;
   1314 		}
   1315 
   1316 		if (!RunIteration(uvec3(8, 16, 1), uvec3(8, 4, 1), true))
   1317 			return ERROR;
   1318 		if (!RunIteration(uvec3(4, 32, 1), uvec3(16, 2, 1), false))
   1319 			return ERROR;
   1320 		if (!RunIteration(uvec3(16, 4, 1), uvec3(4, 16, 1), false))
   1321 			return ERROR;
   1322 		if (!RunIteration(uvec3(8, 8, 1), uvec3(8, 8, 1), true))
   1323 			return ERROR;
   1324 
   1325 		return NO_ERROR;
   1326 	}
   1327 
   1328 	virtual long Cleanup()
   1329 	{
   1330 		glUseProgram(0);
   1331 		glDeleteProgram(m_program);
   1332 		glDeleteProgram(m_draw_program);
   1333 		glDeleteVertexArrays(1, &m_vertex_array);
   1334 		glDeleteTextures(2, m_texture);
   1335 		glDeleteBuffers(1, &m_dispatch_buffer);
   1336 		glViewport(0, 0, getWindowWidth(), getWindowHeight());
   1337 		return NO_ERROR;
   1338 	}
   1339 };
   1340 
   1341 class BasicResourceAtomicCounter : public ComputeShaderBase
   1342 {
   1343 
   1344 	virtual std::string Title()
   1345 	{
   1346 		return "Compute Shader resources - Atomic Counters";
   1347 	}
   1348 
   1349 	virtual std::string Purpose()
   1350 	{
   1351 		return NL
   1352 			"1. Verify that Atomic Counters work as expected in CS." NL
   1353 			"2. Verify that built-in functions: atomicCounterIncrement and atomicCounterDecrement work correctly." NL
   1354 			"3. Verify that GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER is accepted by" NL
   1355 			"    GetActiveAtomicCounterBufferiv command.";
   1356 	}
   1357 
   1358 	virtual std::string Method()
   1359 	{
   1360 		return NL
   1361 			"1. Create CS which uses two atomic_uint variables." NL
   1362 			"2. In CS write values returned by atomicCounterIncrement and atomicCounterDecrement functions to SSBO." NL
   1363 			"3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL "4. Verify SSBO content." NL
   1364 			"5. Repeat for different buffer and CS work sizes.";
   1365 	}
   1366 
   1367 	virtual std::string PassCriteria()
   1368 	{
   1369 		return "Everything works as expected.";
   1370 	}
   1371 
   1372 	GLuint m_program;
   1373 	GLuint m_storage_buffer;
   1374 	GLuint m_counter_buffer[2];
   1375 	GLuint m_dispatch_buffer;
   1376 
   1377 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
   1378 	{
   1379 		const uvec3		  global_size = local_size * num_groups;
   1380 		std::stringstream ss;
   1381 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
   1382 		   << ", local_size_z = " << local_size.z()
   1383 		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint inc_data["
   1384 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uint dec_data["
   1385 		   << global_size.x() * global_size.y() * global_size.z()
   1386 		   << "];" NL "};" NL "layout(binding = 0, offset = 0) uniform atomic_uint g_inc_counter;" NL
   1387 			  "layout(binding = 1, offset = 0) uniform atomic_uint g_dec_counter;" NL "void main() {" NL
   1388 			  "  const uint index = atomicCounterIncrement(g_inc_counter);" NL "  inc_data[index] = index;" NL
   1389 			  "  dec_data[index] = atomicCounterDecrement(g_dec_counter);" NL "}";
   1390 		return ss.str();
   1391 	}
   1392 
   1393 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
   1394 	{
   1395 		if (m_program != 0)
   1396 			glDeleteProgram(m_program);
   1397 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
   1398 		glLinkProgram(m_program);
   1399 		if (!CheckProgram(m_program))
   1400 			return false;
   1401 
   1402 		GLint p[2] = { 0 };
   1403 		glGetActiveAtomicCounterBufferiv(m_program, 0, GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER, &p[0]);
   1404 		glGetActiveAtomicCounterBufferiv(m_program, 1, GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER, &p[1]);
   1405 
   1406 		if (p[0] == GL_FALSE || p[1] == GL_FALSE)
   1407 		{
   1408 			m_context.getTestContext().getLog()
   1409 				<< tcu::TestLog::Message << "ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER should be TRUE."
   1410 				<< tcu::TestLog::EndMessage;
   1411 			return false;
   1412 		}
   1413 
   1414 		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
   1415 		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
   1416 		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
   1417 		const GLuint kSize   = kWidth * kHeight * kDepth;
   1418 
   1419 		if (m_storage_buffer == 0)
   1420 			glGenBuffers(1, &m_storage_buffer);
   1421 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   1422 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kSize * 2, NULL, GL_DYNAMIC_DRAW);
   1423 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   1424 
   1425 		if (m_counter_buffer[0] == 0)
   1426 			glGenBuffers(2, m_counter_buffer);
   1427 
   1428 		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer[0]);
   1429 		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
   1430 		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = 0;
   1431 		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
   1432 
   1433 		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 1, m_counter_buffer[1]);
   1434 		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
   1435 		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = kSize;
   1436 		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
   1437 
   1438 		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
   1439 
   1440 		glUseProgram(m_program);
   1441 		if (dispatch_indirect)
   1442 		{
   1443 			if (m_dispatch_buffer == 0)
   1444 				glGenBuffers(1, &m_dispatch_buffer);
   1445 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   1446 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   1447 			glDispatchComputeIndirect(0);
   1448 		}
   1449 		else
   1450 		{
   1451 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
   1452 		}
   1453 
   1454 		std::vector<GLuint> data(kSize);
   1455 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   1456 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   1457 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kSize, &data[0]);
   1458 
   1459 		for (GLuint i = 0; i < kSize; ++i)
   1460 		{
   1461 			if (data[i] != i)
   1462 			{
   1463 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Value at index " << i << " is "
   1464 													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
   1465 				return false;
   1466 			}
   1467 		}
   1468 
   1469 		GLuint value;
   1470 		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer[0]);
   1471 		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
   1472 		if (value != kSize)
   1473 		{
   1474 			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value (buffer 0) is "
   1475 												<< value << " should be " << kSize << "." << tcu::TestLog::EndMessage;
   1476 			return false;
   1477 		}
   1478 
   1479 		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer[1]);
   1480 		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
   1481 		if (value != 0)
   1482 		{
   1483 			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value (buffer 1) is "
   1484 												<< value << " should be 0." << tcu::TestLog::EndMessage;
   1485 			return false;
   1486 		}
   1487 
   1488 		return true;
   1489 	}
   1490 
   1491 	virtual long Setup()
   1492 	{
   1493 		m_program		 = 0;
   1494 		m_storage_buffer = 0;
   1495 		memset(m_counter_buffer, 0, sizeof(m_counter_buffer));
   1496 		m_dispatch_buffer = 0;
   1497 		return NO_ERROR;
   1498 	}
   1499 
   1500 	virtual long Run()
   1501 	{
   1502 		if (!RunIteration(uvec3(4, 3, 2), uvec3(2, 3, 4), false))
   1503 			return ERROR;
   1504 		if (!RunIteration(uvec3(1, 1, 1), uvec3(1, 1, 1), true))
   1505 			return ERROR;
   1506 		if (!RunIteration(uvec3(1, 6, 1), uvec3(1, 1, 8), false))
   1507 			return ERROR;
   1508 		if (!RunIteration(uvec3(4, 1, 2), uvec3(10, 3, 4), true))
   1509 			return ERROR;
   1510 		return NO_ERROR;
   1511 	}
   1512 
   1513 	virtual long Cleanup()
   1514 	{
   1515 		glUseProgram(0);
   1516 		glDeleteProgram(m_program);
   1517 		glDeleteBuffers(2, m_counter_buffer);
   1518 		glDeleteBuffers(1, &m_dispatch_buffer);
   1519 		glDeleteBuffers(1, &m_storage_buffer);
   1520 		return NO_ERROR;
   1521 	}
   1522 };
   1523 
   1524 class BasicResourceSubroutine : public ComputeShaderBase
   1525 {
   1526 
   1527 	virtual std::string Title()
   1528 	{
   1529 		return "Compute Shader resources - Subroutines";
   1530 	}
   1531 
   1532 	virtual std::string Purpose()
   1533 	{
   1534 		return NL "1. Verify that subroutines work as expected in CS." NL
   1535 				  "2. Verify that subroutines array can be indexed with gl_WorkGroupID built-in variable." NL
   1536 				  "3. Verify that atomicCounterIncrement, imageLoad and texelFetch functions" NL
   1537 				  "    work as expected when called in CS from subroutine.";
   1538 	}
   1539 
   1540 	virtual std::string Method()
   1541 	{
   1542 		return NL "1. Create CS which uses array of subroutines." NL
   1543 				  "2. In CS index subroutine array with gl_WorkGroupID built-in variable." NL
   1544 				  "3. In each subroutine load data from SSBO0 and write it to SSBO1." NL
   1545 				  "3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   1546 				  "4. Verify SSBO1 content." NL "5. Repeat for different buffer and CS work sizes.";
   1547 	}
   1548 
   1549 	virtual std::string PassCriteria()
   1550 	{
   1551 		return "Everything works as expected.";
   1552 	}
   1553 
   1554 	GLuint m_program;
   1555 	GLuint m_atomic_counter_buffer;
   1556 	GLuint m_storage_buffer[2];
   1557 	GLuint m_buffer[2];
   1558 	GLuint m_texture_buffer[2];
   1559 
   1560 	virtual long Setup()
   1561 	{
   1562 		m_program				= 0;
   1563 		m_atomic_counter_buffer = 0;
   1564 		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
   1565 		memset(m_buffer, 0, sizeof(m_buffer));
   1566 		memset(m_texture_buffer, 0, sizeof(m_texture_buffer));
   1567 		return NO_ERROR;
   1568 	}
   1569 
   1570 	virtual long Run()
   1571 	{
   1572 		const char* const glsl_cs =
   1573 			NL "layout(local_size_x = 16) in;" NL "layout(binding = 1, std430) buffer Input {" NL "  uvec4 data[16];" NL
   1574 			   "} g_input;" NL "layout(std430, binding = 0) buffer Output {" NL "  uvec4 g_output[64];" NL "};" NL
   1575 			   "subroutine void ComputeType();" NL "subroutine uniform ComputeType Compute[4];" NL
   1576 			   "layout(binding = 0, offset = 0) uniform atomic_uint g_atomic_counter;" NL
   1577 			   "layout(rgba32ui) readonly uniform uimageBuffer g_image_buffer;" NL
   1578 			   "uniform usamplerBuffer g_sampler_buffer;" NL "subroutine(ComputeType)" NL "void Compute0() {" NL
   1579 			   "  const uint index = atomicCounterIncrement(g_atomic_counter);" NL
   1580 			   "  g_output[index] = uvec4(index);" NL "}" NL "subroutine(ComputeType)" NL "void Compute1() {" NL
   1581 			   "  g_output[gl_GlobalInvocationID.x] = g_input.data[gl_LocalInvocationIndex];" NL "}" NL
   1582 			   "subroutine(ComputeType)" NL "void Compute2() {" NL
   1583 			   "  g_output[gl_GlobalInvocationID.x] = imageLoad(g_image_buffer, int(gl_LocalInvocationIndex));" NL
   1584 			   "}" NL "subroutine(ComputeType)" NL "void Compute3() {" NL
   1585 			   "  g_output[gl_GlobalInvocationID.x] = texelFetch(g_sampler_buffer, int(gl_LocalInvocationIndex));" NL
   1586 			   "}" NL "void main() {" NL "  Compute[gl_WorkGroupID.x]();" NL "}";
   1587 		m_program = CreateComputeProgram(glsl_cs);
   1588 		glLinkProgram(m_program);
   1589 		if (!CheckProgram(m_program))
   1590 			return ERROR;
   1591 
   1592 		glGenBuffers(2, m_storage_buffer);
   1593 		/* output buffer */
   1594 		{
   1595 			std::vector<uvec4> data(64, uvec4(0xffff));
   1596 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
   1597 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * 64, &data[0], GL_DYNAMIC_DRAW);
   1598 		}
   1599 		/* input buffer */
   1600 		{
   1601 			std::vector<uvec4> data(16);
   1602 			for (GLuint i = 0; i < 16; ++i)
   1603 				data[i]   = uvec4(i + 16);
   1604 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
   1605 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_DYNAMIC_DRAW);
   1606 		}
   1607 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   1608 
   1609 		glGenBuffers(1, &m_atomic_counter_buffer);
   1610 		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_counter_buffer);
   1611 		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
   1612 		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = 0;
   1613 		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
   1614 
   1615 		glGenBuffers(2, m_buffer);
   1616 		/* image buffer */
   1617 		{
   1618 			std::vector<uvec4> data(16);
   1619 			for (GLuint i = 0; i < 16; ++i)
   1620 				data[i]   = uvec4(i + 32);
   1621 			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[0]);
   1622 			glBufferData(GL_TEXTURE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_STATIC_DRAW);
   1623 		}
   1624 		/* texture buffer */
   1625 		{
   1626 			std::vector<uvec4> data(16);
   1627 			for (GLuint i = 0; i < 16; ++i)
   1628 				data[i]   = uvec4(i + 48);
   1629 			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[1]);
   1630 			glBufferData(GL_TEXTURE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_STATIC_DRAW);
   1631 		}
   1632 		glBindBuffer(GL_TEXTURE_BUFFER, 0);
   1633 
   1634 		glGenTextures(2, m_texture_buffer);
   1635 		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[0]);
   1636 		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32UI, m_buffer[0]);
   1637 		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[1]);
   1638 		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32UI, m_buffer[1]);
   1639 		glBindTexture(GL_TEXTURE_BUFFER, 0);
   1640 
   1641 		const GLuint index_compute0 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute0");
   1642 		const GLuint index_compute1 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute1");
   1643 		const GLuint index_compute2 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute2");
   1644 		const GLuint index_compute3 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute3");
   1645 		const GLint  loc_compute0   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[0]");
   1646 		const GLint  loc_compute1   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[1]");
   1647 		const GLint  loc_compute2   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[2]");
   1648 		const GLint  loc_compute3   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[3]");
   1649 
   1650 		// bind resources
   1651 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
   1652 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
   1653 		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_counter_buffer);
   1654 		glBindImageTexture(0, m_texture_buffer[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32UI);
   1655 		glActiveTexture(GL_TEXTURE0);
   1656 		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[1]);
   1657 
   1658 		glUseProgram(m_program);
   1659 
   1660 		// setup subroutines
   1661 		GLuint indices[4];
   1662 		indices[loc_compute0] = index_compute0;
   1663 		indices[loc_compute1] = index_compute1;
   1664 		indices[loc_compute2] = index_compute2;
   1665 		indices[loc_compute3] = index_compute3;
   1666 		glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 4, indices);
   1667 
   1668 		glDispatchCompute(4, 1, 1);
   1669 
   1670 		std::vector<uvec4> data(64);
   1671 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
   1672 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   1673 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * 64, &data[0]);
   1674 
   1675 		for (GLuint i = 0; i < 64; ++i)
   1676 		{
   1677 			if (!IsEqual(data[i], uvec4(i)))
   1678 			{
   1679 				m_context.getTestContext().getLog()
   1680 					<< tcu::TestLog::Message << "Invalid value at index " << i << "." << tcu::TestLog::EndMessage;
   1681 				return ERROR;
   1682 			}
   1683 		}
   1684 
   1685 		GLuint value;
   1686 		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_atomic_counter_buffer);
   1687 		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
   1688 		if (value != 16)
   1689 		{
   1690 			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value is " << value
   1691 												<< " should be 16." << tcu::TestLog::EndMessage;
   1692 			return ERROR;
   1693 		}
   1694 
   1695 		return NO_ERROR;
   1696 	}
   1697 
   1698 	virtual long Cleanup()
   1699 	{
   1700 		glUseProgram(0);
   1701 		glDeleteProgram(m_program);
   1702 		glDeleteBuffers(1, &m_atomic_counter_buffer);
   1703 		glDeleteBuffers(2, m_storage_buffer);
   1704 		glDeleteBuffers(2, m_buffer);
   1705 		glDeleteTextures(2, m_texture_buffer);
   1706 		return NO_ERROR;
   1707 	}
   1708 };
   1709 
   1710 class BasicResourceUniform : public ComputeShaderBase
   1711 {
   1712 
   1713 	virtual std::string Title()
   1714 	{
   1715 		return "Compute Shader resources - Uniforms";
   1716 	}
   1717 
   1718 	virtual std::string Purpose()
   1719 	{
   1720 		return NL "1. Verify that all types of uniform variables work as expected in CS." NL
   1721 				  "2. Verify that uniform variables can be updated with Uniform* and ProgramUniform* commands." NL
   1722 				  "3. Verify that re-linking CS program works as expected.";
   1723 	}
   1724 
   1725 	virtual std::string Method()
   1726 	{
   1727 		return NL "1. Create CS which uses all (single precision and integer) types of uniform variables." NL
   1728 				  "2. Update uniform variables with ProgramUniform* commands." NL
   1729 				  "3. Verify that uniform variables were updated correctly." NL "4. Re-link CS program." NL
   1730 				  "5. Update uniform variables with Uniform* commands." NL
   1731 				  "6. Verify that uniform variables were updated correctly.";
   1732 	}
   1733 
   1734 	virtual std::string PassCriteria()
   1735 	{
   1736 		return "Everything works as expected.";
   1737 	}
   1738 
   1739 	GLuint m_program;
   1740 	GLuint m_storage_buffer;
   1741 
   1742 	virtual long Setup()
   1743 	{
   1744 		m_program		 = 0;
   1745 		m_storage_buffer = 0;
   1746 		return NO_ERROR;
   1747 	}
   1748 
   1749 	virtual long Run()
   1750 	{
   1751 		const char* const glsl_cs = NL
   1752 			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform float g_0;" NL
   1753 			"uniform vec2 g_1;" NL "uniform vec3 g_2;" NL "uniform vec4 g_3;" NL "uniform mat2 g_4;" NL
   1754 			"uniform mat2x3 g_5;" NL "uniform mat2x4 g_6;" NL "uniform mat3x2 g_7;" NL "uniform mat3 g_8;" NL
   1755 			"uniform mat3x4 g_9;" NL "uniform mat4x2 g_10;" NL "uniform mat4x3 g_11;" NL "uniform mat4 g_12;" NL
   1756 			"uniform int g_13;" NL "uniform ivec2 g_14;" NL "uniform ivec3 g_15;" NL "uniform ivec4 g_16;" NL
   1757 			"uniform uint g_17;" NL "uniform uvec2 g_18;" NL "uniform uvec3 g_19;" NL "uniform uvec4 g_20;" NL NL
   1758 			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0) g_result = 0;" NL
   1759 			"  if (g_1 != vec2(2.0, 3.0)) g_result = 0;" NL "  if (g_2 != vec3(4.0, 5.0, 6.0)) g_result = 0;" NL
   1760 			"  if (g_3 != vec4(7.0, 8.0, 9.0, 10.0)) g_result = 0;" NL NL
   1761 			"  if (g_4 != mat2(11.0, 12.0, 13.0, 14.0)) g_result = 0;" NL
   1762 			"  if (g_5 != mat2x3(15.0, 16.0, 17.0, 18.0, 19.0, 20.0)) g_result = 0;" NL
   1763 			"  if (g_6 != mat2x4(21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0)) g_result = 0;" NL NL
   1764 			"  if (g_7 != mat3x2(29.0, 30.0, 31.0, 32.0, 33.0, 34.0)) g_result = 0;" NL
   1765 			"  if (g_8 != mat3(35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0)) g_result = 0;" NL
   1766 			"  if (g_9 != mat3x4(44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0)) g_result = "
   1767 			"0;" NL NL "  if (g_10 != mat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
   1768 			"  if (g_11 != mat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
   1769 			"0;" NL "  if (g_12 != mat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
   1770 			"88.0, 89.0, 90.0)) g_result = 0;" NL NL "  if (g_13 != 91) g_result = 0;" NL
   1771 			"  if (g_14 != ivec2(92, 93)) g_result = 0;" NL "  if (g_15 != ivec3(94, 95, 96)) g_result = 0;" NL
   1772 			"  if (g_16 != ivec4(97, 98, 99, 100)) g_result = 0;" NL NL "  if (g_17 != 101u) g_result = 0;" NL
   1773 			"  if (g_18 != uvec2(102u, 103u)) g_result = 0;" NL
   1774 			"  if (g_19 != uvec3(104u, 105u, 106u)) g_result = 0;" NL
   1775 			"  if (g_20 != uvec4(107u, 108u, 109u, 110u)) g_result = 0;" NL "}";
   1776 		m_program = CreateComputeProgram(glsl_cs);
   1777 		glLinkProgram(m_program);
   1778 		if (!CheckProgram(m_program))
   1779 			return ERROR;
   1780 
   1781 		glGenBuffers(1, &m_storage_buffer);
   1782 		/* create buffer */
   1783 		{
   1784 			const int data = 123;
   1785 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   1786 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   1787 		}
   1788 
   1789 		glProgramUniform1f(m_program, glGetUniformLocation(m_program, "g_0"), 1.0f);
   1790 		glProgramUniform2f(m_program, glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
   1791 		glProgramUniform3f(m_program, glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
   1792 		glProgramUniform4f(m_program, glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
   1793 
   1794 		/* mat2 */
   1795 		{
   1796 			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
   1797 			glProgramUniformMatrix2fv(m_program, glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
   1798 		}
   1799 		/* mat2x3 */
   1800 		{
   1801 			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
   1802 			glProgramUniformMatrix2x3fv(m_program, glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
   1803 		}
   1804 		/* mat2x4 */
   1805 		{
   1806 			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
   1807 			glProgramUniformMatrix2x4fv(m_program, glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
   1808 		}
   1809 
   1810 		/* mat3x2 */
   1811 		{
   1812 			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
   1813 			glProgramUniformMatrix3x2fv(m_program, glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
   1814 		}
   1815 		/* mat3 */
   1816 		{
   1817 			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
   1818 			glProgramUniformMatrix3fv(m_program, glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
   1819 		}
   1820 		/* mat3x4 */
   1821 		{
   1822 			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
   1823 										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
   1824 			glProgramUniformMatrix3x4fv(m_program, glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
   1825 		}
   1826 
   1827 		/* mat4x2 */
   1828 		{
   1829 			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
   1830 			glProgramUniformMatrix4x2fv(m_program, glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
   1831 		}
   1832 		/* mat4x3 */
   1833 		{
   1834 			const GLfloat value[12] = {
   1835 				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
   1836 			};
   1837 			glProgramUniformMatrix4x3fv(m_program, glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
   1838 		}
   1839 		/* mat4 */
   1840 		{
   1841 			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
   1842 										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
   1843 			glProgramUniformMatrix4fv(m_program, glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
   1844 		}
   1845 
   1846 		glProgramUniform1i(m_program, glGetUniformLocation(m_program, "g_13"), 91);
   1847 		glProgramUniform2i(m_program, glGetUniformLocation(m_program, "g_14"), 92, 93);
   1848 		glProgramUniform3i(m_program, glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
   1849 		glProgramUniform4i(m_program, glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
   1850 
   1851 		glProgramUniform1ui(m_program, glGetUniformLocation(m_program, "g_17"), 101);
   1852 		glProgramUniform2ui(m_program, glGetUniformLocation(m_program, "g_18"), 102, 103);
   1853 		glProgramUniform3ui(m_program, glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
   1854 		glProgramUniform4ui(m_program, glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
   1855 
   1856 		glUseProgram(m_program);
   1857 		glDispatchCompute(1, 1, 1);
   1858 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   1859 
   1860 		{
   1861 			int data;
   1862 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   1863 			if (data != 1)
   1864 			{
   1865 				m_context.getTestContext().getLog()
   1866 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   1867 				return ERROR;
   1868 			}
   1869 		}
   1870 
   1871 		// re-link program (all uniforms will be set to zero)
   1872 		glLinkProgram(m_program);
   1873 
   1874 		{
   1875 			const int data = 123;
   1876 			glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   1877 		}
   1878 
   1879 		glUniform1f(glGetUniformLocation(m_program, "g_0"), 1.0f);
   1880 		glUniform2f(glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
   1881 		glUniform3f(glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
   1882 		glUniform4f(glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
   1883 
   1884 		/* mat2 */
   1885 		{
   1886 			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
   1887 			glUniformMatrix2fv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
   1888 		}
   1889 		/* mat2x3 */
   1890 		{
   1891 			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
   1892 			glUniformMatrix2x3fv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
   1893 		}
   1894 		/* mat2x4 */
   1895 		{
   1896 			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
   1897 			glUniformMatrix2x4fv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
   1898 		}
   1899 
   1900 		/* mat3x2 */
   1901 		{
   1902 			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
   1903 			glUniformMatrix3x2fv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
   1904 		}
   1905 		/* mat3 */
   1906 		{
   1907 			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
   1908 			glUniformMatrix3fv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
   1909 		}
   1910 		/* mat3x4 */
   1911 		{
   1912 			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
   1913 										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
   1914 			glUniformMatrix3x4fv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
   1915 		}
   1916 
   1917 		/* mat4x2 */
   1918 		{
   1919 			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
   1920 			glUniformMatrix4x2fv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
   1921 		}
   1922 		/* mat4x3 */
   1923 		{
   1924 			const GLfloat value[12] = {
   1925 				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
   1926 			};
   1927 			glUniformMatrix4x3fv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
   1928 		}
   1929 		/* mat4 */
   1930 		{
   1931 			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
   1932 										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
   1933 			glUniformMatrix4fv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
   1934 		}
   1935 
   1936 		glUniform1i(glGetUniformLocation(m_program, "g_13"), 91);
   1937 		glUniform2i(glGetUniformLocation(m_program, "g_14"), 92, 93);
   1938 		glUniform3i(glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
   1939 		glUniform4i(glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
   1940 
   1941 		glUniform1ui(glGetUniformLocation(m_program, "g_17"), 101);
   1942 		glUniform2ui(glGetUniformLocation(m_program, "g_18"), 102, 103);
   1943 		glUniform3ui(glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
   1944 		glUniform4ui(glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
   1945 
   1946 		glDispatchCompute(1, 1, 1);
   1947 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   1948 
   1949 		/* validate */
   1950 		{
   1951 			int data;
   1952 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   1953 			if (data != 1)
   1954 			{
   1955 				m_context.getTestContext().getLog()
   1956 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   1957 				return ERROR;
   1958 			}
   1959 		}
   1960 
   1961 		return NO_ERROR;
   1962 	}
   1963 
   1964 	virtual long Cleanup()
   1965 	{
   1966 		glUseProgram(0);
   1967 		glDeleteProgram(m_program);
   1968 		glDeleteBuffers(1, &m_storage_buffer);
   1969 		return NO_ERROR;
   1970 	}
   1971 };
   1972 
   1973 class BasicBuiltinVariables : public ComputeShaderBase
   1974 {
   1975 
   1976 	virtual std::string Title()
   1977 	{
   1978 		return "CS built-in variables";
   1979 	}
   1980 
   1981 	virtual std::string Purpose()
   1982 	{
   1983 		return NL "Verify that all (gl_WorkGroupSize, gl_WorkGroupID, gl_LocalInvocationID," NL
   1984 				  "gl_GlobalInvocationID, gl_NumWorkGroups, gl_WorkGroupSize)" NL
   1985 				  "CS built-in variables has correct values.";
   1986 	}
   1987 
   1988 	virtual std::string Method()
   1989 	{
   1990 		return NL "1. Create CS which writes all built-in variables to SSBO." NL
   1991 				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   1992 				  "3. Verify SSBO content." NL "4. Repeat for several different local and global work sizes.";
   1993 	}
   1994 
   1995 	virtual std::string PassCriteria()
   1996 	{
   1997 		return "Everything works as expected.";
   1998 	}
   1999 
   2000 	GLuint m_program;
   2001 	GLuint m_storage_buffer;
   2002 	GLuint m_dispatch_buffer;
   2003 
   2004 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
   2005 	{
   2006 		const uvec3		  global_size = local_size * num_groups;
   2007 		std::stringstream ss;
   2008 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
   2009 		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
   2010 		   << ", " << global_size.y() << ", " << global_size.z()
   2011 		   << ");" NL "layout(std430) buffer OutputBuffer {" NL "  uvec4 num_work_groups["
   2012 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_size["
   2013 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_id["
   2014 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_id["
   2015 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 global_invocation_id["
   2016 		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_index["
   2017 		   << global_size.x() * global_size.y() * global_size.z()
   2018 		   << "];" NL "} g_out_buffer;" NL "void main() {" NL
   2019 			  "  if ((gl_WorkGroupSize * gl_WorkGroupID + gl_LocalInvocationID) != gl_GlobalInvocationID) return;" NL
   2020 			  "  const uint global_index = gl_GlobalInvocationID.x +" NL
   2021 			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
   2022 			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
   2023 			  "  g_out_buffer.num_work_groups[global_index] = uvec4(gl_NumWorkGroups, 0);" NL
   2024 			  "  g_out_buffer.work_group_size[global_index] = uvec4(gl_WorkGroupSize, 0);" NL
   2025 			  "  g_out_buffer.work_group_id[global_index] = uvec4(gl_WorkGroupID, 0);" NL
   2026 			  "  g_out_buffer.local_invocation_id[global_index] = uvec4(gl_LocalInvocationID, 0);" NL
   2027 			  "  g_out_buffer.global_invocation_id[global_index] = uvec4(gl_GlobalInvocationID, 0);" NL
   2028 			  "  g_out_buffer.local_invocation_index[global_index] = uvec4(gl_LocalInvocationIndex);" NL "}";
   2029 		return ss.str();
   2030 	}
   2031 
   2032 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
   2033 	{
   2034 		if (m_program != 0)
   2035 			glDeleteProgram(m_program);
   2036 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
   2037 		glLinkProgram(m_program);
   2038 		if (!CheckProgram(m_program))
   2039 			return false;
   2040 
   2041 		const GLuint kBufferSize =
   2042 			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
   2043 
   2044 		std::vector<uvec4> data(kBufferSize * 6);
   2045 		if (m_storage_buffer == 0)
   2046 			glGenBuffers(1, &m_storage_buffer);
   2047 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   2048 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kBufferSize * 6, &data[0], GL_DYNAMIC_DRAW);
   2049 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   2050 
   2051 		glUseProgram(m_program);
   2052 		if (dispatch_indirect)
   2053 		{
   2054 			if (m_dispatch_buffer == 0)
   2055 				glGenBuffers(1, &m_dispatch_buffer);
   2056 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   2057 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   2058 			glDispatchComputeIndirect(0);
   2059 		}
   2060 		else
   2061 		{
   2062 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
   2063 		}
   2064 
   2065 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   2066 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2067 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * kBufferSize * 6, &data[0]);
   2068 
   2069 		// gl_NumWorkGroups
   2070 		for (GLuint index = 0; index < kBufferSize; ++index)
   2071 		{
   2072 			if (!IsEqual(data[index], uvec4(num_groups.x(), num_groups.y(), num_groups.z(), 0)))
   2073 			{
   2074 				m_context.getTestContext().getLog()
   2075 					<< tcu::TestLog::Message << "gl_NumWorkGroups: Invalid data at index " << index << "."
   2076 					<< tcu::TestLog::EndMessage;
   2077 				return false;
   2078 			}
   2079 		}
   2080 		// gl_WorkGroupSize
   2081 		for (GLuint index = kBufferSize; index < 2 * kBufferSize; ++index)
   2082 		{
   2083 			if (!IsEqual(data[index], uvec4(local_size.x(), local_size.y(), local_size.z(), 0)))
   2084 			{
   2085 				m_context.getTestContext().getLog()
   2086 					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index << "."
   2087 					<< tcu::TestLog::EndMessage;
   2088 				return false;
   2089 			}
   2090 		}
   2091 		// gl_WorkGroupID
   2092 		for (GLuint index = 2 * kBufferSize; index < 3 * kBufferSize; ++index)
   2093 		{
   2094 			uvec3 expected = IndexTo3DCoord(index - 2 * kBufferSize, local_size.x() * num_groups.x(),
   2095 											local_size.y() * num_groups.y());
   2096 			expected.x() /= local_size.x();
   2097 			expected.y() /= local_size.y();
   2098 			expected.z() /= local_size.z();
   2099 			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
   2100 			{
   2101 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "gl_WorkGroupID: Invalid data at index "
   2102 													<< index << "." << tcu::TestLog::EndMessage;
   2103 				return false;
   2104 			}
   2105 		}
   2106 		// gl_LocalInvocationID
   2107 		for (GLuint index = 3 * kBufferSize; index < 4 * kBufferSize; ++index)
   2108 		{
   2109 			uvec3 expected = IndexTo3DCoord(index - 3 * kBufferSize, local_size.x() * num_groups.x(),
   2110 											local_size.y() * num_groups.y());
   2111 			expected.x() %= local_size.x();
   2112 			expected.y() %= local_size.y();
   2113 			expected.z() %= local_size.z();
   2114 			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
   2115 			{
   2116 				m_context.getTestContext().getLog()
   2117 					<< tcu::TestLog::Message << "gl_LocalInvocationID: Invalid data at index " << index << "."
   2118 					<< tcu::TestLog::EndMessage;
   2119 				return false;
   2120 			}
   2121 		}
   2122 		// gl_GlobalInvocationID
   2123 		for (GLuint index = 4 * kBufferSize; index < 5 * kBufferSize; ++index)
   2124 		{
   2125 			uvec3 expected = IndexTo3DCoord(index - 4 * kBufferSize, local_size.x() * num_groups.x(),
   2126 											local_size.y() * num_groups.y());
   2127 			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
   2128 			{
   2129 				m_context.getTestContext().getLog()
   2130 					<< tcu::TestLog::Message << "gl_GlobalInvocationID: Invalid data at index " << index << "."
   2131 					<< tcu::TestLog::EndMessage;
   2132 				return false;
   2133 			}
   2134 		}
   2135 		// gl_LocalInvocationIndex
   2136 		for (GLuint index = 5 * kBufferSize; index < 6 * kBufferSize; ++index)
   2137 		{
   2138 			uvec3 coord = IndexTo3DCoord(index - 5 * kBufferSize, local_size.x() * num_groups.x(),
   2139 										 local_size.y() * num_groups.y());
   2140 			const GLuint expected = (coord.x() % local_size.x()) + (coord.y() % local_size.y()) * local_size.x() +
   2141 									(coord.z() % local_size.z()) * local_size.x() * local_size.y();
   2142 			if (!IsEqual(data[index], uvec4(expected)))
   2143 			{
   2144 				m_context.getTestContext().getLog()
   2145 					<< tcu::TestLog::Message << "gl_LocalInvocationIndex: Invalid data at index " << index << "."
   2146 					<< tcu::TestLog::EndMessage;
   2147 				return false;
   2148 			}
   2149 		}
   2150 		return true;
   2151 	}
   2152 
   2153 	virtual long Setup()
   2154 	{
   2155 		m_program		  = 0;
   2156 		m_storage_buffer  = 0;
   2157 		m_dispatch_buffer = 0;
   2158 		return NO_ERROR;
   2159 	}
   2160 
   2161 	virtual long Run()
   2162 	{
   2163 		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
   2164 			return ERROR;
   2165 		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
   2166 			return ERROR;
   2167 		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
   2168 			return ERROR;
   2169 		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
   2170 			return ERROR;
   2171 		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
   2172 			return ERROR;
   2173 		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
   2174 			return ERROR;
   2175 		return NO_ERROR;
   2176 	}
   2177 
   2178 	virtual long Cleanup()
   2179 	{
   2180 		glUseProgram(0);
   2181 		glDeleteProgram(m_program);
   2182 		glDeleteBuffers(1, &m_storage_buffer);
   2183 		glDeleteBuffers(1, &m_dispatch_buffer);
   2184 		return NO_ERROR;
   2185 	}
   2186 };
   2187 
   2188 class BasicMax : public ComputeShaderBase
   2189 {
   2190 
   2191 	virtual std::string Title()
   2192 	{
   2193 		return NL "CS max values";
   2194 	}
   2195 
   2196 	virtual std::string Purpose()
   2197 	{
   2198 		return NL "Verify (on the API and GLSL side) that all GL_MAX_COMPUTE_* values are not less than" NL
   2199 				  "required by the OpenGL specification.";
   2200 	}
   2201 
   2202 	virtual std::string Method()
   2203 	{
   2204 		return NL "1. Use all API commands to query all GL_MAX_COMPUTE_* values. Verify that they are correct." NL
   2205 				  "2. Verify all gl_MaxCompute* constants in the GLSL.";
   2206 	}
   2207 
   2208 	virtual std::string PassCriteria()
   2209 	{
   2210 		return NL "Everything works as expected.";
   2211 	}
   2212 
   2213 	GLuint m_program;
   2214 	GLuint m_buffer;
   2215 
   2216 	bool CheckIndexed(GLenum target, const GLint* min_values)
   2217 	{
   2218 		GLint	 i;
   2219 		GLint64   i64;
   2220 		GLfloat   f;
   2221 		GLdouble  d;
   2222 		GLboolean b;
   2223 
   2224 		for (GLuint c = 0; c < 3; c++)
   2225 		{
   2226 			glGetIntegeri_v(target, c, &i);
   2227 			if (i < min_values[c])
   2228 			{
   2229 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
   2230 													<< min_values[c] << "." << tcu::TestLog::EndMessage;
   2231 				return false;
   2232 			}
   2233 		}
   2234 		for (GLuint c = 0; c < 3; c++)
   2235 		{
   2236 			glGetInteger64i_v(target, c, &i64);
   2237 			if (i64 < static_cast<GLint64>(min_values[c]))
   2238 			{
   2239 				m_context.getTestContext().getLog()
   2240 					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least "
   2241 					<< min_values[c] << "." << tcu::TestLog::EndMessage;
   2242 				return false;
   2243 			}
   2244 		}
   2245 		for (GLuint c = 0; c < 3; c++)
   2246 		{
   2247 			glGetFloati_v(target, c, &f);
   2248 			if (f < static_cast<GLfloat>(min_values[c]))
   2249 			{
   2250 				m_context.getTestContext().getLog()
   2251 					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(f) << " should be at least "
   2252 					<< min_values[c] << "." << tcu::TestLog::EndMessage;
   2253 				return false;
   2254 			}
   2255 		}
   2256 		for (GLuint c = 0; c < 3; c++)
   2257 		{
   2258 			glGetDoublei_v(target, c, &d);
   2259 			if (d < static_cast<GLdouble>(min_values[c]))
   2260 			{
   2261 				m_context.getTestContext().getLog()
   2262 					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(d) << " should be at least "
   2263 					<< min_values[c] << "." << tcu::TestLog::EndMessage;
   2264 				return false;
   2265 			}
   2266 		}
   2267 		for (GLuint c = 0; c < 3; c++)
   2268 		{
   2269 			glGetBooleani_v(target, c, &b);
   2270 			if (b == GL_FALSE)
   2271 			{
   2272 				m_context.getTestContext().getLog()
   2273 					<< tcu::TestLog::Message << "Is GL_FALSE should be at least GL_TRUE." << tcu::TestLog::EndMessage;
   2274 				return false;
   2275 			}
   2276 		}
   2277 
   2278 		return true;
   2279 	}
   2280 
   2281 	bool Check(GLenum target, const GLint min_value)
   2282 	{
   2283 		GLint	 i;
   2284 		GLint64   i64;
   2285 		GLfloat   f;
   2286 		GLdouble  d;
   2287 		GLboolean b;
   2288 
   2289 		glGetIntegerv(target, &i);
   2290 		if (i < min_value)
   2291 		{
   2292 			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
   2293 												<< min_value << "." << tcu::TestLog::EndMessage;
   2294 			return false;
   2295 		}
   2296 		glGetInteger64v(target, &i64);
   2297 		if (static_cast<GLint>(i64) < min_value)
   2298 		{
   2299 			m_context.getTestContext().getLog()
   2300 				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least " << min_value
   2301 				<< "." << tcu::TestLog::EndMessage;
   2302 			return false;
   2303 		}
   2304 		glGetFloatv(target, &f);
   2305 		if (f < static_cast<GLfloat>(min_value))
   2306 		{
   2307 			m_context.getTestContext().getLog()
   2308 				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(f) << " should be at least " << min_value << "."
   2309 				<< tcu::TestLog::EndMessage;
   2310 			return false;
   2311 		}
   2312 		glGetDoublev(target, &d);
   2313 		if (d < static_cast<GLdouble>(min_value))
   2314 		{
   2315 			m_context.getTestContext().getLog()
   2316 				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(d) << " should be at least " << min_value << "."
   2317 				<< tcu::TestLog::EndMessage;
   2318 			return false;
   2319 		}
   2320 		glGetBooleanv(target, &b);
   2321 		if (b != (min_value ? GL_TRUE : GL_FALSE))
   2322 		{
   2323 			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << b << " should be "
   2324 												<< (min_value ? GL_TRUE : GL_FALSE) << "." << tcu::TestLog::EndMessage;
   2325 			return false;
   2326 		}
   2327 
   2328 		return true;
   2329 	}
   2330 
   2331 	virtual long Setup()
   2332 	{
   2333 		m_program = 0;
   2334 		m_buffer  = 0;
   2335 		return NO_ERROR;
   2336 	}
   2337 
   2338 	virtual long Run()
   2339 	{
   2340 		const GLint work_group_count[3] = { 65535, 65535, 65535 };
   2341 		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_COUNT, work_group_count))
   2342 			return ERROR;
   2343 
   2344 		const GLint work_group_size[3] = { 1024, 1024, 64 };
   2345 		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_SIZE, work_group_size))
   2346 			return ERROR;
   2347 
   2348 		if (!Check(GL_MAX_COMPUTE_UNIFORM_BLOCKS, 12))
   2349 			return ERROR;
   2350 		if (!Check(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, 16))
   2351 			return ERROR;
   2352 		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, 8))
   2353 			return ERROR;
   2354 		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTERS, 8))
   2355 			return ERROR;
   2356 		if (!Check(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, 32768))
   2357 			return ERROR;
   2358 
   2359 		if (glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::core(4, 5)))
   2360 		{
   2361 			if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 1024))
   2362 				return ERROR;
   2363 		}
   2364 		else
   2365 		{
   2366 			if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 512))
   2367 				return ERROR;
   2368 		}
   2369 
   2370 		if (!Check(GL_MAX_COMPUTE_IMAGE_UNIFORMS, 8))
   2371 			return ERROR;
   2372 		if (!Check(GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS, 512))
   2373 			return ERROR;
   2374 
   2375 		const char* const glsl_cs =
   2376 			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
   2377 			   "uniform ivec3 MaxComputeWorkGroupCount;" NL "uniform ivec3 MaxComputeWorkGroupSize;" NL
   2378 			   "uniform int MaxComputeUniformComponents;" NL "uniform int MaxComputeTextureImageUnits;" NL
   2379 			   "uniform int MaxComputeImageUniforms;" NL "uniform int MaxComputeAtomicCounters;" NL
   2380 			   "uniform int MaxComputeAtomicCounterBuffers;" NL "void main() {" NL "  g_output = 1;" NL
   2381 			   "  if (MaxComputeWorkGroupCount != gl_MaxComputeWorkGroupCount) g_output = 0;" NL
   2382 			   "  if (MaxComputeWorkGroupSize != gl_MaxComputeWorkGroupSize) g_output = 0;" NL
   2383 			   "  if (MaxComputeUniformComponents != gl_MaxComputeUniformComponents) g_output = 0;" NL
   2384 			   "  if (MaxComputeTextureImageUnits != gl_MaxComputeTextureImageUnits) g_output = 0;" NL
   2385 			   "  if (MaxComputeImageUniforms != gl_MaxComputeImageUniforms) g_output = 0;" NL
   2386 			   "  if (MaxComputeAtomicCounters != gl_MaxComputeAtomicCounters) g_output = 0;" NL
   2387 			   "  if (MaxComputeAtomicCounterBuffers != gl_MaxComputeAtomicCounterBuffers) g_output = 0;" NL "}";
   2388 		m_program = CreateComputeProgram(glsl_cs);
   2389 		glLinkProgram(m_program);
   2390 		if (!CheckProgram(m_program))
   2391 			return ERROR;
   2392 		glUseProgram(m_program);
   2393 
   2394 		GLint p[3];
   2395 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &p[0]);
   2396 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &p[1]);
   2397 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &p[2]);
   2398 		glUniform3i(glGetUniformLocation(m_program, "MaxComputeWorkGroupCount"), p[0], p[1], p[2]);
   2399 
   2400 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &p[0]);
   2401 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &p[1]);
   2402 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &p[2]);
   2403 		glUniform3iv(glGetUniformLocation(m_program, "MaxComputeWorkGroupSize"), 1, p);
   2404 
   2405 		glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, p);
   2406 		glUniform1i(glGetUniformLocation(m_program, "MaxComputeUniformComponents"), p[0]);
   2407 
   2408 		glGetIntegerv(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, p);
   2409 		glUniform1iv(glGetUniformLocation(m_program, "MaxComputeTextureImageUnits"), 1, p);
   2410 
   2411 		glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, p);
   2412 		glUniform1i(glGetUniformLocation(m_program, "MaxComputeImageUniforms"), p[0]);
   2413 
   2414 		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, p);
   2415 		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounters"), p[0]);
   2416 
   2417 		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, p);
   2418 		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounterBuffers"), p[0]);
   2419 
   2420 		GLint data = 0xffff;
   2421 		glGenBuffers(1, &m_buffer);
   2422 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
   2423 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint), &data, GL_DYNAMIC_DRAW);
   2424 
   2425 		glDispatchCompute(1, 1, 1);
   2426 
   2427 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2428 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), &data);
   2429 
   2430 		return data == 1 ? NO_ERROR : ERROR;
   2431 	}
   2432 	virtual long Cleanup()
   2433 	{
   2434 		glUseProgram(0);
   2435 		glDeleteProgram(m_program);
   2436 		glDeleteBuffers(1, &m_buffer);
   2437 		return NO_ERROR;
   2438 	}
   2439 };
   2440 
   2441 class BasicBuildMonolithic : public ComputeShaderBase
   2442 {
   2443 
   2444 	virtual std::string Title()
   2445 	{
   2446 		return "Building CS monolithic program";
   2447 	}
   2448 
   2449 	virtual std::string Purpose()
   2450 	{
   2451 		return NL "1. Verify that building monolithic CS program works as expected." NL
   2452 				  "2. Verify that program consisting from 3 compilation units links as expected." NL
   2453 				  "3. Verify that CS consisting from 2 strings compiles as expected.";
   2454 	}
   2455 
   2456 	virtual std::string Method()
   2457 	{
   2458 		return NL "1. Create, compile and link CS using CreateShader, CompileShader and LinkProgram commands." NL
   2459 				  "2. Dispatch and verify CS program.";
   2460 	}
   2461 
   2462 	virtual std::string PassCriteria()
   2463 	{
   2464 		return "Everything works as expected.";
   2465 	}
   2466 
   2467 	virtual long Run()
   2468 	{
   2469 		const char* const cs1[2] = { "#version 430 core",
   2470 
   2471 									 NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL
   2472 										"  Run();" NL "}" };
   2473 
   2474 		const char* const cs2 =
   2475 			"#version 430 core" NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
   2476 			"vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}";
   2477 
   2478 		const char* const cs3 =
   2479 			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 0, std430) buffer Output {" NL
   2480 			"  vec4 g_output;" NL "};" NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL
   2481 			"  return vec4(1, 2, 3, 4);" NL "}";
   2482 
   2483 		const GLuint sh1 = glCreateShader(GL_COMPUTE_SHADER);
   2484 
   2485 		GLint type;
   2486 		glGetShaderiv(sh1, GL_SHADER_TYPE, &type);
   2487 		if (static_cast<GLenum>(type) != GL_COMPUTE_SHADER)
   2488 		{
   2489 			m_context.getTestContext().getLog()
   2490 				<< tcu::TestLog::Message << "SHADER_TYPE should be COMPUTE_SHADER." << tcu::TestLog::EndMessage;
   2491 			glDeleteShader(sh1);
   2492 			return false;
   2493 		}
   2494 
   2495 		glShaderSource(sh1, 2, cs1, NULL);
   2496 		glCompileShader(sh1);
   2497 
   2498 		const GLuint sh2 = glCreateShader(GL_COMPUTE_SHADER);
   2499 		glShaderSource(sh2, 1, &cs2, NULL);
   2500 		glCompileShader(sh2);
   2501 
   2502 		const GLuint sh3 = glCreateShader(GL_COMPUTE_SHADER);
   2503 		glShaderSource(sh3, 1, &cs3, NULL);
   2504 		glCompileShader(sh3);
   2505 
   2506 		const GLuint p = glCreateProgram();
   2507 		glAttachShader(p, sh1);
   2508 		glAttachShader(p, sh2);
   2509 		glAttachShader(p, sh3);
   2510 		glLinkProgram(p);
   2511 
   2512 		glDeleteShader(sh1);
   2513 		glDeleteShader(sh2);
   2514 		glDeleteShader(sh3);
   2515 
   2516 		bool res = CheckProgram(p);
   2517 
   2518 		GLuint buffer;
   2519 		glGenBuffers(1, &buffer);
   2520 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
   2521 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
   2522 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   2523 
   2524 		glUseProgram(p);
   2525 		glDispatchCompute(1, 1, 1);
   2526 
   2527 		vec4 data;
   2528 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
   2529 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2530 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
   2531 		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
   2532 		{
   2533 			m_context.getTestContext().getLog()
   2534 				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
   2535 			res = false;
   2536 		}
   2537 
   2538 		glDeleteBuffers(1, &buffer);
   2539 		glUseProgram(0);
   2540 		glDeleteProgram(p);
   2541 
   2542 		return res == true ? NO_ERROR : ERROR;
   2543 	}
   2544 };
   2545 
   2546 class BasicBuildSeparable : public ComputeShaderBase
   2547 {
   2548 
   2549 	virtual std::string Title()
   2550 	{
   2551 		return "Building CS separable program";
   2552 	}
   2553 
   2554 	virtual std::string Purpose()
   2555 	{
   2556 		return NL "1. Verify that building separable CS program works as expected." NL
   2557 				  "2. Verify that program consisting from 4 strings works as expected.";
   2558 	}
   2559 
   2560 	virtual std::string Method()
   2561 	{
   2562 		return NL "1. Create, compile and link CS using CreateShaderProgramv command." NL
   2563 				  "2. Dispatch and verify CS program.";
   2564 	}
   2565 
   2566 	virtual std::string PassCriteria()
   2567 	{
   2568 		return "Everything works as expected.";
   2569 	}
   2570 
   2571 	virtual long Run()
   2572 	{
   2573 		const char* const cs[4] = {
   2574 			"#version 430 core",
   2575 
   2576 			NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
   2577 			NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
   2578 			   "vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}",
   2579 			NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL "  return vec4(1, 2, 3, 4);" NL "}"
   2580 		};
   2581 
   2582 		const GLuint p   = glCreateShaderProgramv(GL_COMPUTE_SHADER, 4, cs);
   2583 		bool		 res = CheckProgram(p);
   2584 
   2585 		GLuint buffer;
   2586 		glGenBuffers(1, &buffer);
   2587 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
   2588 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
   2589 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   2590 
   2591 		glUseProgram(p);
   2592 		glDispatchCompute(1, 1, 1);
   2593 
   2594 		vec4 data;
   2595 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
   2596 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2597 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
   2598 		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
   2599 		{
   2600 			m_context.getTestContext().getLog()
   2601 				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
   2602 			res = false;
   2603 		}
   2604 
   2605 		glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &vec4(0.0f)[0]);
   2606 
   2607 		GLuint pipeline;
   2608 		glGenProgramPipelines(1, &pipeline);
   2609 		glUseProgramStages(pipeline, GL_COMPUTE_SHADER_BIT, p);
   2610 
   2611 		glUseProgram(0);
   2612 		glBindProgramPipeline(pipeline);
   2613 		glDispatchCompute(1, 1, 1);
   2614 
   2615 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2616 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
   2617 		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
   2618 		{
   2619 			m_context.getTestContext().getLog()
   2620 				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
   2621 			res = false;
   2622 		}
   2623 
   2624 		glDeleteProgramPipelines(1, &pipeline);
   2625 		glDeleteBuffers(1, &buffer);
   2626 		glDeleteProgram(p);
   2627 
   2628 		return res == true ? NO_ERROR : ERROR;
   2629 	}
   2630 };
   2631 
   2632 class BasicSharedSimple : public ComputeShaderBase
   2633 {
   2634 	virtual std::string Title()
   2635 	{
   2636 		return "Shared Memory - simple usage";
   2637 	}
   2638 
   2639 	virtual std::string Purpose()
   2640 	{
   2641 		return NL "1. Verify that shared array of uints works as expected." NL
   2642 				  "2. Verify that shared memory written by one invocation is observable by other invocations" NL
   2643 				  "    when groupMemoryBarrier() and barrier() built-in functions are used.";
   2644 	}
   2645 
   2646 	virtual std::string Method()
   2647 	{
   2648 		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   2649 				  "2. Verify results written by CS to SSBO." NL
   2650 				  "3. Repeat for several different number of work groups.";
   2651 	}
   2652 
   2653 	virtual std::string PassCriteria()
   2654 	{
   2655 		return "Everything works as expected.";
   2656 	}
   2657 
   2658 	GLuint m_program;
   2659 	GLuint m_storage_buffer;
   2660 	GLuint m_dispatch_buffer;
   2661 
   2662 	bool RunIteration(const GLuint num_groups, bool dispatch_indirect)
   2663 	{
   2664 		const GLuint kBufferSize = 256 * num_groups;
   2665 
   2666 		std::vector<GLuint> data(kBufferSize, 0xffff);
   2667 		if (m_storage_buffer == 0)
   2668 			glGenBuffers(1, &m_storage_buffer);
   2669 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   2670 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
   2671 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   2672 
   2673 		glUseProgram(m_program);
   2674 		if (dispatch_indirect)
   2675 		{
   2676 			const GLuint groups[3] = { num_groups, 1, 1 };
   2677 			if (m_dispatch_buffer == 0)
   2678 				glGenBuffers(1, &m_dispatch_buffer);
   2679 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   2680 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
   2681 			glDispatchComputeIndirect(0);
   2682 		}
   2683 		else
   2684 		{
   2685 			glDispatchCompute(num_groups, 1, 1);
   2686 		}
   2687 
   2688 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   2689 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2690 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &data[0]);
   2691 		for (GLuint i = 0; i < kBufferSize; ++i)
   2692 		{
   2693 			if (data[i] != 1)
   2694 			{
   2695 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
   2696 													<< data[i] << " should be 1." << tcu::TestLog::EndMessage;
   2697 				return false;
   2698 			}
   2699 		}
   2700 		return true;
   2701 	}
   2702 
   2703 	virtual long Setup()
   2704 	{
   2705 		m_program		  = 0;
   2706 		m_storage_buffer  = 0;
   2707 		m_dispatch_buffer = 0;
   2708 		return NO_ERROR;
   2709 	}
   2710 
   2711 	virtual long Run()
   2712 	{
   2713 		const char* const glsl_cs =
   2714 			NL "layout(local_size_x = 256) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   2715 			   "shared uint g_shared_data[256];" NL "void main() {" NL
   2716 			   "  g_shared_data[gl_LocalInvocationID.x] = gl_LocalInvocationIndex;" NL
   2717 			   "  groupMemoryBarrier();" // flush memory stores
   2718 			NL "  barrier();"			 // wait for all stores to finish
   2719 			NL "  g_output[gl_GlobalInvocationID.x] = 1;" NL "  if (gl_LocalInvocationIndex < 255) {" NL
   2720 			   "    const uint res = g_shared_data[gl_LocalInvocationID.x + "
   2721 			   "1];" // load data from shared memory filled by other thread
   2722 			NL "    if (res != (gl_LocalInvocationIndex + 1)) {" NL "      g_output[gl_GlobalInvocationID.x] = 0;" NL
   2723 			   "    }" NL "  }" NL "}";
   2724 		m_program = CreateComputeProgram(glsl_cs);
   2725 		glLinkProgram(m_program);
   2726 		if (!CheckProgram(m_program))
   2727 			return ERROR;
   2728 
   2729 		if (!RunIteration(1, false))
   2730 			return ERROR;
   2731 		if (!RunIteration(8, true))
   2732 			return ERROR;
   2733 		if (!RunIteration(13, false))
   2734 			return ERROR;
   2735 		if (!RunIteration(7, true))
   2736 			return ERROR;
   2737 		return NO_ERROR;
   2738 	}
   2739 	virtual long Cleanup()
   2740 	{
   2741 		glUseProgram(0);
   2742 		glDeleteProgram(m_program);
   2743 		glDeleteBuffers(1, &m_storage_buffer);
   2744 		glDeleteBuffers(1, &m_dispatch_buffer);
   2745 		return NO_ERROR;
   2746 	}
   2747 };
   2748 
   2749 class BasicSharedStruct : public ComputeShaderBase
   2750 {
   2751 	virtual std::string Title()
   2752 	{
   2753 		return "Shared Memory - arrays and structers";
   2754 	}
   2755 
   2756 	virtual std::string Purpose()
   2757 	{
   2758 		return NL "1. Verify that vectors, matrices, structers and arrays of those can be used" NL
   2759 				  "    as a shared memory." NL
   2760 				  "2. Verify that shared memory can be indexed with constant values, built-in" NL
   2761 				  "    variables and dynamic expressions." NL
   2762 				  "3. Verify that memoryBarrierAtomicCounter(), memoryBarrierImage(), memoryBarrier()," NL
   2763 				  "     memoryBarrierBuffer() and memoryBarrierShared() built-in functions are accepted" NL
   2764 				  "     by the GLSL compiler.";
   2765 	}
   2766 
   2767 	virtual std::string Method()
   2768 	{
   2769 		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   2770 				  "2. Verify results written by CS to SSBO.";
   2771 	}
   2772 
   2773 	virtual std::string PassCriteria()
   2774 	{
   2775 		return "Everything works as expected.";
   2776 	}
   2777 
   2778 	GLuint m_program;
   2779 	GLuint m_storage_buffer;
   2780 	GLuint m_dispatch_buffer;
   2781 
   2782 	bool RunIteration(bool dispatch_indirect)
   2783 	{
   2784 		const GLuint kBufferSize = 256;
   2785 
   2786 		std::vector<vec4> data(kBufferSize);
   2787 		if (m_storage_buffer == 0)
   2788 			glGenBuffers(1, &m_storage_buffer);
   2789 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   2790 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
   2791 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   2792 
   2793 		glUseProgram(m_program);
   2794 		if (dispatch_indirect)
   2795 		{
   2796 			const GLuint groups[3] = { 1, 1, 1 };
   2797 			if (m_dispatch_buffer == 0)
   2798 				glGenBuffers(1, &m_dispatch_buffer);
   2799 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   2800 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
   2801 			glDispatchComputeIndirect(0);
   2802 		}
   2803 		else
   2804 		{
   2805 			glDispatchCompute(1, 1, 1);
   2806 		}
   2807 
   2808 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   2809 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2810 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize, &data[0]);
   2811 		for (GLuint i = 0; i < kBufferSize; ++i)
   2812 		{
   2813 			if (!IsEqual(data[i], vec4(static_cast<float>(i))))
   2814 			{
   2815 				m_context.getTestContext().getLog()
   2816 					<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
   2817 				return false;
   2818 			}
   2819 		}
   2820 		return true;
   2821 	}
   2822 
   2823 	virtual long Setup()
   2824 	{
   2825 		m_program		  = 0;
   2826 		m_storage_buffer  = 0;
   2827 		m_dispatch_buffer = 0;
   2828 		return NO_ERROR;
   2829 	}
   2830 
   2831 	virtual long Run()
   2832 	{
   2833 		const char* const glsl_cs = NL
   2834 			"layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[256];" NL "};" NL
   2835 			"struct SubData {" NL "  mat2x4 data;" NL "};" NL "struct Data {" NL "  uint index;" NL "  vec3 data0;" NL
   2836 			"  SubData data1;" NL "};" NL "shared Data g_shared_data[256];" NL "shared int g_shared_buf[2];" NL
   2837 			"void main() {" NL "  if (gl_LocalInvocationID.x == 0) {" NL "    g_shared_buf[1] = 1;" NL
   2838 			"    g_shared_buf[1 + gl_LocalInvocationID.x] = 0;" NL "    g_shared_buf[0] = 128;" NL
   2839 			"    g_output[0] = vec4(g_shared_buf[1]);" NL "    g_output[128] = vec4(g_shared_buf[0]);" NL
   2840 			"    memoryBarrierBuffer();" // note: this call is not needed here, just check if compiler accepts it
   2841 			NL "  } else {" NL "    const uint index = gl_LocalInvocationIndex;" NL
   2842 			"    g_shared_data[index].index = index;" NL "    g_shared_data[index + 128].index = index + 128;" NL
   2843 			"    g_shared_data[index].data1.data = mat2x4(0.0);" NL
   2844 			"    g_shared_data[index + 128].data1.data = mat2x4(0.0);" NL
   2845 			"    g_output[index] = vec4(g_shared_data[index].index);" // load data from shared memory
   2846 			NL "    g_output[index + 128] = vec4(g_shared_data[index + 128].index);" NL
   2847 			"    memoryBarrierShared();" // note: this call is not needed here, just check if compiler accepts it
   2848 			NL "  }" NL "  memoryBarrierAtomicCounter();" NL "  memoryBarrierImage();" NL
   2849 			"  memoryBarrier();" // note: these calls are not needed here, just check if compiler accepts them
   2850 			NL "}";
   2851 		m_program = CreateComputeProgram(glsl_cs);
   2852 		glLinkProgram(m_program);
   2853 		if (!CheckProgram(m_program))
   2854 			return ERROR;
   2855 
   2856 		if (!RunIteration(false))
   2857 			return ERROR;
   2858 		if (!RunIteration(true))
   2859 			return ERROR;
   2860 		return NO_ERROR;
   2861 	}
   2862 
   2863 	virtual long Cleanup()
   2864 	{
   2865 		glUseProgram(0);
   2866 		glDeleteProgram(m_program);
   2867 		glDeleteBuffers(1, &m_storage_buffer);
   2868 		glDeleteBuffers(1, &m_dispatch_buffer);
   2869 		return NO_ERROR;
   2870 	}
   2871 };
   2872 
   2873 class BasicDispatchIndirect : public ComputeShaderBase
   2874 {
   2875 	virtual std::string Title()
   2876 	{
   2877 		return NL "DispatchComputeIndirect command";
   2878 	}
   2879 
   2880 	virtual std::string Purpose()
   2881 	{
   2882 		return NL
   2883 			"1. Verify that DispatchComputeIndirect command works as described in the OpenGL specification." NL
   2884 			"2. Verify that <offset> parameter is correctly applied." NL
   2885 			"3. Verify that updating dispatch buffer with different methods (BufferData, BufferSubData, MapBuffer)" NL
   2886 			"    just before DispatchComputeIndirect call works as expected." NL
   2887 			"4. Verify that GL_DISPATCH_INDIRECT_BUFFER_BINDING binding point is set correctly.";
   2888 	}
   2889 
   2890 	virtual std::string Method()
   2891 	{
   2892 		return NL
   2893 			"1. Create CS and dispatch indirect buffer." NL "2. Dispatch CS with DispatchComputeIndirect command." NL
   2894 			"3. Update dispatch indirect buffer." NL
   2895 			"4. Repeat several times updating dispatch buffer with different methods and changing <offset> parameter.";
   2896 	}
   2897 
   2898 	virtual std::string PassCriteria()
   2899 	{
   2900 		return NL "Everything works as expected.";
   2901 	}
   2902 
   2903 	GLuint m_program;
   2904 	GLuint m_storage_buffer;
   2905 	GLuint m_dispatch_buffer[2];
   2906 
   2907 	bool RunIteration(GLintptr offset, GLuint buffer_size)
   2908 	{
   2909 		std::vector<GLuint> data(buffer_size);
   2910 		if (m_storage_buffer == 0)
   2911 			glGenBuffers(1, &m_storage_buffer);
   2912 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   2913 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * buffer_size, &data[0], GL_DYNAMIC_DRAW);
   2914 
   2915 		glDispatchComputeIndirect(offset);
   2916 
   2917 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   2918 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * buffer_size, &data[0]);
   2919 		for (GLuint i = 0; i < buffer_size; ++i)
   2920 		{
   2921 			if (data[i] != i)
   2922 			{
   2923 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
   2924 													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
   2925 				return false;
   2926 			}
   2927 		}
   2928 		return true;
   2929 	}
   2930 
   2931 	bool CheckBinding(GLuint expected)
   2932 	{
   2933 		GLint	 i;
   2934 		GLint64   i64;
   2935 		GLfloat   f;
   2936 		GLdouble  d;
   2937 		GLboolean b;
   2938 
   2939 		glGetIntegerv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i);
   2940 		if (static_cast<GLuint>(i) != expected)
   2941 		{
   2942 			return false;
   2943 		}
   2944 		glGetInteger64v(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i64);
   2945 		if (static_cast<GLuint>(i64) != expected)
   2946 		{
   2947 			return false;
   2948 		}
   2949 		glGetFloatv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &f);
   2950 		if (static_cast<GLuint>(f) != expected)
   2951 		{
   2952 			return false;
   2953 		}
   2954 		glGetDoublev(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &d);
   2955 		if (static_cast<GLuint>(d) != expected)
   2956 		{
   2957 			return false;
   2958 		}
   2959 		glGetBooleanv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &b);
   2960 		if (b != (expected != 0 ? GL_TRUE : GL_FALSE))
   2961 		{
   2962 			return false;
   2963 		}
   2964 
   2965 		return true;
   2966 	}
   2967 
   2968 	virtual long Setup()
   2969 	{
   2970 		m_program		 = 0;
   2971 		m_storage_buffer = 0;
   2972 		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
   2973 		return NO_ERROR;
   2974 	}
   2975 
   2976 	virtual long Run()
   2977 	{
   2978 		const char* const glsl_cs =
   2979 			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   2980 			   "uniform uvec3 g_global_size;" NL "void main() {" NL
   2981 			   "  const uint global_index = gl_GlobalInvocationID.x +" NL
   2982 			   "                            gl_GlobalInvocationID.y * g_global_size.x +" NL
   2983 			   "                            gl_GlobalInvocationID.z * g_global_size.x * g_global_size.y;" NL
   2984 			   "  if (gl_NumWorkGroups != g_global_size) {" NL "    g_output[global_index] = 0xffff;" NL
   2985 			   "    return;" NL "  }" NL "  g_output[global_index] = global_index;" NL "}";
   2986 		m_program = CreateComputeProgram(glsl_cs);
   2987 		glLinkProgram(m_program);
   2988 		if (!CheckProgram(m_program))
   2989 			return ERROR;
   2990 
   2991 		if (!CheckBinding(0))
   2992 			return ERROR;
   2993 
   2994 		glGenBuffers(2, m_dispatch_buffer);
   2995 
   2996 		const GLuint data[]  = { 1, 2, 3, 4, 5, 6, 7, 8 };
   2997 		const GLuint data2[] = { 3, 1, 4, 4 };
   2998 
   2999 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
   3000 		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STREAM_DRAW);
   3001 		if (!CheckBinding(m_dispatch_buffer[0]))
   3002 			return ERROR;
   3003 
   3004 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
   3005 		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data2), data2, GL_STREAM_READ);
   3006 		if (!CheckBinding(m_dispatch_buffer[1]))
   3007 			return ERROR;
   3008 
   3009 		glUseProgram(m_program);
   3010 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
   3011 
   3012 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
   3013 		if (!RunIteration(0, 6))
   3014 			return ERROR;
   3015 
   3016 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 2, 3, 4);
   3017 		if (!RunIteration(4, 24))
   3018 			return ERROR;
   3019 
   3020 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 5, 6);
   3021 		if (!RunIteration(12, 120))
   3022 			return ERROR;
   3023 
   3024 		glBufferSubData(GL_DISPATCH_INDIRECT_BUFFER, 20, 12, data);
   3025 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
   3026 		if (!RunIteration(20, 6))
   3027 			return ERROR;
   3028 
   3029 		GLuint* ptr = static_cast<GLuint*>(glMapBuffer(GL_DISPATCH_INDIRECT_BUFFER, GL_WRITE_ONLY));
   3030 		*ptr++		= 4;
   3031 		*ptr++		= 4;
   3032 		*ptr++		= 4;
   3033 		glUnmapBuffer(GL_DISPATCH_INDIRECT_BUFFER);
   3034 
   3035 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 4, 4);
   3036 		if (!RunIteration(0, 64))
   3037 			return ERROR;
   3038 
   3039 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
   3040 
   3041 		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 4, 4);
   3042 		if (!RunIteration(4, 16))
   3043 			return ERROR;
   3044 
   3045 		glDeleteBuffers(2, m_dispatch_buffer);
   3046 		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
   3047 
   3048 		if (!CheckBinding(0))
   3049 			return ERROR;
   3050 
   3051 		return NO_ERROR;
   3052 	}
   3053 	virtual long Cleanup()
   3054 	{
   3055 		glUseProgram(0);
   3056 		glDeleteProgram(m_program);
   3057 		glDeleteBuffers(1, &m_storage_buffer);
   3058 		glDeleteBuffers(2, m_dispatch_buffer);
   3059 		return NO_ERROR;
   3060 	}
   3061 };
   3062 
   3063 class BasicSSOComputePipeline : public ComputeShaderBase
   3064 {
   3065 	virtual std::string Title()
   3066 	{
   3067 		return NL "Separable CS Programs - Compute and non-compute stages (1)";
   3068 	}
   3069 	virtual std::string Purpose()
   3070 	{
   3071 		return NL "1. Verify that compute and non-compute stages can be attached to one pipeline object." NL
   3072 				  "2. Verify that DrawArrays and ComputeDispatch commands works as expected in this case.";
   3073 	}
   3074 	virtual std::string Method()
   3075 	{
   3076 		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
   3077 				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
   3078 				  "4. Issue MemoryBarrier command." NL
   3079 				  "5. Issue DrawArrays command which uses data written by the compute stage." NL "6. Verify result.";
   3080 	}
   3081 	virtual std::string PassCriteria()
   3082 	{
   3083 		return NL "Everything works as expected.";
   3084 	}
   3085 
   3086 	GLuint m_vsp, m_fsp, m_csp;
   3087 	GLuint m_storage_buffer;
   3088 	GLuint m_vertex_array;
   3089 	GLuint m_pipeline;
   3090 
   3091 	virtual long Setup()
   3092 	{
   3093 		m_vsp = m_fsp = m_csp = 0;
   3094 		m_storage_buffer	  = 0;
   3095 		m_vertex_array		  = 0;
   3096 		m_pipeline			  = 0;
   3097 		return NO_ERROR;
   3098 	}
   3099 	virtual long Run()
   3100 	{
   3101 		const char* const glsl_cs =
   3102 			NL "layout(local_size_x = 4) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[4];" NL "};" NL
   3103 			   "void main() {" NL "  const vec2 quad[4] = { vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1) };" NL
   3104 			   "  g_output[gl_GlobalInvocationID.x] = vec4(quad[gl_GlobalInvocationID.x], 0, 1);" NL "}";
   3105 
   3106 		m_csp = CreateComputeProgram(glsl_cs);
   3107 		glProgramParameteri(m_csp, GL_PROGRAM_SEPARABLE, GL_TRUE);
   3108 		glLinkProgram(m_csp);
   3109 		if (!CheckProgram(m_csp))
   3110 			return ERROR;
   3111 
   3112 		const char* const glsl_vs =
   3113 			NL "layout(location = 0) in vec4 i_position;" NL "out gl_PerVertex {" NL "  vec4 gl_Position;" NL "};" NL
   3114 			   "void main() {" NL "  gl_Position = i_position;" NL "}";
   3115 		m_vsp = BuildShaderProgram(GL_VERTEX_SHADER, glsl_vs);
   3116 		if (!CheckProgram(m_vsp))
   3117 			return ERROR;
   3118 
   3119 		const char* const glsl_fs =
   3120 			NL "layout(location = 0) out vec4 o_color;" NL "void main() {" NL "  o_color = vec4(0, 1, 0, 1);" NL "}";
   3121 		m_fsp = BuildShaderProgram(GL_FRAGMENT_SHADER, glsl_fs);
   3122 		if (!CheckProgram(m_fsp))
   3123 			return ERROR;
   3124 
   3125 		glGenProgramPipelines(1, &m_pipeline);
   3126 		glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, m_vsp);
   3127 		glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, m_fsp);
   3128 		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_csp);
   3129 
   3130 		glGenBuffers(1, &m_storage_buffer);
   3131 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   3132 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * 4, NULL, GL_DYNAMIC_DRAW);
   3133 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   3134 
   3135 		glGenVertexArrays(1, &m_vertex_array);
   3136 		glBindVertexArray(m_vertex_array);
   3137 		glBindBuffer(GL_ARRAY_BUFFER, m_storage_buffer);
   3138 		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0);
   3139 		glBindBuffer(GL_ARRAY_BUFFER, 0);
   3140 		glEnableVertexAttribArray(0);
   3141 		glBindVertexArray(0);
   3142 
   3143 		glBindProgramPipeline(m_pipeline);
   3144 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   3145 		glDispatchCompute(1, 1, 1);
   3146 
   3147 		glClear(GL_COLOR_BUFFER_BIT);
   3148 		glBindVertexArray(m_vertex_array);
   3149 		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
   3150 		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
   3151 
   3152 		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
   3153 			return ERROR;
   3154 		return NO_ERROR;
   3155 	}
   3156 
   3157 	virtual long Cleanup()
   3158 	{
   3159 		glDeleteProgram(m_vsp);
   3160 		glDeleteProgram(m_fsp);
   3161 		glDeleteProgram(m_csp);
   3162 		glDeleteBuffers(1, &m_storage_buffer);
   3163 		glDeleteVertexArrays(1, &m_vertex_array);
   3164 		glDeleteProgramPipelines(1, &m_pipeline);
   3165 		return NO_ERROR;
   3166 	}
   3167 };
   3168 
   3169 class BasicSSOCase2 : public ComputeShaderBase
   3170 {
   3171 	virtual std::string Title()
   3172 	{
   3173 		return NL "Separable CS Programs - Compute and non-compute stages (2)";
   3174 	}
   3175 	virtual std::string Purpose()
   3176 	{
   3177 		return NL "1. Verify that data computed by the compute stage is visible to non-compute stage after "
   3178 				  "MemoryBarrier command." NL "2. Verify that ProgramParameteri(program, GL_PROGRAM_SEPARABLE, "
   3179 				  "GL_TRUE) command works correctly for CS." NL
   3180 				  "3. Verify that gl_WorkGroupSize built-in variable is a contant and can be used as an array size.";
   3181 	}
   3182 	virtual std::string Method()
   3183 	{
   3184 		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
   3185 				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
   3186 				  "4. Issue MemoryBarrier command." NL
   3187 				  "5. Issue DrawArrays command which uses data written to the buffer object by the compute stage." NL
   3188 				  "6. Verify result.";
   3189 	}
   3190 	virtual std::string PassCriteria()
   3191 	{
   3192 		return NL "Everything works as expected.";
   3193 	}
   3194 
   3195 	GLuint m_program_ab;
   3196 	GLuint m_program_c;
   3197 	GLuint m_pipeline;
   3198 	GLuint m_storage_buffer;
   3199 	GLuint m_vao;
   3200 
   3201 	virtual long Setup()
   3202 	{
   3203 		m_program_ab	 = 0;
   3204 		m_program_c		 = 0;
   3205 		m_pipeline		 = 0;
   3206 		m_storage_buffer = 0;
   3207 		m_vao			 = 0;
   3208 		return NO_ERROR;
   3209 	}
   3210 	virtual long Run()
   3211 	{
   3212 		GLint res;
   3213 		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
   3214 		if (res <= 0)
   3215 		{
   3216 			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
   3217 			return NO_ERROR;
   3218 		}
   3219 
   3220 		const char* const glsl_a =
   3221 			"#version 430 core" NL "layout(binding = 1, std430) buffer Input {" NL "  vec2 g_input[4];" NL "};" NL
   3222 			"out StageData {" NL "  vec3 color;" NL "} g_vs_out;" NL "out gl_PerVertex {" NL "  vec4 gl_Position;" NL
   3223 			"};" NL "void main() {" NL "  gl_Position = vec4(g_input[gl_VertexID], 0, 1);" NL
   3224 			"  g_vs_out.color = vec3(0, 1, 0);" NL "}";
   3225 
   3226 		const char* const glsl_b =
   3227 			"#version 430 core" NL "in StageData {" NL "  vec3 color;" NL "} g_fs_in;" NL
   3228 			"layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(g_fs_in.color, 1);" NL "}";
   3229 
   3230 		const char* const glsl_c =
   3231 			"#version 430 core" NL "layout(local_size_x = 4) in;" NL "layout(binding = 1, std430) buffer Output {" NL
   3232 			"  vec2 g_output[gl_WorkGroupSize.x];" NL "};" NL "void main() {" NL
   3233 			"  if (gl_GlobalInvocationID.x == 0) {" NL "    g_output[0] = vec2(-0.8, -0.8);" NL
   3234 			"  } else if (gl_GlobalInvocationID.x == 1) {" NL "    g_output[1] = vec2(0.8, -0.8);" NL
   3235 			"  } else if (gl_GlobalInvocationID.x == 2) {" NL "    g_output[2] = vec2(-0.8, 0.8);" NL
   3236 			"  } else if (gl_GlobalInvocationID.x == 3) {" NL "    g_output[3] = vec2(0.8, 0.8);" NL "  }" NL "}";
   3237 
   3238 		m_program_ab = glCreateProgram();
   3239 		GLuint sh	= glCreateShader(GL_VERTEX_SHADER);
   3240 		glAttachShader(m_program_ab, sh);
   3241 		glDeleteShader(sh);
   3242 		glShaderSource(sh, 1, &glsl_a, NULL);
   3243 		glCompileShader(sh);
   3244 
   3245 		sh = glCreateShader(GL_FRAGMENT_SHADER);
   3246 		glAttachShader(m_program_ab, sh);
   3247 		glDeleteShader(sh);
   3248 		glShaderSource(sh, 1, &glsl_b, NULL);
   3249 		glCompileShader(sh);
   3250 
   3251 		glProgramParameteri(m_program_ab, GL_PROGRAM_SEPARABLE, GL_TRUE);
   3252 		glLinkProgram(m_program_ab);
   3253 
   3254 		m_program_c = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_c);
   3255 		glGenVertexArrays(1, &m_vao);
   3256 		glGenProgramPipelines(1, &m_pipeline);
   3257 		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_ab);
   3258 		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_c);
   3259 
   3260 		glGenBuffers(1, &m_storage_buffer);
   3261 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer);
   3262 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec2) * 4, NULL, GL_STREAM_DRAW);
   3263 
   3264 		glClear(GL_COLOR_BUFFER_BIT);
   3265 		glBindProgramPipeline(m_pipeline);
   3266 		glDispatchCompute(1, 1, 1);
   3267 		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   3268 		glBindVertexArray(m_vao);
   3269 		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
   3270 
   3271 		if (getWindowWidth() < 500 &&
   3272 			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
   3273 		{
   3274 			return ERROR;
   3275 		}
   3276 		return NO_ERROR;
   3277 	}
   3278 	virtual long Cleanup()
   3279 	{
   3280 		glDeleteProgram(m_program_ab);
   3281 		glDeleteProgram(m_program_c);
   3282 		glDeleteProgramPipelines(1, &m_pipeline);
   3283 		glDeleteBuffers(1, &m_storage_buffer);
   3284 		glDeleteVertexArrays(1, &m_vao);
   3285 		return NO_ERROR;
   3286 	}
   3287 };
   3288 
   3289 class BasicSSOCase3 : public ComputeShaderBase
   3290 {
   3291 	virtual std::string Title()
   3292 	{
   3293 		return NL "Separable CS Programs - Compute stage";
   3294 	}
   3295 	virtual std::string Purpose()
   3296 	{
   3297 		return NL "Verify that compute shader stage selected with UseProgram command has precedence" NL
   3298 				  "over compute shader stage selected with BindProgramPipeline command.";
   3299 	}
   3300 	virtual std::string Method()
   3301 	{
   3302 		return NL "1. Create CS0 with CreateProgram command. Create CS1 with CreateShaderProgramv command." NL
   3303 				  "2. Verify that CS program selected with UseProgram is dispatched even if there is active" NL
   3304 				  "    compute stage bound by BindProgramPipeline.";
   3305 	}
   3306 	virtual std::string PassCriteria()
   3307 	{
   3308 		return NL "Everything works as expected.";
   3309 	}
   3310 
   3311 	GLuint m_program_a;
   3312 	GLuint m_program_b;
   3313 	GLuint m_pipeline;
   3314 	GLuint m_storage_buffer;
   3315 
   3316 	virtual long Setup()
   3317 	{
   3318 		m_program_a		 = 0;
   3319 		m_program_b		 = 0;
   3320 		m_pipeline		 = 0;
   3321 		m_storage_buffer = 0;
   3322 		return NO_ERROR;
   3323 	}
   3324 	virtual long Run()
   3325 	{
   3326 		const char* const glsl_a =
   3327 			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
   3328 			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 1;" NL "}";
   3329 
   3330 		const char* const glsl_b =
   3331 			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
   3332 			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 2;" NL "}";
   3333 
   3334 		/* create program A */
   3335 		{
   3336 			m_program_a = glCreateProgram();
   3337 			GLuint sh   = glCreateShader(GL_COMPUTE_SHADER);
   3338 			glAttachShader(m_program_a, sh);
   3339 			glDeleteShader(sh);
   3340 			glShaderSource(sh, 1, &glsl_a, NULL);
   3341 			glCompileShader(sh);
   3342 			glProgramParameteri(m_program_a, GL_PROGRAM_SEPARABLE, GL_TRUE);
   3343 			glLinkProgram(m_program_a);
   3344 		}
   3345 		m_program_b = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_b);
   3346 
   3347 		/* create storage buffer */
   3348 		{
   3349 			int data = 0;
   3350 			glGenBuffers(1, &m_storage_buffer);
   3351 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer);
   3352 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &data, GL_STREAM_READ);
   3353 		}
   3354 
   3355 		glGenProgramPipelines(1, &m_pipeline);
   3356 		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_b);
   3357 
   3358 		glUseProgram(m_program_a);
   3359 		glBindProgramPipeline(m_pipeline);
   3360 		glDispatchCompute(1, 1, 1);
   3361 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3362 
   3363 		/* validate */
   3364 		{
   3365 			int data;
   3366 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
   3367 			if (data != 1)
   3368 			{
   3369 				m_context.getTestContext().getLog()
   3370 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   3371 				return ERROR;
   3372 			}
   3373 		}
   3374 
   3375 		glUseProgram(0);
   3376 		glDispatchCompute(1, 1, 1);
   3377 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3378 
   3379 		/* validate */
   3380 		{
   3381 			int data;
   3382 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
   3383 			if (data != 2)
   3384 			{
   3385 				m_context.getTestContext().getLog()
   3386 					<< tcu::TestLog::Message << "Data is " << data << " should be 2." << tcu::TestLog::EndMessage;
   3387 				return ERROR;
   3388 			}
   3389 		}
   3390 
   3391 		glUseProgram(m_program_b);
   3392 		glDispatchCompute(1, 1, 1);
   3393 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3394 
   3395 		/* validate */
   3396 		{
   3397 			int data;
   3398 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
   3399 			if (data != 2)
   3400 			{
   3401 				m_context.getTestContext().getLog()
   3402 					<< tcu::TestLog::Message << "Data is " << data << " should be 2." << tcu::TestLog::EndMessage;
   3403 				return ERROR;
   3404 			}
   3405 		}
   3406 
   3407 		glUseProgram(0);
   3408 		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_a);
   3409 		glDispatchCompute(1, 1, 1);
   3410 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3411 
   3412 		/* validate */
   3413 		{
   3414 			int data;
   3415 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
   3416 			if (data != 1)
   3417 			{
   3418 				m_context.getTestContext().getLog()
   3419 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   3420 				return ERROR;
   3421 			}
   3422 		}
   3423 
   3424 		return NO_ERROR;
   3425 	}
   3426 	virtual long Cleanup()
   3427 	{
   3428 		glDeleteProgram(m_program_a);
   3429 		glDeleteProgram(m_program_b);
   3430 		glDeleteProgramPipelines(1, &m_pipeline);
   3431 		glDeleteBuffers(1, &m_storage_buffer);
   3432 		return NO_ERROR;
   3433 	}
   3434 };
   3435 
   3436 class BasicAtomicCase1 : public ComputeShaderBase
   3437 {
   3438 	virtual std::string Title()
   3439 	{
   3440 		return NL "Atomic functions";
   3441 	}
   3442 	virtual std::string Purpose()
   3443 	{
   3444 		return NL "1. Verify that atomicAdd function works as expected with int and uint parameters." NL
   3445 				  "2. Verify that shared memory can be used with atomic functions." NL
   3446 				  "3. Verify that groupMemoryBarrier() and barrier() built-in functions work as expected.";
   3447 	}
   3448 	virtual std::string Method()
   3449 	{
   3450 		return NL "1. Use shared memory as a 'counter' with-in one CS work group." NL
   3451 				  "2. Each shader invocation increments/decrements 'counter' value using atomicAdd function." NL
   3452 				  "3. Values returned by atomicAdd function are written to SSBO." NL
   3453 				  "4. Verify SSBO content (values from 0 to 7 should be written).";
   3454 	}
   3455 	virtual std::string PassCriteria()
   3456 	{
   3457 		return NL "Everything works as expected.";
   3458 	}
   3459 
   3460 	GLuint m_program;
   3461 	GLuint m_storage_buffer;
   3462 
   3463 	virtual long Setup()
   3464 	{
   3465 		m_program		 = 0;
   3466 		m_storage_buffer = 0;
   3467 		return NO_ERROR;
   3468 	}
   3469 	virtual long Run()
   3470 	{
   3471 		const char* const glsl_cs =
   3472 			NL "layout(local_size_x = 8) in;" NL "layout(std430, binding = 0) buffer Output {" NL
   3473 			   "  uint g_add_output[8];" NL "  int g_sub_output[8];" NL "};" NL "shared uint g_add_value;" NL
   3474 			   "shared int g_sub_value;" NL "void main() {" NL "  if (gl_LocalInvocationIndex == 0) {" NL
   3475 			   "    g_add_value = 0u;" NL "    g_sub_value = 7;" NL "  }" NL
   3476 			   "  g_add_output[gl_LocalInvocationIndex] = 0u;" NL "  g_sub_output[gl_LocalInvocationIndex] = 0;" NL
   3477 			   "  groupMemoryBarrier();" NL "  barrier();" NL
   3478 			   "  g_add_output[gl_LocalInvocationIndex] = atomicAdd(g_add_value, 1u);" NL
   3479 			   "  g_sub_output[gl_LocalInvocationIndex] = atomicAdd(g_sub_value, -1);" NL "}";
   3480 		m_program = CreateComputeProgram(glsl_cs);
   3481 		glLinkProgram(m_program);
   3482 		if (!CheckProgram(m_program))
   3483 			return ERROR;
   3484 
   3485 		glGenBuffers(1, &m_storage_buffer);
   3486 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   3487 		glBufferData(GL_SHADER_STORAGE_BUFFER, 16 * sizeof(int), NULL, GL_STATIC_DRAW);
   3488 
   3489 		glUseProgram(m_program);
   3490 		glDispatchCompute(1, 1, 1);
   3491 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3492 
   3493 		std::vector<int> data(8);
   3494 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 8, &data[0]);
   3495 		std::sort(data.begin(), data.end());
   3496 		for (int i = 0; i < 8; ++i)
   3497 		{
   3498 			if (data[i] != i)
   3499 			{
   3500 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
   3501 													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
   3502 				return ERROR;
   3503 			}
   3504 		}
   3505 
   3506 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 8, sizeof(int) * 8, &data[0]);
   3507 		std::sort(data.begin(), data.end());
   3508 		for (int i = 0; i < 8; ++i)
   3509 		{
   3510 			if (data[i] != i)
   3511 			{
   3512 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
   3513 													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
   3514 				return ERROR;
   3515 			}
   3516 		}
   3517 
   3518 		return NO_ERROR;
   3519 	}
   3520 	virtual long Cleanup()
   3521 	{
   3522 		glUseProgram(0);
   3523 		glDeleteProgram(m_program);
   3524 		glDeleteBuffers(1, &m_storage_buffer);
   3525 		return NO_ERROR;
   3526 	}
   3527 };
   3528 
   3529 class BasicAtomicCase2 : public ComputeShaderBase
   3530 {
   3531 	virtual std::string Title()
   3532 	{
   3533 		return NL "Atomic functions - buffer variables";
   3534 	}
   3535 	virtual std::string Purpose()
   3536 	{
   3537 		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
   3538 				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with buffer variables." NL
   3539 				  "2. Verify that atomic functions work with parameters being constants and" NL
   3540 				  "    with parameters being uniforms." NL
   3541 				  "3. Verify that barrier() built-in function can be used in a control flow.";
   3542 	}
   3543 	virtual std::string Method()
   3544 	{
   3545 		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
   3546 				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   3547 				  "3. Verify SSBO content." NL
   3548 				  "4. Repeat for different number of work groups and different work group sizes.";
   3549 	}
   3550 	virtual std::string PassCriteria()
   3551 	{
   3552 		return NL "Everything works as expected.";
   3553 	}
   3554 
   3555 	GLuint m_program;
   3556 	GLuint m_storage_buffer[2];
   3557 	GLuint m_dispatch_buffer;
   3558 
   3559 	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
   3560 	{
   3561 		const uvec3		  global_size = local_size * num_groups;
   3562 		std::stringstream ss;
   3563 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
   3564 		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
   3565 		   << ", " << global_size.y() << ", " << global_size.z()
   3566 		   << ");" NL "layout(std430, binding = 0) buffer OutputU {" NL "  uint g_uint_out["
   3567 		   << global_size.x() * global_size.y() * global_size.z()
   3568 		   << "];" NL "};" NL "layout(std430, binding = 1) buffer OutputI {" NL "  int data["
   3569 		   << global_size.x() * global_size.y() * global_size.z()
   3570 		   << "];" NL "} g_int_out;" NL
   3571 			  "uniform uint g_uint_value[8] = uint[8](3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u);" NL "void main() {" NL
   3572 			  "  const uint global_index = gl_GlobalInvocationID.x +" NL
   3573 			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
   3574 			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
   3575 			  "  atomicExchange(g_uint_out[global_index], g_uint_value[0]);" NL
   3576 			  "  atomicMin(g_uint_out[global_index], g_uint_value[1]);" NL
   3577 			  "  atomicMax(g_uint_out[global_index], g_uint_value[2]);" NL
   3578 			  "  atomicAnd(g_uint_out[global_index], g_uint_value[3]);" NL
   3579 			  "  atomicOr(g_uint_out[global_index], g_uint_value[4]);" NL "  if (g_uint_value[0] > 0u) {" NL
   3580 			  "    barrier();" // not needed here, just check if compiler accepts it in a control flow
   3581 			NL "    atomicXor(g_uint_out[global_index], g_uint_value[5]);" NL "  }" NL
   3582 			  "  atomicCompSwap(g_uint_out[global_index], g_uint_value[6], g_uint_value[7]);" NL NL
   3583 			  "  atomicExchange(g_int_out.data[global_index], 3);" NL "  atomicMin(g_int_out.data[global_index], 1);" NL
   3584 			  "  atomicMax(g_int_out.data[global_index], 2);" NL "  atomicAnd(g_int_out.data[global_index], 0x1);" NL
   3585 			  "  atomicOr(g_int_out.data[global_index], 0x3);" NL "  atomicXor(g_int_out.data[global_index], 0x1);" NL
   3586 			  "  atomicCompSwap(g_int_out.data[global_index], 0x2, 0x7);" NL "}";
   3587 		return ss.str();
   3588 	}
   3589 	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
   3590 	{
   3591 		if (m_program != 0)
   3592 			glDeleteProgram(m_program);
   3593 		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
   3594 		glLinkProgram(m_program);
   3595 		if (!CheckProgram(m_program))
   3596 			return false;
   3597 
   3598 		const GLuint kBufferSize =
   3599 			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
   3600 
   3601 		if (m_storage_buffer[0] == 0)
   3602 			glGenBuffers(2, m_storage_buffer);
   3603 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
   3604 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
   3605 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
   3606 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
   3607 
   3608 		glUseProgram(m_program);
   3609 		if (dispatch_indirect)
   3610 		{
   3611 			if (m_dispatch_buffer == 0)
   3612 				glGenBuffers(1, &m_dispatch_buffer);
   3613 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   3614 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   3615 			glDispatchComputeIndirect(0);
   3616 		}
   3617 		else
   3618 		{
   3619 			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
   3620 		}
   3621 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3622 
   3623 		std::vector<GLuint> udata(kBufferSize);
   3624 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
   3625 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &udata[0]);
   3626 		for (GLuint i = 0; i < kBufferSize; ++i)
   3627 		{
   3628 			if (udata[i] != 7)
   3629 			{
   3630 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
   3631 													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
   3632 				return false;
   3633 			}
   3634 		}
   3635 
   3636 		std::vector<GLint> idata(kBufferSize);
   3637 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
   3638 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * kBufferSize, &idata[0]);
   3639 		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
   3640 		{
   3641 			if (idata[i] != 7)
   3642 			{
   3643 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
   3644 													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
   3645 				return false;
   3646 			}
   3647 		}
   3648 
   3649 		return true;
   3650 	}
   3651 	virtual long Setup()
   3652 	{
   3653 		m_program			= 0;
   3654 		m_storage_buffer[0] = m_storage_buffer[1] = 0;
   3655 		m_dispatch_buffer						  = 0;
   3656 		return NO_ERROR;
   3657 	}
   3658 	virtual long Run()
   3659 	{
   3660 		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
   3661 			return ERROR;
   3662 		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
   3663 			return ERROR;
   3664 		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
   3665 			return ERROR;
   3666 		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
   3667 			return ERROR;
   3668 		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
   3669 			return ERROR;
   3670 		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
   3671 			return ERROR;
   3672 		return NO_ERROR;
   3673 	}
   3674 	virtual long Cleanup()
   3675 	{
   3676 		glUseProgram(0);
   3677 		glDeleteProgram(m_program);
   3678 		glDeleteBuffers(2, m_storage_buffer);
   3679 		glDeleteBuffers(1, &m_dispatch_buffer);
   3680 		return NO_ERROR;
   3681 	}
   3682 };
   3683 
   3684 class BasicAtomicCase3 : public ComputeShaderBase
   3685 {
   3686 	virtual std::string Title()
   3687 	{
   3688 		return NL "Atomic functions - shared variables";
   3689 	}
   3690 	virtual std::string Purpose()
   3691 	{
   3692 		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
   3693 				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with shared variables." NL
   3694 				  "2. Verify that atomic functions work with parameters being constants and" NL
   3695 				  "    with parameters being uniforms." NL
   3696 				  "3. Verify that atomic functions can be used in a control flow.";
   3697 	}
   3698 	virtual std::string Method()
   3699 	{
   3700 		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
   3701 				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
   3702 				  "3. Verify SSBO content." NL
   3703 				  "4. Repeat for different number of work groups and different work group sizes.";
   3704 	}
   3705 	virtual std::string PassCriteria()
   3706 	{
   3707 		return NL "Everything works as expected.";
   3708 	}
   3709 
   3710 	GLuint m_program;
   3711 	GLuint m_storage_buffer;
   3712 	GLuint m_dispatch_buffer;
   3713 
   3714 	std::string GenSource(const uvec3& local_size)
   3715 	{
   3716 		std::stringstream ss;
   3717 		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
   3718 		   << ", local_size_z = " << local_size.z()
   3719 		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint g_uint_out["
   3720 		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "  int g_int_out["
   3721 		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "};" NL "shared uint g_shared_uint["
   3722 		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "shared int g_shared_int["
   3723 		   << local_size.x() * local_size.y() * local_size.z()
   3724 		   << "];" NL "uniform uint g_uint_value[8] = uint[8](3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u);" NL
   3725 			  "void main() {" NL "  atomicExchange(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[0]);" NL
   3726 			  "  atomicMin(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[1]);" NL
   3727 			  "  atomicMax(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[2]);" NL
   3728 			  "  atomicAnd(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[3]);" NL
   3729 			  "  atomicOr(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[4]);" NL
   3730 			  "  atomicXor(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[5]);" NL
   3731 			  "  atomicCompSwap(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[6], g_uint_value[7]);" NL NL
   3732 			  "  atomicExchange(g_shared_int[gl_LocalInvocationIndex], 3);" NL
   3733 			  "  atomicMin(g_shared_int[gl_LocalInvocationIndex], 1);" NL
   3734 			  "  atomicMax(g_shared_int[gl_LocalInvocationIndex], 2);" NL
   3735 			  "  atomicAnd(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL "  if (g_uint_value[1] > 0u) {" NL
   3736 			  "    atomicOr(g_shared_int[gl_LocalInvocationIndex], 0x3);" NL
   3737 			  "    atomicXor(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL
   3738 			  "    atomicCompSwap(g_shared_int[gl_LocalInvocationIndex], 0x2, 0x7);" NL "  }" NL NL
   3739 			  "  g_uint_out[gl_LocalInvocationIndex] = g_shared_uint[gl_LocalInvocationIndex];" NL
   3740 			  "  g_int_out[gl_LocalInvocationIndex] = g_shared_int[gl_LocalInvocationIndex];" NL "}";
   3741 		return ss.str();
   3742 	}
   3743 	bool RunIteration(const uvec3& local_size, bool dispatch_indirect)
   3744 	{
   3745 		if (m_program != 0)
   3746 			glDeleteProgram(m_program);
   3747 		m_program = CreateComputeProgram(GenSource(local_size));
   3748 		glLinkProgram(m_program);
   3749 		if (!CheckProgram(m_program))
   3750 			return false;
   3751 
   3752 		const GLuint kBufferSize = local_size.x() * local_size.y() * local_size.z();
   3753 
   3754 		if (m_storage_buffer == 0)
   3755 			glGenBuffers(1, &m_storage_buffer);
   3756 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   3757 		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize * 2, NULL, GL_DYNAMIC_DRAW);
   3758 
   3759 		glUseProgram(m_program);
   3760 		if (dispatch_indirect)
   3761 		{
   3762 			const GLuint num_groups[3] = { 1, 1, 1 };
   3763 			if (m_dispatch_buffer == 0)
   3764 				glGenBuffers(1, &m_dispatch_buffer);
   3765 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   3766 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
   3767 			glDispatchComputeIndirect(0);
   3768 		}
   3769 		else
   3770 		{
   3771 			glDispatchCompute(1, 1, 1);
   3772 		}
   3773 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   3774 
   3775 		std::vector<GLuint> udata(kBufferSize);
   3776 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &udata[0]);
   3777 		for (GLuint i = 0; i < kBufferSize; ++i)
   3778 		{
   3779 			if (udata[i] != 7)
   3780 			{
   3781 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
   3782 													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
   3783 				return false;
   3784 			}
   3785 		}
   3786 
   3787 		std::vector<GLint> idata(kBufferSize);
   3788 		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, sizeof(GLint) * kBufferSize,
   3789 						   &idata[0]);
   3790 		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
   3791 		{
   3792 			if (idata[i] != 7)
   3793 			{
   3794 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
   3795 													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
   3796 				return false;
   3797 			}
   3798 		}
   3799 
   3800 		return true;
   3801 	}
   3802 	virtual long Setup()
   3803 	{
   3804 		m_program		  = 0;
   3805 		m_storage_buffer  = 0;
   3806 		m_dispatch_buffer = 0;
   3807 		return NO_ERROR;
   3808 	}
   3809 	virtual long Run()
   3810 	{
   3811 		if (!RunIteration(uvec3(64, 1, 1), false))
   3812 			return ERROR;
   3813 		if (!RunIteration(uvec3(1, 1, 64), true))
   3814 			return ERROR;
   3815 		if (!RunIteration(uvec3(1, 1, 4), false))
   3816 			return ERROR;
   3817 		if (!RunIteration(uvec3(3, 2, 1), true))
   3818 			return ERROR;
   3819 		if (!RunIteration(uvec3(2, 4, 2), false))
   3820 			return ERROR;
   3821 		if (!RunIteration(uvec3(2, 4, 7), true))
   3822 			return ERROR;
   3823 		return NO_ERROR;
   3824 	}
   3825 	virtual long Cleanup()
   3826 	{
   3827 		glUseProgram(0);
   3828 		glDeleteProgram(m_program);
   3829 		glDeleteBuffers(1, &m_storage_buffer);
   3830 		glDeleteBuffers(1, &m_dispatch_buffer);
   3831 		return NO_ERROR;
   3832 	}
   3833 };
   3834 
   3835 class AdvancedCopyImage : public ComputeShaderBase
   3836 {
   3837 	virtual std::string Title()
   3838 	{
   3839 		return NL "Copy Image";
   3840 	}
   3841 	virtual std::string Purpose()
   3842 	{
   3843 		return NL "Verify that copying two textures using CS works as expected.";
   3844 	}
   3845 	virtual std::string Method()
   3846 	{
   3847 		return NL "Use shader image load and store operations to copy two textures in the CS.";
   3848 	}
   3849 	virtual std::string PassCriteria()
   3850 	{
   3851 		return NL "Everything works as expected.";
   3852 	}
   3853 
   3854 	GLuint m_program;
   3855 	GLuint m_texture[2];
   3856 
   3857 	virtual long Setup()
   3858 	{
   3859 		m_program = 0;
   3860 		memset(m_texture, 0, sizeof(m_texture));
   3861 		return NO_ERROR;
   3862 	}
   3863 
   3864 	virtual long Run()
   3865 	{
   3866 		const char* const glsl_cs = NL "#define TILE_WIDTH 16" NL "#define TILE_HEIGHT 16" NL
   3867 									   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
   3868 									   "layout(binding = 0, rgba8) uniform image2D g_input_image;" NL
   3869 									   "layout(binding = 1, rgba8) uniform image2D g_output_image;" NL	NL
   3870 									   "layout(local_size_x=TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL
   3871 									   "void main() {" NL "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
   3872 									   "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
   3873 									   "  const ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL NL
   3874 									   "  vec4 pixel = imageLoad(g_input_image, pixel_xy);" NL
   3875 									   "  imageStore(g_output_image, pixel_xy, pixel);" NL "}";
   3876 		m_program = CreateComputeProgram(glsl_cs);
   3877 		glLinkProgram(m_program);
   3878 		if (!CheckProgram(m_program))
   3879 			return ERROR;
   3880 
   3881 		std::vector<GLubyte> in_image(64 * 64 * 4, 0x0f);
   3882 		std::vector<GLubyte> out_image(64 * 64 * 4, 0x00);
   3883 
   3884 		glGenTextures(2, m_texture);
   3885 		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
   3886 		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   3887 		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 64, 64, 0, GL_RGBA, GL_UNSIGNED_BYTE, &in_image[0]);
   3888 
   3889 		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
   3890 		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   3891 		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 64, 64, 0, GL_RGBA, GL_UNSIGNED_BYTE, &out_image[0]);
   3892 
   3893 		glUseProgram(m_program);
   3894 		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
   3895 		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
   3896 		glDispatchCompute(5, 4,
   3897 						  1); // 5 is on purpose, to ensure that out of bounds image load and stores have no effect
   3898 		glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   3899 
   3900 		std::vector<GLubyte> data(64 * 64 * 4);
   3901 		glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
   3902 		for (std::size_t i = 0; i < data.size(); ++i)
   3903 		{
   3904 			if (getWindowWidth() > 100 && data[i] != 0x0f)
   3905 			{
   3906 				m_context.getTestContext().getLog()
   3907 					<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be " << 0x0f
   3908 					<< "." << tcu::TestLog::EndMessage;
   3909 				return ERROR;
   3910 			}
   3911 		}
   3912 
   3913 		return NO_ERROR;
   3914 	}
   3915 	virtual long Cleanup()
   3916 	{
   3917 		glUseProgram(0);
   3918 		glDeleteProgram(m_program);
   3919 		glDeleteTextures(2, m_texture);
   3920 		return NO_ERROR;
   3921 	}
   3922 };
   3923 
   3924 class AdvancedPipelinePreVS : public ComputeShaderBase
   3925 {
   3926 	virtual std::string Title()
   3927 	{
   3928 		return NL "CS as an additional pipeline stage - Before VS (1)";
   3929 	}
   3930 	virtual std::string Purpose()
   3931 	{
   3932 		return NL "Verify that CS which runs just before VS and modifies VBO content works as expected.";
   3933 	}
   3934 	virtual std::string Method()
   3935 	{
   3936 		return NL "1. Prepare VBO and VAO for a drawing operation." NL "2. Run CS to modify existing VBO content." NL
   3937 				  "3. Issue MemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT) command." NL
   3938 				  "4. Issue draw call command." NL "5. Verify that the framebuffer content is as expected.";
   3939 	}
   3940 	virtual std::string PassCriteria()
   3941 	{
   3942 		return NL "Everything works as expected.";
   3943 	}
   3944 
   3945 	GLuint m_program[2];
   3946 	GLuint m_vertex_buffer;
   3947 	GLuint m_vertex_array;
   3948 
   3949 	virtual long Setup()
   3950 	{
   3951 		memset(m_program, 0, sizeof(m_program));
   3952 		m_vertex_buffer = 0;
   3953 		m_vertex_array  = 0;
   3954 		return NO_ERROR;
   3955 	}
   3956 	virtual long Run()
   3957 	{
   3958 		const char* const glsl_cs =
   3959 			NL "layout(local_size_x = 4) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
   3960 			   "layout(binding = 0, std430) buffer VertexBuffer {" NL "  Vertex g_vertex[];" NL "};" NL
   3961 			   "uniform float g_scale = 0.8;" NL "void main() {" NL
   3962 			   "  g_vertex[gl_GlobalInvocationID.x].position.xyz *= g_scale;" NL
   3963 			   "  g_vertex[gl_GlobalInvocationID.x].color *= vec4(0, 1, 0, 1);" NL "}";
   3964 		m_program[0] = CreateComputeProgram(glsl_cs);
   3965 		glLinkProgram(m_program[0]);
   3966 		if (!CheckProgram(m_program[0]))
   3967 			return ERROR;
   3968 
   3969 		const char* const glsl_vs =
   3970 			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec4 g_color;" NL
   3971 			   "out StageData {" NL "  vec4 color;" NL "} g_vs_out;" NL "void main() {" NL
   3972 			   "  gl_Position = g_position;" NL "  g_vs_out.color = g_color;" NL "}";
   3973 
   3974 		const char* const glsl_fs =
   3975 			NL "in StageData {" NL "  vec4 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
   3976 			   "void main() {" NL "  g_color = g_fs_in.color;" NL "}";
   3977 		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
   3978 		glLinkProgram(m_program[1]);
   3979 		if (!CheckProgram(m_program[1]))
   3980 			return ERROR;
   3981 
   3982 		/* vertex buffer */
   3983 		{
   3984 			const float data[] = { -1, -1, 0, 1, 1, 1, 1, 1, 1, -1, 0, 1, 1, 1, 1, 1,
   3985 								   -1, 1,  0, 1, 1, 1, 1, 1, 1, 1,  0, 1, 1, 1, 1, 1 };
   3986 			glGenBuffers(1, &m_vertex_buffer);
   3987 			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   3988 			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
   3989 			glBindBuffer(GL_ARRAY_BUFFER, 0);
   3990 		}
   3991 
   3992 		glGenVertexArrays(1, &m_vertex_array);
   3993 		glBindVertexArray(m_vertex_array);
   3994 		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   3995 		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
   3996 		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
   3997 		glBindBuffer(GL_ARRAY_BUFFER, 0);
   3998 		glEnableVertexAttribArray(0);
   3999 		glEnableVertexAttribArray(1);
   4000 		glBindVertexArray(0);
   4001 
   4002 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_vertex_buffer);
   4003 		glUseProgram(m_program[0]);
   4004 		glDispatchCompute(1, 1, 1);
   4005 
   4006 		glClear(GL_COLOR_BUFFER_BIT);
   4007 		glUseProgram(m_program[1]);
   4008 		glBindVertexArray(m_vertex_array);
   4009 		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
   4010 		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
   4011 
   4012 		if (getWindowWidth() < 500 &&
   4013 			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
   4014 		{
   4015 			return ERROR;
   4016 		}
   4017 		return NO_ERROR;
   4018 	}
   4019 	virtual long Cleanup()
   4020 	{
   4021 		glUseProgram(0);
   4022 		for (int i = 0; i < 2; ++i)
   4023 			glDeleteProgram(m_program[i]);
   4024 		glDeleteBuffers(1, &m_vertex_buffer);
   4025 		glDeleteVertexArrays(1, &m_vertex_array);
   4026 		return NO_ERROR;
   4027 	}
   4028 };
   4029 
   4030 class AdvancedPipelineGenDrawCommands : public ComputeShaderBase
   4031 {
   4032 	virtual std::string Title()
   4033 	{
   4034 		return NL "CS as an additional pipeline stage - Before VS (2)";
   4035 	}
   4036 	virtual std::string Purpose()
   4037 	{
   4038 		return NL "Verify that a complex scenario where CS is used to generate drawing commands" NL
   4039 				  "and write them to a draw indirect buffer works as expected. This is a practial usage of CS." NL
   4040 				  "CS is used for culling objects which are outside of the viewing frustum.";
   4041 	}
   4042 	virtual std::string Method()
   4043 	{
   4044 		return NL "1. Run CS which will generate four sets of draw call parameters and write them to the draw indirect "
   4045 				  "buffer." NL "2. One set of draw call parameters will be: 0, 0, 0, 0" NL
   4046 				  "    (which means that an object is outside of the viewing frustum and should not be drawn)." NL
   4047 				  "3. Issue MemoryBarrier(GL_COMMAND_BARRIER_BIT) command." NL
   4048 				  "4. Issue four draw indirect commands." NL "5. Verify that the framebuffer content is as expected.";
   4049 	}
   4050 	virtual std::string PassCriteria()
   4051 	{
   4052 		return NL "Everything works as expected.";
   4053 	}
   4054 
   4055 	GLuint m_program[2];
   4056 	GLuint m_vertex_buffer;
   4057 	GLuint m_index_buffer;
   4058 	GLuint m_vertex_array;
   4059 	GLuint m_draw_buffer;
   4060 	GLuint m_object_buffer;
   4061 
   4062 	virtual long Setup()
   4063 	{
   4064 		memset(m_program, 0, sizeof(m_program));
   4065 		m_vertex_buffer = 0;
   4066 		m_index_buffer  = 0;
   4067 		m_vertex_array  = 0;
   4068 		m_draw_buffer   = 0;
   4069 		m_object_buffer = 0;
   4070 		return NO_ERROR;
   4071 	}
   4072 	virtual long Run()
   4073 	{
   4074 		GLint res;
   4075 		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
   4076 		if (res <= 0)
   4077 		{
   4078 			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
   4079 			return NO_ERROR;
   4080 		}
   4081 
   4082 		const char* const glsl_cs =
   4083 			NL "layout(local_size_x = 4) in;" NL "struct DrawCommand {" NL "  uint count;" NL
   4084 			   "  uint instance_count;" NL "  uint first_index;" NL "  int base_vertex;" NL "  uint base_instance;" NL
   4085 			   "};" NL "layout(std430) buffer;" NL "layout(binding = 0) readonly buffer ObjectBuffer {" NL
   4086 			   "  mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
   4087 			   "layout(binding = 1) writeonly buffer DrawCommandBuffer {" NL "  DrawCommand g_command[4];" NL "};" NL
   4088 			   "bool IsObjectVisible(uint id) {" NL
   4089 			   "  if (g_objects.transform[id][3].x < -1.0 || g_objects.transform[id][3].x > 1.0) return false;" NL
   4090 			   "  if (g_objects.transform[id][3][1] < -1.0 || g_objects.transform[id][3][1] > 1.0) return false;" NL
   4091 			   "  if (g_objects.transform[id][3][2] < -1.0 || g_objects.transform[id][3].z > 1.0) return false;" NL
   4092 			   "  return true;" NL "}" NL "void main() {" NL "  uint id = gl_GlobalInvocationID.x;" NL
   4093 			   "  g_command[id].count = 0;" NL "  g_command[id].instance_count = 0;" NL
   4094 			   "  g_command[id].first_index = 0;" NL "  g_command[id].base_vertex = 0;" NL
   4095 			   "  g_command[id].base_instance = 0;" NL "  if (IsObjectVisible(id)) {" NL
   4096 			   "    g_command[id].count = g_objects.count[id];" NL "    g_command[id].instance_count = 1;" NL
   4097 			   "    g_command[id].first_index = g_objects.first_index[id];" NL "  }" NL "}";
   4098 		m_program[0] = CreateComputeProgram(glsl_cs);
   4099 		glLinkProgram(m_program[0]);
   4100 		if (!CheckProgram(m_program[0]))
   4101 			return ERROR;
   4102 
   4103 		const char* const glsl_vs =
   4104 			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec3 g_color;" NL
   4105 			   "out StageData {" NL "  vec3 color;" NL "} g_vs_out;" NL
   4106 			   "layout(binding = 0, std430) buffer ObjectBuffer {" NL "  mat4 transform[4];" NL "  uint count[4];" NL
   4107 			   "  uint first_index[4];" NL "} g_objects;" NL "uniform int g_object_id;" NL "void main() {" NL
   4108 			   "  gl_Position = g_objects.transform[g_object_id] * g_position;" NL "  g_vs_out.color = g_color;" NL "}";
   4109 
   4110 		const char* const glsl_fs =
   4111 			NL "in StageData {" NL "  vec3 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
   4112 			   "void main() {" NL "  g_color = vec4(g_fs_in.color, 1);" NL "}";
   4113 		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
   4114 		glLinkProgram(m_program[1]);
   4115 		if (!CheckProgram(m_program[1]))
   4116 			return ERROR;
   4117 		glViewport(0, 0, 100, 100);
   4118 
   4119 		/* object buffer */
   4120 		{
   4121 			struct
   4122 			{
   4123 				mat4   transform[4];
   4124 				GLuint count[4];
   4125 				GLuint first_index[4];
   4126 			} data = {
   4127 				{ tcu::translationMatrix(vec3(-1.5f, -0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, -0.5f, 0.0f)),
   4128 				  tcu::translationMatrix(vec3(-0.5f, 0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, 0.5f, 0.0f)) },
   4129 				{ 4, 4, 4, 4 },
   4130 				{ 0, 4, 8, 12 }
   4131 			};
   4132 			glGenBuffers(1, &m_object_buffer);
   4133 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_object_buffer);
   4134 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   4135 		}
   4136 		/* vertex buffer */
   4137 		{
   4138 			const vec3 data[] = { vec3(-0.4f, -0.4f, 0.0f), vec3(1, 0, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 0, 0),
   4139 								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 0, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 0, 0),
   4140 								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(0, 1, 0),
   4141 								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 1, 0),
   4142 								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 0, 1), vec3(0.4f, -0.4f, 0.0f), vec3(0, 0, 1),
   4143 								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 0, 1), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 0, 1),
   4144 								  vec3(-0.4f, -0.4f, 0.0f), vec3(1, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 1, 0),
   4145 								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 1, 0) };
   4146 			glGenBuffers(1, &m_vertex_buffer);
   4147 			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   4148 			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
   4149 			glBindBuffer(GL_ARRAY_BUFFER, 0);
   4150 		}
   4151 		/* index buffer */
   4152 		{
   4153 			const GLushort data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   4154 			glGenBuffers(1, &m_index_buffer);
   4155 			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
   4156 			glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW);
   4157 			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
   4158 		}
   4159 		glGenBuffers(1, &m_draw_buffer);
   4160 		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
   4161 		glBufferData(GL_DRAW_INDIRECT_BUFFER, 4 * sizeof(GLuint) * 5, NULL, GL_DYNAMIC_DRAW);
   4162 		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
   4163 
   4164 		glGenVertexArrays(1, &m_vertex_array);
   4165 		glBindVertexArray(m_vertex_array);
   4166 		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   4167 		glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), 0);
   4168 		glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), reinterpret_cast<void*>(sizeof(vec3)));
   4169 		glBindBuffer(GL_ARRAY_BUFFER, 0);
   4170 		glEnableVertexAttribArray(0);
   4171 		glEnableVertexAttribArray(1);
   4172 		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
   4173 		glBindVertexArray(0);
   4174 
   4175 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_draw_buffer);
   4176 		glUseProgram(m_program[0]);
   4177 		glDispatchCompute(1, 1, 1);
   4178 
   4179 		glClear(GL_COLOR_BUFFER_BIT);
   4180 		glUseProgram(m_program[1]);
   4181 		glBindVertexArray(m_vertex_array);
   4182 		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
   4183 		glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
   4184 		/* draw (CPU draw calls dispatch, could be done by the GPU with ARB_multi_draw_indirect) */
   4185 		{
   4186 			GLsizeiptr offset = 0;
   4187 			for (int i = 0; i < 4; ++i)
   4188 			{
   4189 				glUniform1i(glGetUniformLocation(m_program[1], "g_object_id"), i);
   4190 				glDrawElementsIndirect(GL_TRIANGLE_STRIP, GL_UNSIGNED_SHORT, reinterpret_cast<void*>(offset));
   4191 				offset += 5 * sizeof(GLuint);
   4192 			}
   4193 		}
   4194 		if (getWindowWidth() >= 100 && getWindowHeight() >= 100 &&
   4195 			!ValidateWindow4Quads(vec3(0), vec3(0, 1, 0), vec3(1, 1, 0), vec3(0, 0, 1)))
   4196 		{
   4197 			return ERROR;
   4198 		}
   4199 		return NO_ERROR;
   4200 	}
   4201 	virtual long Cleanup()
   4202 	{
   4203 		glUseProgram(0);
   4204 		for (int i = 0; i < 2; ++i)
   4205 			glDeleteProgram(m_program[i]);
   4206 		glDeleteBuffers(1, &m_vertex_buffer);
   4207 		glDeleteBuffers(1, &m_index_buffer);
   4208 		glDeleteVertexArrays(1, &m_vertex_array);
   4209 		glDeleteBuffers(1, &m_draw_buffer);
   4210 		glDeleteBuffers(1, &m_object_buffer);
   4211 		glViewport(0, 0, getWindowWidth(), getWindowHeight());
   4212 		return NO_ERROR;
   4213 	}
   4214 };
   4215 
   4216 class AdvancedPipelineComputeChain : public ComputeShaderBase
   4217 {
   4218 	virtual std::string Title()
   4219 	{
   4220 		return NL "Compute Chain";
   4221 	}
   4222 	virtual std::string Purpose()
   4223 	{
   4224 		return NL "1. Verify that dispatching several compute kernels that work in a sequence" NL
   4225 				  "    with a common set of resources works as expected." NL
   4226 				  "2. Verify that indexing nested structures with built-in variables work as expected." NL
   4227 				  "3. Verify that two kernels can write to the same resource without MemoryBarrier" NL
   4228 				  "    command if target regions of memory do not overlap.";
   4229 	}
   4230 	virtual std::string Method()
   4231 	{
   4232 		return NL "1. Create a set of GPU resources (buffers, images, atomic counters)." NL
   4233 				  "2. Dispatch Kernel0 that write to these resources." NL "3. Issue MemoryBarrier command." NL
   4234 				  "4. Dispatch Kernel1 that read/write from/to these resources." NL "5. Issue MemoryBarrier command." NL
   4235 				  "6. Dispatch Kernel2 that read/write from/to these resources." NL
   4236 				  "7. Verify that content of all resources is as expected.";
   4237 	}
   4238 	virtual std::string PassCriteria()
   4239 	{
   4240 		return NL "Everything works as expected.";
   4241 	}
   4242 
   4243 	GLuint m_program[3];
   4244 	GLuint m_storage_buffer[4];
   4245 	GLuint m_counter_buffer;
   4246 	GLuint m_texture;
   4247 	GLuint m_fbo;
   4248 
   4249 	std::string Common()
   4250 	{
   4251 		return NL "struct S0 {" NL "  int m0[8];" NL "};" NL "struct S1 {" NL "  S0 m0[8];" NL "};" NL
   4252 				  "layout(binding = 0, std430) buffer Buffer0 {" NL "  int m0[5];" NL "  S1 m1[8];" NL "} g_buffer0;" NL
   4253 				  "layout(binding = 1, std430) buffer Buffer1 {" NL "  uint data[8];" NL "} g_buffer1;" NL
   4254 				  "layout(binding = 2, std430) buffer Buffer2 {" NL "  int data[256];" NL "} g_buffer2;" NL
   4255 				  "layout(binding = 3, std430) buffer Buffer3 {" NL "  int data[256];" NL "} g_buffer3;" NL
   4256 				  "layout(binding = 4, std430) buffer Buffer4 {" NL "  mat4 data0;" NL "  mat4 data1;" NL
   4257 				  "} g_buffer4;" NL "layout(binding = 0, rgba32f) uniform image2D g_image0;" NL
   4258 				  "layout(binding = 1, offset = 8) uniform atomic_uint g_counter[2];";
   4259 	}
   4260 	std::string GenGLSL(int p)
   4261 	{
   4262 		std::stringstream ss;
   4263 		ss << Common();
   4264 		if (p == 0)
   4265 		{
   4266 			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL
   4267 					 "void UpdateBuffer0(uvec3 id, int add_val) {" NL "  if (id.x < 8 && id.y < 8 && id.z < 8) {" NL
   4268 					 "    g_buffer0.m1[id.z].m0[id.y].m0[id.x] += add_val;" NL "  }" NL "}" NL
   4269 					 "uniform int g_add_value = 1;" NL "uniform uint g_counter_y = 1;" NL
   4270 					 "uniform vec4 g_image_value = vec4(0.125, 0.25, 0.375, 0.5);" NL "void main() {" NL
   4271 					 "  uvec3 id = gl_GlobalInvocationID;" NL "  UpdateBuffer0(id, 1);" NL
   4272 					 "  UpdateBuffer0(id, g_add_value);" NL "  if (id == uvec3(1, g_counter_y, 1)) {" NL
   4273 					 "    uint idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL
   4274 					 "    idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL "  }" NL
   4275 					 "  if (id.x < 4 && id.y < 4 && id.z == 0) {" NL
   4276 					 "    vec4 v = imageLoad(g_image0, ivec2(id.xy));" NL
   4277 					 "    imageStore(g_image0, ivec2(id.xy), v + g_image_value);" NL "  }" NL
   4278 					 "  if (id.x < 2 && id.y == 0 && id.z == 0) {" NL "    g_buffer2.data[id.x] -= int(g_counter_y);" NL
   4279 					 "  }" NL "}";
   4280 		}
   4281 		else if (p == 1)
   4282 		{
   4283 			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in;"
   4284 				// translation matrix
   4285 				NL "uniform mat4 g_mvp = mat4(1.0, 0.0, 0.0, 0.0,  0.0, 1.0, 0.0, 0.0,  0.0, 0.0, 1.0, 0.0,  10.0, "
   4286 					 "20.0, 30.0, 1.0);" NL "void main() {" NL "  if (gl_GlobalInvocationID == uvec3(0)) {" NL
   4287 					 "    g_buffer4.data0 *= g_mvp;" NL "  }" NL "  if (gl_WorkGroupID == uvec3(0)) {" NL
   4288 					 "    g_buffer4.data1[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = "
   4289 					 "g_mvp[gl_LocalInvocationID.x][gl_LocalInvocationID.y];" NL "  }" NL "}";
   4290 		}
   4291 		else if (p == 2)
   4292 		{
   4293 			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL "void main() {" NL "}";
   4294 		}
   4295 		return ss.str();
   4296 	}
   4297 	virtual long Setup()
   4298 	{
   4299 		memset(m_program, 0, sizeof(m_program));
   4300 		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
   4301 		m_counter_buffer = 0;
   4302 		m_texture		 = 0;
   4303 		return NO_ERROR;
   4304 	}
   4305 	virtual long Run()
   4306 	{
   4307 		using namespace tcu;
   4308 
   4309 		for (int i = 0; i < 3; ++i)
   4310 		{
   4311 			m_program[i] = CreateComputeProgram(GenGLSL(i));
   4312 			glLinkProgram(m_program[i]);
   4313 			if (!CheckProgram(m_program[i]))
   4314 				return ERROR;
   4315 		}
   4316 
   4317 		glGenBuffers(4, m_storage_buffer);
   4318 		/* storage buffer 0 */
   4319 		{
   4320 			std::vector<int> data(5 + 8 * 8 * 8);
   4321 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
   4322 			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(int)), &data[0], GL_STATIC_COPY);
   4323 		}
   4324 		/* storage buffer 1 */
   4325 		{
   4326 			const GLuint data[8] = { 0 };
   4327 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
   4328 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_STATIC_COPY);
   4329 		}
   4330 		/* storage buffer 2 & 3 */
   4331 		{
   4332 			std::vector<GLint> data(512, 7);
   4333 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
   4334 			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(GLint)), &data[0], GL_STATIC_COPY);
   4335 
   4336 			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_storage_buffer[2], 0,
   4337 							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
   4338 			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer[2],
   4339 							  (GLintptr)(sizeof(GLint) * data.size() / 2),
   4340 							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
   4341 		}
   4342 		/* storage buffer 4 */
   4343 		{
   4344 			std::vector<mat4> data(2);
   4345 			data[0] = mat4(1);
   4346 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_storage_buffer[3]);
   4347 			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(mat4)), &data[0], GL_STATIC_COPY);
   4348 		}
   4349 		/* counter buffer */
   4350 		{
   4351 			GLuint data[4] = { 0 };
   4352 			glGenBuffers(1, &m_counter_buffer);
   4353 			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 1, m_counter_buffer);
   4354 			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), data, GL_STATIC_COPY);
   4355 		}
   4356 		/* texture */
   4357 		{
   4358 			std::vector<vec4> data(4 * 4, vec4(0.0f));
   4359 			glGenTextures(1, &m_texture);
   4360 			glBindTexture(GL_TEXTURE_2D, m_texture);
   4361 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   4362 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   4363 			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 4, 4, 0, GL_RGBA, GL_FLOAT, &data[0]);
   4364 			glBindTexture(GL_TEXTURE_2D, 0);
   4365 		}
   4366 
   4367 		glUseProgram(m_program[0]);
   4368 		glBindImageTexture(0, m_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
   4369 		glDispatchCompute(2, 2, 2);
   4370 		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   4371 		glDispatchCompute(3, 2, 2);
   4372 
   4373 		glUseProgram(m_program[1]);
   4374 		glDispatchCompute(4, 3, 7);
   4375 
   4376 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
   4377 						GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   4378 
   4379 		/* validate texture */
   4380 		{
   4381 			std::vector<vec4> data(4 * 4);
   4382 			glBindTexture(GL_TEXTURE_2D, m_texture);
   4383 			glGenFramebuffers(1, &m_fbo);
   4384 			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
   4385 			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
   4386 			std::vector<GLubyte> colorData(4 * 4 * 4);
   4387 			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
   4388 			for (int i = 0; i < 4 * 4 * 4; i += 4)
   4389 			{
   4390 				data[i / 4] =
   4391 					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
   4392 						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
   4393 			}
   4394 			for (std::size_t i = 0; i < data.size(); ++i)
   4395 			{
   4396 				if (!ColorEqual(data[i], vec4(0.25f, 0.5f, 0.75f, 1.0f), g_color_eps))
   4397 				{
   4398 					m_context.getTestContext().getLog()
   4399 						<< tcu::TestLog::Message << "Invalid data at texture." << tcu::TestLog::EndMessage;
   4400 					return ERROR;
   4401 				}
   4402 			}
   4403 		}
   4404 		/* validate storage buffer 0 */
   4405 		{
   4406 			std::vector<int> data(5 + 8 * 8 * 8);
   4407 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
   4408 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(data.size() * sizeof(int)), &data[0]);
   4409 			for (std::size_t i = 5; i < data.size(); ++i)
   4410 			{
   4411 				if (data[i] != 4)
   4412 				{
   4413 					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[i]
   4414 														<< " should be 2." << tcu::TestLog::EndMessage;
   4415 					return ERROR;
   4416 				}
   4417 			}
   4418 		}
   4419 		/* validate storage buffer 1 */
   4420 		{
   4421 			GLuint data[8];
   4422 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
   4423 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), data);
   4424 			for (GLuint i = 0; i < 4; ++i)
   4425 			{
   4426 				if (data[i] != i)
   4427 				{
   4428 					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[i]
   4429 														<< " should be " << i << "." << tcu::TestLog::EndMessage;
   4430 					return ERROR;
   4431 				}
   4432 			}
   4433 		}
   4434 		/* validate storage buffer 2 & 3 */
   4435 		{
   4436 			std::vector<GLint> data(512);
   4437 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
   4438 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(sizeof(GLint) * data.size()), &data[0]);
   4439 			for (int i = 0; i < 2; ++i)
   4440 			{
   4441 				if (data[i] != 5)
   4442 				{
   4443 					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
   4444 														<< " should be: 5." << tcu::TestLog::EndMessage;
   4445 					return ERROR;
   4446 				}
   4447 				if (data[i + 256] != 7)
   4448 				{
   4449 					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i + 256]
   4450 														<< " should be: 7." << tcu::TestLog::EndMessage;
   4451 					return ERROR;
   4452 				}
   4453 			}
   4454 		}
   4455 		/* validate storage buffer 4 */
   4456 		{
   4457 			mat4 data[2];
   4458 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[3]);
   4459 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data[0](0, 0));
   4460 			if (data[0] != translationMatrix(vec3(10.0f, 20.0f, 30.0f)))
   4461 			{
   4462 				m_context.getTestContext().getLog()
   4463 					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
   4464 				return ERROR;
   4465 			}
   4466 			if (data[1] != transpose(translationMatrix(vec3(10.0f, 20.0f, 30.0f))))
   4467 			{
   4468 				m_context.getTestContext().getLog()
   4469 					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
   4470 				return ERROR;
   4471 			}
   4472 		}
   4473 		/* validate counter buffer */
   4474 		{
   4475 			GLuint data[4] = { 0 };
   4476 			glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(data), data);
   4477 			if (data[3] != 4)
   4478 			{
   4479 				m_context.getTestContext().getLog()
   4480 					<< tcu::TestLog::Message << "Data is: " << data[3] << " should be: 4." << tcu::TestLog::EndMessage;
   4481 				return ERROR;
   4482 			}
   4483 		}
   4484 
   4485 		return NO_ERROR;
   4486 	}
   4487 	virtual long Cleanup()
   4488 	{
   4489 		glUseProgram(0);
   4490 		for (int i = 0; i < 3; ++i)
   4491 			glDeleteProgram(m_program[i]);
   4492 		glDeleteBuffers(4, m_storage_buffer);
   4493 		glDeleteBuffers(1, &m_counter_buffer);
   4494 		glDeleteTextures(1, &m_texture);
   4495 		glDeleteFramebuffers(1, &m_fbo);
   4496 		return NO_ERROR;
   4497 	}
   4498 };
   4499 
   4500 class AdvancedPipelinePostFS : public ComputeShaderBase
   4501 {
   4502 	virtual std::string Title()
   4503 	{
   4504 		return NL "CS as an additional pipeline stage - After FS";
   4505 	}
   4506 	virtual std::string Purpose()
   4507 	{
   4508 		return NL "1. Verify that CS which runs just after FS to do a post-processing on a rendered image works as "
   4509 				  "expected." NL "2. Verify that CS used as a post-processing filter works as expected." NL
   4510 				  "3. Verify that several CS kernels which run in a sequence to do a post-processing on a rendered "
   4511 				  "image works as expected.";
   4512 	}
   4513 	virtual std::string Method()
   4514 	{
   4515 		return NL
   4516 			"1. Render image to Texture0 using VS and FS." NL
   4517 			"2. Use Texture0 as an input to Kernel0 which performs post-processing and writes result to Texture1." NL
   4518 			"3. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
   4519 			"4. Use Texture1 as an input to Kernel1 which performs post-processing and writes result to Texture0." NL
   4520 			"5. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
   4521 			"6. Verify content of the final post-processed image (Texture0).";
   4522 	}
   4523 	virtual std::string PassCriteria()
   4524 	{
   4525 		return NL "Everything works as expected.";
   4526 	}
   4527 
   4528 	GLuint m_program[3];
   4529 	GLuint m_render_target[2];
   4530 	GLuint m_framebuffer;
   4531 	GLuint m_vertex_array;
   4532 
   4533 	virtual long Setup()
   4534 	{
   4535 		memset(m_program, 0, sizeof(m_program));
   4536 		memset(m_render_target, 0, sizeof(m_render_target));
   4537 		m_framebuffer  = 0;
   4538 		m_vertex_array = 0;
   4539 		return NO_ERROR;
   4540 	}
   4541 
   4542 	virtual long Run()
   4543 	{
   4544 		const char* const glsl_vs =
   4545 			NL "const vec2 g_vertex[4] = vec2[4](vec2(0), vec2(-1, -1), vec2(3, -1), vec2(-1, 3));" NL
   4546 			   "void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], 0, 1);" NL "}";
   4547 
   4548 		const char* const glsl_fs =
   4549 			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1, 0, 0, 1);" NL "}";
   4550 
   4551 		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
   4552 		glLinkProgram(m_program[0]);
   4553 		if (!CheckProgram(m_program[0]))
   4554 			return ERROR;
   4555 
   4556 		const char* const glsl_cs =
   4557 			NL "#define TILE_WIDTH 16" NL "#define TILE_HEIGHT 16" NL
   4558 			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
   4559 			   "layout(binding = 0, rgba32f) uniform image2D g_input_image;" NL
   4560 			   "layout(binding = 1, rgba32f) uniform image2D g_output_image;" NL	NL
   4561 			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL "void main() {" NL
   4562 			   "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
   4563 			   "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL NL "  if (thread_xy == ivec2(0)) {" NL
   4564 			   "    const ivec2 pixel_xy = tile_xy * kTileSize;" NL "    for (int y = 0; y < TILE_HEIGHT; ++y) {" NL
   4565 			   "      for (int x = 0; x < TILE_WIDTH; ++x) {" NL
   4566 			   "        imageStore(g_output_image, pixel_xy + ivec2(x, y), vec4(0, 1, 0, 1));" NL "      }" NL
   4567 			   "    }" NL "  }" NL "}";
   4568 
   4569 		m_program[1] = CreateComputeProgram(glsl_cs);
   4570 		glLinkProgram(m_program[1]);
   4571 		if (!CheckProgram(m_program[1]))
   4572 			return ERROR;
   4573 
   4574 		const char* const glsl_cs2 = NL "#define TILE_WIDTH 32" NL "#define TILE_HEIGHT 32" NL
   4575 										"const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
   4576 										"layout(binding = 0, rgba32f) uniform image2D g_input_image;" NL
   4577 										"layout(binding = 1, rgba32f) uniform image2D g_output_image;" NL	NL
   4578 										"layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL
   4579 										"vec4 Process(vec4 ic) {" NL "  return ic + vec4(1, 0, 0, 0);" NL "}" NL
   4580 										"void main() {" NL "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
   4581 										"  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
   4582 										"  const ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL
   4583 										"  vec4 ic = imageLoad(g_input_image, pixel_xy);" NL
   4584 										"  imageStore(g_output_image, pixel_xy, Process(ic));" NL "}";
   4585 		m_program[2] = CreateComputeProgram(glsl_cs2);
   4586 		glLinkProgram(m_program[2]);
   4587 		if (!CheckProgram(m_program[2]))
   4588 			return ERROR;
   4589 
   4590 		glGenVertexArrays(1, &m_vertex_array);
   4591 
   4592 		/* init render targets */
   4593 		{
   4594 			std::vector<vec4> data(128 * 128);
   4595 			glGenTextures(2, m_render_target);
   4596 			for (int i = 0; i < 2; ++i)
   4597 			{
   4598 				glBindTexture(GL_TEXTURE_2D, m_render_target[i]);
   4599 				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   4600 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 128, 128, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
   4601 			}
   4602 			glBindTexture(GL_TEXTURE_2D, 0);
   4603 		}
   4604 
   4605 		glGenFramebuffers(1, &m_framebuffer);
   4606 		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
   4607 		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_render_target[0], 0);
   4608 		glBindFramebuffer(GL_FRAMEBUFFER, 0);
   4609 
   4610 		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
   4611 		glUseProgram(m_program[0]);
   4612 		glBindVertexArray(m_vertex_array);
   4613 		glClear(GL_COLOR_BUFFER_BIT);
   4614 		glViewport(0, 0, 128, 128);
   4615 		// draw full-viewport triangle
   4616 		glDrawArrays(GL_TRIANGLES, 1,
   4617 					 3); // note: <first> is 1 this means that gl_VertexID in the VS will be: 1, 2 and 3
   4618 		glBindFramebuffer(GL_FRAMEBUFFER, 0);
   4619 
   4620 		glBindImageTexture(0, m_render_target[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);  // input
   4621 		glBindImageTexture(1, m_render_target[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // output
   4622 		glUseProgram(m_program[1]);
   4623 		glDispatchCompute(128 / 16, 128 / 16, 1);
   4624 
   4625 		glBindImageTexture(0, m_render_target[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);  // input
   4626 		glBindImageTexture(1, m_render_target[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // output
   4627 		glUseProgram(m_program[2]);
   4628 		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   4629 		glDispatchCompute(128 / 32, 128 / 32, 1);
   4630 
   4631 		/* validate render target */
   4632 		{
   4633 			std::vector<vec4> data(128 * 128);
   4634 			glBindTexture(GL_TEXTURE_2D, m_render_target[0]);
   4635 			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   4636 			glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
   4637 			for (std::size_t i = 0; i < data.size(); ++i)
   4638 			{
   4639 				if (!IsEqual(data[i], vec4(1, 1, 0, 1)))
   4640 				{
   4641 					m_context.getTestContext().getLog()
   4642 						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
   4643 					return ERROR;
   4644 				}
   4645 			}
   4646 		}
   4647 		return NO_ERROR;
   4648 	}
   4649 
   4650 	virtual long Cleanup()
   4651 	{
   4652 		glViewport(0, 0, getWindowWidth(), getWindowHeight());
   4653 		glUseProgram(0);
   4654 		for (int i = 0; i < 3; ++i)
   4655 			glDeleteProgram(m_program[i]);
   4656 		glDeleteTextures(2, m_render_target);
   4657 		glDeleteVertexArrays(1, &m_vertex_array);
   4658 		glDeleteFramebuffers(1, &m_framebuffer);
   4659 		return NO_ERROR;
   4660 	}
   4661 };
   4662 
   4663 class AdvancedPipelinePostXFB : public ComputeShaderBase
   4664 {
   4665 	virtual std::string Title()
   4666 	{
   4667 		return NL "CS as an additional pipeline stage - After XFB";
   4668 	}
   4669 	virtual std::string Purpose()
   4670 	{
   4671 		return NL "1. Verify that CS which process data fedback by VS works as expected." NL
   4672 				  "2. Verify that XFB and SSBO works correctly together in one shader." NL
   4673 				  "3. Verify that 'switch' statment which selects different execution path for each CS thread works as "
   4674 				  "expected.";
   4675 	}
   4676 	virtual std::string Method()
   4677 	{
   4678 		return NL "1. Draw triangle with XFB enabled. Some data is written to the XFB buffer." NL
   4679 				  "2. Use XFB buffer as 'input SSBO' in CS. Process data and write it to 'output SSBO'." NL
   4680 				  "3. Verify 'output SSBO' content.";
   4681 	}
   4682 	virtual std::string PassCriteria()
   4683 	{
   4684 		return NL "Everything works as expected.";
   4685 	}
   4686 
   4687 	GLuint m_program[2];
   4688 	GLuint m_storage_buffer;
   4689 	GLuint m_xfb_buffer;
   4690 	GLuint m_vertex_buffer;
   4691 	GLuint m_vertex_array;
   4692 
   4693 	virtual long Setup()
   4694 	{
   4695 		memset(m_program, 0, sizeof(m_program));
   4696 		m_storage_buffer = 0;
   4697 		m_xfb_buffer	 = 0;
   4698 		m_vertex_buffer  = 0;
   4699 		m_vertex_array   = 0;
   4700 		return NO_ERROR;
   4701 	}
   4702 	virtual long Run()
   4703 	{
   4704 		GLint res;
   4705 		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
   4706 		if (res <= 0)
   4707 		{
   4708 			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
   4709 			return NO_ERROR;
   4710 		}
   4711 
   4712 		const char* const glsl_vs =
   4713 			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec4 g_color;" NL
   4714 			   "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL "out StageData {" NL
   4715 			   "  vec4 color;" NL "} g_vs_out;" NL "layout(binding = 0, std430) buffer StageData {" NL
   4716 			   "  Vertex vertex[];" NL "} g_vs_buffer;" NL "void main() {" NL "  gl_Position = g_position;" NL
   4717 			   "  g_vs_out.color = g_color;" NL "  g_vs_buffer.vertex[gl_VertexID].position = g_position;" NL
   4718 			   "  g_vs_buffer.vertex[gl_VertexID].color = g_color;" NL "}";
   4719 
   4720 		const char* const glsl_fs =
   4721 			NL "in StageData {" NL "  vec4 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
   4722 			   "void main() {" NL "  g_color = g_fs_in.color;" NL "}";
   4723 
   4724 		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
   4725 		/* setup xfb varyings */
   4726 		{
   4727 			const char* const var[2] = { "gl_Position", "StageData.color" };
   4728 			glTransformFeedbackVaryings(m_program[0], 2, var, GL_INTERLEAVED_ATTRIBS);
   4729 		}
   4730 		glLinkProgram(m_program[0]);
   4731 		if (!CheckProgram(m_program[0]))
   4732 			return ERROR;
   4733 
   4734 		const char* const glsl_cs =
   4735 			NL "layout(local_size_x = 3) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
   4736 			   "layout(binding = 3, std430) buffer Buffer {" NL "  Vertex g_vertex[3];" NL "};" NL
   4737 			   "uniform vec4 g_color1 = vec4(0, 0, 1, 0);" NL "uniform int g_two = 2;" NL
   4738 			   "void UpdateVertex2(int i) {" NL "  g_vertex[i].color -= vec4(-1, 1, 0, 0);" NL "}" NL "void main() {" NL
   4739 			   "  switch (gl_GlobalInvocationID.x) {" NL
   4740 			   "    case 0: g_vertex[gl_GlobalInvocationID.x].color += vec4(1, 0, 0, 0); break;" NL
   4741 			   "    case 1: g_vertex[1].color += g_color1; break;" NL "    case 2: UpdateVertex2(g_two); break;" NL
   4742 			   "    default: return;" NL "  }" NL "}";
   4743 		m_program[1] = CreateComputeProgram(glsl_cs);
   4744 		glLinkProgram(m_program[1]);
   4745 		if (!CheckProgram(m_program[1]))
   4746 			return ERROR;
   4747 
   4748 		glGenBuffers(1, &m_storage_buffer);
   4749 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   4750 		glBufferData(GL_SHADER_STORAGE_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STATIC_COPY);
   4751 
   4752 		glGenBuffers(1, &m_xfb_buffer);
   4753 		glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, m_xfb_buffer);
   4754 		glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STREAM_COPY);
   4755 
   4756 		const float in_data[3 * 8] = { -1, -1, 0, 1, 0, 1, 0, 1, 3, -1, 0, 1, 0, 1, 0, 1, -1, 3, 0, 1, 0, 1, 0, 1 };
   4757 		glGenBuffers(1, &m_vertex_buffer);
   4758 		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   4759 		glBufferData(GL_ARRAY_BUFFER, sizeof(in_data), in_data, GL_STATIC_DRAW);
   4760 		glBindBuffer(GL_ARRAY_BUFFER, 0);
   4761 
   4762 		glGenVertexArrays(1, &m_vertex_array);
   4763 		glBindVertexArray(m_vertex_array);
   4764 		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
   4765 		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
   4766 		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
   4767 		glBindBuffer(GL_ARRAY_BUFFER, 0);
   4768 		glEnableVertexAttribArray(0);
   4769 		glEnableVertexAttribArray(1);
   4770 		glBindVertexArray(0);
   4771 
   4772 		glClear(GL_COLOR_BUFFER_BIT);
   4773 		glUseProgram(m_program[0]);
   4774 		glBindVertexArray(m_vertex_array);
   4775 		glBeginTransformFeedback(GL_TRIANGLES);
   4776 		glDrawArrays(GL_TRIANGLES, 0, 3);
   4777 		glEndTransformFeedback();
   4778 
   4779 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_xfb_buffer);
   4780 		glUseProgram(m_program[1]);
   4781 		glDispatchCompute(1, 1, 1);
   4782 
   4783 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   4784 
   4785 		/* validate storage buffer */
   4786 		{
   4787 			float data[3 * 8];
   4788 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   4789 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), data);
   4790 			if (memcmp(data, in_data, sizeof(data)) != 0)
   4791 			{
   4792 				m_context.getTestContext().getLog()
   4793 					<< tcu::TestLog::Message << "Data in shader storage buffer is incorrect."
   4794 					<< tcu::TestLog::EndMessage;
   4795 				return ERROR;
   4796 			}
   4797 		}
   4798 		/* validate xfb buffer */
   4799 		{
   4800 			const float ref_data[3 * 8] = {
   4801 				-1, -1, 0, 1, 1, 1, 0, 1, 3, -1, 0, 1, 0, 1, 1, 1, -1, 3, 0, 1, 1, 0, 0, 1
   4802 			};
   4803 
   4804 			float data[3 * 8];
   4805 			glGetBufferSubData(GL_TRANSFORM_FEEDBACK_BUFFER, 0, sizeof(data), data);
   4806 			if (memcmp(data, ref_data, sizeof(data)) != 0)
   4807 			{
   4808 				m_context.getTestContext().getLog()
   4809 					<< tcu::TestLog::Message << "Data in xfb buffer is incorrect." << tcu::TestLog::EndMessage;
   4810 				return ERROR;
   4811 			}
   4812 		}
   4813 		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
   4814 		{
   4815 			return ERROR;
   4816 		}
   4817 		return NO_ERROR;
   4818 	}
   4819 
   4820 	virtual long Cleanup()
   4821 	{
   4822 		glUseProgram(0);
   4823 		for (int i = 0; i < 2; ++i)
   4824 			glDeleteProgram(m_program[i]);
   4825 		glDeleteBuffers(1, &m_vertex_buffer);
   4826 		glDeleteBuffers(1, &m_storage_buffer);
   4827 		glDeleteBuffers(1, &m_xfb_buffer);
   4828 		glDeleteVertexArrays(1, &m_vertex_array);
   4829 		return NO_ERROR;
   4830 	}
   4831 };
   4832 
   4833 class AdvancedSharedIndexing : public ComputeShaderBase
   4834 {
   4835 	virtual std::string Title()
   4836 	{
   4837 		return NL "Shared Memory - Indexing";
   4838 	}
   4839 	virtual std::string Purpose()
   4840 	{
   4841 		return NL "1. Verify that indexing various types of shared memory works as expected." NL
   4842 				  "2. Verify that indexing shared memory with different types of expressions work as expected." NL
   4843 				  "3. Verify that all declaration types of shared structures are supported by the GLSL compiler.";
   4844 	}
   4845 	virtual std::string Method()
   4846 	{
   4847 		return NL "1. Create CS which uses shared memory in many different ways." NL
   4848 				  "2. Write to shared memory using different expressions." NL "3. Validate shared memory content." NL
   4849 				  "4. Use synchronization primitives (barrier, groupMemoryBarrier) where applicable.";
   4850 	}
   4851 	virtual std::string PassCriteria()
   4852 	{
   4853 		return NL "Everyting works as expected.";
   4854 	}
   4855 
   4856 	GLuint m_program;
   4857 	GLuint m_texture;
   4858 
   4859 	virtual long Setup()
   4860 	{
   4861 		m_program = 0;
   4862 		m_texture = 0;
   4863 		return NO_ERROR;
   4864 	}
   4865 	virtual long Run()
   4866 	{
   4867 		const char* const glsl_cs = NL
   4868 			"layout(binding = 3, rgba32f) uniform image2D g_result_image;" NL
   4869 			"layout (local_size_x = 4,local_size_y=4 ) in;" NL "shared vec4 g_shared1[4];" NL
   4870 			"shared mat4 g_shared2;" NL "shared struct {" NL "  float data[4];" NL "} g_shared3[4];" NL
   4871 			"shared struct Type { float data[4]; } g_shared4[4];" NL "shared Type g_shared5[4];" NL
   4872 			"uniform bool g_true = true;" NL
   4873 			"uniform float g_values[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };" NL NL
   4874 			"void Sync() {" NL "  groupMemoryBarrier();" NL "  barrier();" NL "}" NL
   4875 			"void SetMemory(ivec2 xy, float value) {" NL "  g_shared1[xy.y][gl_LocalInvocationID.x] = value;" NL
   4876 			"  g_shared2[xy.y][xy.x] = value;" NL "  g_shared3[xy[1]].data[xy[0]] = value;" NL
   4877 			"  g_shared4[xy.y].data[xy[0]] = value;" NL
   4878 			"  g_shared5[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] = value;" NL "}" NL
   4879 			"bool CheckMemory(ivec2 xy, float expected) {" NL
   4880 			"  if (g_shared1[xy.y][xy[0]] != expected) return false;" NL
   4881 			"  if (g_shared2[xy[1]][xy[0]] != expected) return false;" NL
   4882 			"  if (g_shared3[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] != expected) return false;" NL
   4883 			"  if (g_shared4[gl_LocalInvocationID.y].data[xy.x] != expected) return false;" NL
   4884 			"  if (g_shared5[xy.y].data[xy.x] != expected) return false;" NL "  return true;" NL "}" NL
   4885 			"void main() {" NL "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
   4886 			"  vec4 result = vec4(0, 1, 0, 1);" NL NL
   4887 			"  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0);" NL "  Sync();" NL
   4888 			"  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0)) result = vec4(1, 0, 0, 1);" NL NL
   4889 			"  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0);" NL "  Sync();" NL
   4890 			"  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0)) result = vec4(1, 0, 0, 1);" NL NL
   4891 			"  if (g_true && gl_LocalInvocationID.x < 10) {" NL
   4892 			"    SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0);" NL "    Sync();" NL
   4893 			"    if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0)) result = vec4(1, 0, 0, 1);" NL
   4894 			"  }" NL NL "  imageStore(g_result_image, thread_xy, result);" NL "}";
   4895 		m_program = CreateComputeProgram(glsl_cs);
   4896 		glLinkProgram(m_program);
   4897 		if (!CheckProgram(m_program))
   4898 			return ERROR;
   4899 
   4900 		/* init texture */
   4901 		{
   4902 			std::vector<vec4> data(4 * 4);
   4903 			glGenTextures(1, &m_texture);
   4904 			glBindTexture(GL_TEXTURE_2D, m_texture);
   4905 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   4906 			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 4, 4, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
   4907 			glBindTexture(GL_TEXTURE_2D, 0);
   4908 		}
   4909 
   4910 		glBindImageTexture(3, m_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
   4911 		glUseProgram(m_program);
   4912 		glDispatchCompute(1, 1, 1);
   4913 
   4914 		/* validate render target */
   4915 		{
   4916 			std::vector<vec4> data(4 * 4);
   4917 			glBindTexture(GL_TEXTURE_2D, m_texture);
   4918 			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
   4919 			glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
   4920 			for (std::size_t i = 0; i < data.size(); ++i)
   4921 			{
   4922 				if (!IsEqual(data[i], vec4(0, 1, 0, 1)))
   4923 				{
   4924 					m_context.getTestContext().getLog()
   4925 						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
   4926 					return ERROR;
   4927 				}
   4928 			}
   4929 		}
   4930 		return NO_ERROR;
   4931 	}
   4932 	virtual long Cleanup()
   4933 	{
   4934 		glUseProgram(0);
   4935 		glDeleteProgram(m_program);
   4936 		glDeleteTextures(1, &m_texture);
   4937 		return NO_ERROR;
   4938 	}
   4939 };
   4940 
   4941 class AdvancedSharedMax : public ComputeShaderBase
   4942 {
   4943 	virtual std::string Title()
   4944 	{
   4945 		return NL "Shared Memory - 32K";
   4946 	}
   4947 	virtual std::string Purpose()
   4948 	{
   4949 		return NL "Support for 32K of shared memory is required by the OpenGL specifaction. Verify if an "
   4950 				  "implementation supports it.";
   4951 	}
   4952 	virtual std::string Method()
   4953 	{
   4954 		return NL "Create and dispatch CS which uses 32K of shared memory.";
   4955 	}
   4956 	virtual std::string PassCriteria()
   4957 	{
   4958 		return NL "Everything works as expected.";
   4959 	}
   4960 
   4961 	GLuint m_program;
   4962 	GLuint m_buffer;
   4963 
   4964 	virtual long Setup()
   4965 	{
   4966 		m_program = 0;
   4967 		m_buffer  = 0;
   4968 		return NO_ERROR;
   4969 	}
   4970 	virtual long Run()
   4971 	{
   4972 		const char* const glsl_cs =
   4973 			NL "layout(local_size_x = 1024) in;" NL
   4974 			   "shared struct Type { vec4 v[2]; } g_shared[1024];" // 32768 bytes of shared memory
   4975 			NL "layout(std430) buffer Output {" NL "  Type g_output[1024];" NL "};" NL NL "void main() {" NL
   4976 			   "  const int id = int(gl_GlobalInvocationID.x);" NL
   4977 			   "  g_shared[id].v = vec4[2](vec4(1.0), vec4(1.0));" NL "  memoryBarrierShared();" NL "  barrier();" NL NL
   4978 			   "  vec4 sum = vec4(0.0);" NL "  int sum_count = 0;" NL "  for (int i = id - 3; i < id + 4; ++i) {" NL
   4979 			   "    if (id >= 0 && id < g_shared.length()) {" NL "      sum += g_shared[id].v[0];" NL
   4980 			   "      sum += g_shared[id].v[1];" NL "      sum_count += 2;" NL "    }" NL "  }" NL
   4981 			   "  if (any(greaterThan(abs((sum / sum_count) - vec4(1.0)), vec4(0.0000001f)))) return;" NL NL
   4982 			   "  g_output[id] = g_shared[id];" NL "}";
   4983 		m_program = CreateComputeProgram(glsl_cs);
   4984 		glLinkProgram(m_program);
   4985 		if (!CheckProgram(m_program))
   4986 			return ERROR;
   4987 
   4988 		/* init buffer */
   4989 		{
   4990 			std::vector<vec4> data(1024 * 2);
   4991 			glGenBuffers(1, &m_buffer);
   4992 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
   4993 			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
   4994 						 GL_DYNAMIC_COPY);
   4995 		}
   4996 
   4997 		glUseProgram(m_program);
   4998 		glDispatchCompute(1, 1, 1);
   4999 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5000 
   5001 		/* validate buffer */
   5002 		{
   5003 			std::vector<vec4> data(1024 * 2);
   5004 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0]);
   5005 			for (std::size_t i = 0; i < data.size(); ++i)
   5006 			{
   5007 				if (!IsEqual(data[i], vec4(1.0f)))
   5008 				{
   5009 					m_context.getTestContext().getLog()
   5010 						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
   5011 					return ERROR;
   5012 				}
   5013 			}
   5014 		}
   5015 		return NO_ERROR;
   5016 	}
   5017 	virtual long Cleanup()
   5018 	{
   5019 		glUseProgram(0);
   5020 		glDeleteProgram(m_program);
   5021 		glDeleteBuffers(1, &m_buffer);
   5022 		return NO_ERROR;
   5023 	}
   5024 };
   5025 
   5026 class AdvancedDynamicPaths : public ComputeShaderBase
   5027 {
   5028 	virtual std::string Title()
   5029 	{
   5030 		return NL "Dynamic execution paths";
   5031 	}
   5032 	virtual std::string Purpose()
   5033 	{
   5034 		return NL "1. Verify case where each of the four threads takes different execution path in the CS." NL
   5035 				  "2. Execution path for each thread is not known at the compilation time." NL
   5036 				  "    Selection is made based on the result of the texture sampling." NL
   5037 				  "3. Verify that memory synchronization primitives (memoryBarrier* functions) are accepted" NL
   5038 				  "    in the control flow.";
   5039 	}
   5040 	virtual std::string Method()
   5041 	{
   5042 		return NL "1. Create and dispatch CS that takes different execution paths based on the result of the texture "
   5043 				  "sampling." NL "2. In each execution path use different resources (buffers, samplers, uniform "
   5044 				  "arrays) to compute output value.";
   5045 	}
   5046 	virtual std::string PassCriteria()
   5047 	{
   5048 		return NL "Everything works as expected.";
   5049 	}
   5050 
   5051 	GLuint m_program;
   5052 	GLuint m_buffer[4];
   5053 	GLuint m_texture[2];
   5054 
   5055 	virtual long Setup()
   5056 	{
   5057 		m_program = 0;
   5058 		memset(m_buffer, 0, sizeof(m_buffer));
   5059 		memset(m_texture, 0, sizeof(m_texture));
   5060 		return NO_ERROR;
   5061 	}
   5062 	virtual long Run()
   5063 	{
   5064 		const char* const glsl_cs =
   5065 			NL "layout(local_size_x = 4) in;" NL "layout(std140, binding = 0) buffer Output {" NL
   5066 			   "  vec4 g_output[4];" NL "};" NL "uniform isamplerBuffer g_path_buffer;" NL
   5067 			   "uniform vec4[4] g_input0 = vec4[4](vec4(100), vec4(200), vec4(300), vec4(400));" NL
   5068 			   "uniform samplerBuffer g_input1;" NL "layout(binding = 1, std430) buffer Input2 {" NL
   5069 			   "  vec4[4] g_input2;" NL "};" NL NL "void Path2(int id) {" NL
   5070 			   "  g_output[id] = texelFetch(g_input1, int(gl_LocalInvocationIndex));" NL "}" NL "void main() {" NL
   5071 			   "  const int id = int(gl_GlobalInvocationID.x);" NL
   5072 			   "  const int path = texelFetch(g_path_buffer, id).x;" NL NL "  if (path == 0) {" NL
   5073 			   "    g_output[id] = g_input0[gl_LocalInvocationID.x];" NL "    memoryBarrier();" NL
   5074 			   "  } else if (path == 1) {" NL "    return;" NL "  } else if (path == 2) {" NL "    Path2(id);" NL
   5075 			   "    return;" NL "  } else if (path == 3) {" NL "    g_output[id] = g_input2[path - 1];" NL
   5076 			   "    memoryBarrierBuffer();" NL "  }" NL "}";
   5077 		m_program = CreateComputeProgram(glsl_cs);
   5078 		glLinkProgram(m_program);
   5079 		if (!CheckProgram(m_program))
   5080 			return ERROR;
   5081 
   5082 		glGenBuffers(4, m_buffer);
   5083 		glGenTextures(2, m_texture);
   5084 
   5085 		/* init 'output' buffer */
   5086 		{
   5087 			std::vector<vec4> data(4, vec4(-100.0f));
   5088 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer[0]);
   5089 			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
   5090 						 GL_DYNAMIC_COPY);
   5091 		}
   5092 		/* init 'input2' buffer */
   5093 		{
   5094 			const vec4 data[4] = { vec4(1.0f), vec4(2.0f), vec4(3.0f), vec4(4.0f) };
   5095 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_buffer[1]);
   5096 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data[0][0], GL_DYNAMIC_COPY);
   5097 		}
   5098 		/* init 'path' buffer */
   5099 		{
   5100 			const int data[4] = { 3, 2, 1, 0 };
   5101 			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[2]);
   5102 			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
   5103 			glBindBuffer(GL_TEXTURE_BUFFER, 0);
   5104 			glBindTexture(GL_TEXTURE_BUFFER, m_texture[0]);
   5105 			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, m_buffer[2]);
   5106 			glBindTexture(GL_TEXTURE_BUFFER, 0);
   5107 		}
   5108 		/* init 'input1' buffer */
   5109 		{
   5110 			const vec4 data[4] = { vec4(10.0f), vec4(20.0f), vec4(30.0f), vec4(40.0f) };
   5111 			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[3]);
   5112 			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
   5113 			glBindBuffer(GL_TEXTURE_BUFFER, 0);
   5114 			glBindTexture(GL_TEXTURE_BUFFER, m_texture[1]);
   5115 			glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, m_buffer[3]);
   5116 			glBindTexture(GL_TEXTURE_BUFFER, 0);
   5117 		}
   5118 
   5119 		glUseProgram(m_program);
   5120 		glUniform1i(glGetUniformLocation(m_program, "g_path_buffer"), 0);
   5121 		glUniform1i(glGetUniformLocation(m_program, "g_input1"), 1);
   5122 		glActiveTexture(GL_TEXTURE0);
   5123 		glBindTexture(GL_TEXTURE_BUFFER, m_texture[0]);
   5124 		glActiveTexture(GL_TEXTURE1);
   5125 		glBindTexture(GL_TEXTURE_BUFFER, m_texture[1]);
   5126 		glDispatchCompute(1, 1, 1);
   5127 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5128 
   5129 		/* validate 'output' buffer */
   5130 		{
   5131 			vec4 data[4];
   5132 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer[0]);
   5133 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data[0][0]);
   5134 
   5135 			const vec4 expected[4] = { vec4(3.0f), vec4(20.0f), vec4(-100.0f), vec4(400.0f) };
   5136 			for (int i = 0; i < 4; ++i)
   5137 			{
   5138 				if (!IsEqual(data[i], expected[i]))
   5139 				{
   5140 					m_context.getTestContext().getLog()
   5141 						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
   5142 					return ERROR;
   5143 				}
   5144 			}
   5145 		}
   5146 		return NO_ERROR;
   5147 	}
   5148 	virtual long Cleanup()
   5149 	{
   5150 		glUseProgram(0);
   5151 		glDeleteProgram(m_program);
   5152 		glDeleteBuffers(4, m_buffer);
   5153 		glDeleteTextures(2, m_texture);
   5154 		return NO_ERROR;
   5155 	}
   5156 };
   5157 
   5158 class AdvancedResourcesMax : public ComputeShaderBase
   5159 {
   5160 	virtual std::string Title()
   5161 	{
   5162 		return NL "Maximum number of resources in one shader";
   5163 	}
   5164 	virtual std::string Purpose()
   5165 	{
   5166 		return NL "1. Verify that using 8 SSBOs, 12 UBOs, 8 atomic counters, 16 samplers" NL
   5167 				  "    and 8 images in one CS works as expected.";
   5168 	}
   5169 	virtual std::string Method()
   5170 	{
   5171 		return NL "Create and dispatch CS. Verify result.";
   5172 	}
   5173 	virtual std::string PassCriteria()
   5174 	{
   5175 		return NL "Everything works as expected.";
   5176 	}
   5177 
   5178 	GLuint m_program;
   5179 	GLuint m_storage_buffer[8];
   5180 	GLuint m_uniform_buffer[12];
   5181 	GLuint m_atomic_buffer[8];
   5182 	GLuint m_texture_buffer[16];
   5183 	GLuint m_texture[16];
   5184 	GLuint m_image_buffer[8];
   5185 	GLuint m_image[8];
   5186 
   5187 	bool RunIteration(GLuint index)
   5188 	{
   5189 		for (GLuint i = 0; i < 8; ++i)
   5190 		{
   5191 			const GLuint data = i + 1;
   5192 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
   5193 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5194 		}
   5195 		for (GLuint i = 0; i < 12; ++i)
   5196 		{
   5197 			const GLuint data = i + 1;
   5198 			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
   5199 			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5200 		}
   5201 		for (GLuint i = 0; i < 8; ++i)
   5202 		{
   5203 			const GLuint data = i + 1;
   5204 			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, i, m_atomic_buffer[i]);
   5205 			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5206 		}
   5207 		for (GLuint i = 0; i < 16; ++i)
   5208 		{
   5209 			const GLuint data = i + 1;
   5210 			glBindBuffer(GL_TEXTURE_BUFFER, m_texture_buffer[i]);
   5211 			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data, GL_DYNAMIC_READ);
   5212 			glBindBuffer(GL_TEXTURE_BUFFER, 0);
   5213 
   5214 			glActiveTexture(GL_TEXTURE0 + i);
   5215 			glBindTexture(GL_TEXTURE_BUFFER, m_texture[i]);
   5216 			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32UI, m_texture_buffer[i]);
   5217 		}
   5218 		for (GLuint i = 0; i < 8; ++i)
   5219 		{
   5220 			const GLuint data = i + 1;
   5221 			glBindBuffer(GL_TEXTURE_BUFFER, m_image_buffer[i]);
   5222 			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data, GL_DYNAMIC_COPY);
   5223 			glBindBuffer(GL_TEXTURE_BUFFER, 0);
   5224 
   5225 			glBindTexture(GL_TEXTURE_BUFFER, m_image[i]);
   5226 			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32UI, m_image_buffer[i]);
   5227 			glBindTexture(GL_TEXTURE_BUFFER, 0);
   5228 
   5229 			glBindImageTexture(i, m_image[i], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
   5230 		}
   5231 
   5232 		glUseProgram(m_program);
   5233 		glUniform1ui(glGetUniformLocation(m_program, "g_index"), index);
   5234 		/* uniform array */
   5235 		{
   5236 			std::vector<GLuint> data(480);
   5237 			for (GLuint i = 0; i < static_cast<GLuint>(data.size()); ++i)
   5238 				data[i]   = i + 1;
   5239 			glUniform1uiv(glGetUniformLocation(m_program, "g_uniform_def"), static_cast<GLsizei>(data.size()),
   5240 						  &data[0]);
   5241 		}
   5242 		glDispatchCompute(1, 1, 1);
   5243 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5244 
   5245 		bool result = true;
   5246 		/* validate buffer */
   5247 		{
   5248 			GLuint data;
   5249 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[index]);
   5250 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5251 
   5252 			if (data != (index + 1) * 6)
   5253 			{
   5254 				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data << " should be "
   5255 													<< (index + 1) * 6 << "." << tcu::TestLog::EndMessage;
   5256 				result = false;
   5257 			}
   5258 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   5259 		}
   5260 		return result;
   5261 	}
   5262 	virtual long Setup()
   5263 	{
   5264 		m_program = 0;
   5265 		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
   5266 		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
   5267 		memset(m_atomic_buffer, 0, sizeof(m_atomic_buffer));
   5268 		memset(m_texture_buffer, 0, sizeof(m_texture_buffer));
   5269 		memset(m_texture, 0, sizeof(m_texture));
   5270 		memset(m_image_buffer, 0, sizeof(m_image_buffer));
   5271 		memset(m_image, 0, sizeof(m_image));
   5272 		return NO_ERROR;
   5273 	}
   5274 	virtual long Run()
   5275 	{
   5276 		const char* const glsl_cs =
   5277 			NL "layout(local_size_x = 1) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
   5278 			   "  uint data;" NL "} g_shader_storage[8];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
   5279 			   "  uint data;" NL "} g_uniform[12];" NL "layout(binding = 0) uniform usamplerBuffer g_sampler[16];" NL
   5280 			   "layout(binding = 0, r32ui) uniform uimageBuffer g_image[8];" NL
   5281 			   "layout(binding = 0, offset = 0) uniform atomic_uint g_atomic_counter0;" NL
   5282 			   "layout(binding = 1, offset = 0) uniform atomic_uint g_atomic_counter1;" NL
   5283 			   "layout(binding = 2, offset = 0) uniform atomic_uint g_atomic_counter2;" NL
   5284 			   "layout(binding = 3, offset = 0) uniform atomic_uint g_atomic_counter3;" NL
   5285 			   "layout(binding = 4, offset = 0) uniform atomic_uint g_atomic_counter4;" NL
   5286 			   "layout(binding = 5, offset = 0) uniform atomic_uint g_atomic_counter5;" NL
   5287 			   "layout(binding = 6, offset = 0) uniform atomic_uint g_atomic_counter6;" NL
   5288 			   "layout(binding = 7, offset = 0) uniform atomic_uint g_atomic_counter7;" NL
   5289 			   "uniform uint g_uniform_def[480];" NL "uniform uint g_index = 0u;" NL NL "uint Add() {" NL
   5290 			   "  switch (g_index) {" NL "    case 0: return atomicCounter(g_atomic_counter0);" NL
   5291 			   "    case 1: return atomicCounter(g_atomic_counter1);" NL
   5292 			   "    case 2: return atomicCounter(g_atomic_counter2);" NL
   5293 			   "    case 3: return atomicCounter(g_atomic_counter3);" NL
   5294 			   "    case 4: return atomicCounter(g_atomic_counter4);" NL
   5295 			   "    case 5: return atomicCounter(g_atomic_counter5);" NL
   5296 			   "    case 6: return atomicCounter(g_atomic_counter6);" NL
   5297 			   "    case 7: return atomicCounter(g_atomic_counter7);" NL "  }" NL "}" NL "void main() {" NL
   5298 			   "  g_shader_storage[g_index].data += g_uniform[g_index].data;" NL
   5299 			   "  g_shader_storage[g_index].data += texelFetch(g_sampler[g_index], 0).x;" NL
   5300 			   "  g_shader_storage[g_index].data += imageLoad(g_image[g_index], 0).x;" NL
   5301 			   "  g_shader_storage[g_index].data += Add();" NL
   5302 			   "  g_shader_storage[g_index].data += g_uniform_def[g_index];" NL "}";
   5303 		m_program = CreateComputeProgram(glsl_cs);
   5304 		glLinkProgram(m_program);
   5305 		if (!CheckProgram(m_program))
   5306 			return ERROR;
   5307 
   5308 		glGenBuffers(16, m_storage_buffer);
   5309 		glGenBuffers(12, m_uniform_buffer);
   5310 		glGenBuffers(8, m_atomic_buffer);
   5311 		glGenBuffers(16, m_texture_buffer);
   5312 		glGenTextures(16, m_texture);
   5313 		glGenBuffers(8, m_image_buffer);
   5314 		glGenTextures(8, m_image);
   5315 
   5316 		if (!RunIteration(0))
   5317 			return ERROR;
   5318 		if (!RunIteration(1))
   5319 			return ERROR;
   5320 		if (!RunIteration(5))
   5321 			return ERROR;
   5322 
   5323 		return NO_ERROR;
   5324 	}
   5325 	virtual long Cleanup()
   5326 	{
   5327 		glUseProgram(0);
   5328 		glDeleteProgram(m_program);
   5329 		glDeleteBuffers(16, m_storage_buffer);
   5330 		glDeleteBuffers(12, m_uniform_buffer);
   5331 		glDeleteBuffers(8, m_atomic_buffer);
   5332 		glDeleteBuffers(16, m_texture_buffer);
   5333 		glDeleteTextures(16, m_texture);
   5334 		glDeleteBuffers(8, m_image_buffer);
   5335 		glDeleteTextures(8, m_image);
   5336 		return NO_ERROR;
   5337 	}
   5338 };
   5339 
   5340 class AdvancedFP64Case1 : public ComputeShaderBase
   5341 {
   5342 	virtual std::string Title()
   5343 	{
   5344 		return NL "FP64 support - built-in math functions";
   5345 	}
   5346 	virtual std::string Purpose()
   5347 	{
   5348 		return NL "Verify that selected double precision math functions works as expected in the CS.";
   5349 	}
   5350 	virtual std::string Method()
   5351 	{
   5352 		return NL "Create and dispatch CS which uses double precision math functions. Verify results.";
   5353 	}
   5354 	virtual std::string PassCriteria()
   5355 	{
   5356 		return NL "Everything works as expected.";
   5357 	}
   5358 
   5359 	GLuint m_program;
   5360 	GLuint m_storage_buffer[4];
   5361 	GLuint m_uniform_buffer[2];
   5362 
   5363 	virtual long Setup()
   5364 	{
   5365 		m_program = 0;
   5366 		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
   5367 		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
   5368 		return NO_ERROR;
   5369 	}
   5370 	virtual long Run()
   5371 	{
   5372 		const char* const glsl_cs =
   5373 			NL "layout(local_size_x = 4) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
   5374 			   "  double data;" NL "} g_shader_storage[4];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
   5375 			   "  double data;" NL "} g_uniform[2];" NL "uniform dvec2 g_uniform_def;" NL NL "void main() {" NL
   5376 			   "  if (gl_GlobalInvocationID.x == 0) {" NL
   5377 			   "    g_shader_storage[0].data = floor(g_uniform[0].data + 0.1LF);" // floor(1.1LF) == 1.0LF
   5378 			NL "  } else if (gl_GlobalInvocationID.x == 1) {" NL
   5379 			   "    g_shader_storage[1].data = ceil(g_uniform[1].data + 0.2LF);" // ceil(2.2LF) == 3.0LF
   5380 			NL "  } else if (gl_GlobalInvocationID.x == 2) {" NL
   5381 			   "    g_shader_storage[2].data = min(g_uniform_def[0] + 0.1LF, 1.0LF);" // min(1.1LF, 1.0LF) == 1.0LF
   5382 			NL "  } else if (gl_GlobalInvocationID.x == 3) {" NL
   5383 			   "    g_shader_storage[3].data = max(g_uniform_def[0], g_uniform_def.y);" // max(1.0LF, 2.0LF) == 2.0LF
   5384 			NL "  }" NL "}";
   5385 		m_program = CreateComputeProgram(glsl_cs);
   5386 		glLinkProgram(m_program);
   5387 		if (!CheckProgram(m_program))
   5388 			return ERROR;
   5389 
   5390 		glGenBuffers(4, m_storage_buffer);
   5391 		for (GLuint i = 0; i < 4; ++i)
   5392 		{
   5393 			const GLdouble data = static_cast<GLdouble>(i + 1);
   5394 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
   5395 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5396 		}
   5397 
   5398 		glGenBuffers(2, m_uniform_buffer);
   5399 		for (GLuint i = 0; i < 2; ++i)
   5400 		{
   5401 			const GLdouble data = static_cast<GLdouble>(i + 1);
   5402 			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
   5403 			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5404 		}
   5405 
   5406 		glUseProgram(m_program);
   5407 		glUniform2d(glGetUniformLocation(m_program, "g_uniform_def"), 1.0, 2.0);
   5408 		glDispatchCompute(1, 1, 1);
   5409 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5410 
   5411 		/* validate */
   5412 		{
   5413 			const GLdouble expected[4] = { 1.0, 3.0, 1.0, 2.0 };
   5414 			for (int i = 0; i < 4; ++i)
   5415 			{
   5416 				GLdouble data;
   5417 				glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[i]);
   5418 				glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5419 				if (data != expected[i])
   5420 				{
   5421 					m_context.getTestContext().getLog()
   5422 						<< tcu::TestLog::Message << "Data at index " << i << " is " << data << " should be "
   5423 						<< expected[i] << "." << tcu::TestLog::EndMessage;
   5424 					return ERROR;
   5425 				}
   5426 			}
   5427 		}
   5428 		return NO_ERROR;
   5429 	}
   5430 	virtual long Cleanup()
   5431 	{
   5432 		glUseProgram(0);
   5433 		glDeleteProgram(m_program);
   5434 		glDeleteBuffers(4, m_storage_buffer);
   5435 		glDeleteBuffers(2, m_uniform_buffer);
   5436 		return NO_ERROR;
   5437 	}
   5438 };
   5439 
   5440 class AdvancedFP64Case2 : public ComputeShaderBase
   5441 {
   5442 	virtual std::string Title()
   5443 	{
   5444 		return NL "FP64 support - uniform variables";
   5445 	}
   5446 	virtual std::string Purpose()
   5447 	{
   5448 		return NL "1. Verify that all types of double precision uniform variables work as expected in CS." NL
   5449 				  "2. Verify that all double precision uniform variables can be updated with Uniform* and "
   5450 				  "ProgramUniform* commands." NL "3. Verify that re-linking CS program works as expected.";
   5451 	}
   5452 	virtual std::string Method()
   5453 	{
   5454 		return NL "1. Create CS which uses all (double precision) types of uniform variables." NL
   5455 				  "2. Update uniform variables with ProgramUniform* commands." NL
   5456 				  "3. Verify that uniform variables were updated correctly." NL "4. Re-link CS program." NL
   5457 				  "5. Update uniform variables with Uniform* commands." NL
   5458 				  "6. Verify that uniform variables were updated correctly.";
   5459 	}
   5460 	virtual std::string PassCriteria()
   5461 	{
   5462 		return NL "Everything works as expected.";
   5463 	}
   5464 
   5465 	GLuint m_program;
   5466 	GLuint m_storage_buffer;
   5467 
   5468 	virtual long Setup()
   5469 	{
   5470 		m_program		 = 0;
   5471 		m_storage_buffer = 0;
   5472 		return NO_ERROR;
   5473 	}
   5474 	virtual long Run()
   5475 	{
   5476 		const char* const glsl_cs = NL
   5477 			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform double g_0;" NL
   5478 			"uniform dvec2 g_1;" NL "uniform dvec3 g_2;" NL "uniform dvec4 g_3;" NL "uniform dmat2 g_4;" NL
   5479 			"uniform dmat2x3 g_5;" NL "uniform dmat2x4 g_6;" NL "uniform dmat3x2 g_7;" NL "uniform dmat3 g_8;" NL
   5480 			"uniform dmat3x4 g_9;" NL "uniform dmat4x2 g_10;" NL "uniform dmat4x3 g_11;" NL "uniform dmat4 g_12;" NL NL
   5481 			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0LF) g_result = 0;" NL
   5482 			"  if (g_1 != dvec2(2.0LF, 3.0LF)) g_result = 0;" NL
   5483 			"  if (g_2 != dvec3(4.0LF, 5.0LF, 6.0LF)) g_result = 0;" NL
   5484 			"  if (g_3 != dvec4(7.0LF, 8.0LF, 9.0LF, 10.0LF)) g_result = 0;" NL NL
   5485 			"  if (g_4 != dmat2(11.0LF, 12.0LF, 13.0LF, 14.0LF)) g_result = 0;" NL
   5486 			"  if (g_5 != dmat2x3(15.0LF, 16.0LF, 17.0LF, 18.0LF, 19.0LF, 20.0LF)) g_result = 0;" NL
   5487 			"  if (g_6 != dmat2x4(21.0LF, 22.0LF, 23.0LF, 24.0LF, 25.0LF, 26.0LF, 27.0LF, 28.0LF)) g_result = 0;" NL NL
   5488 			"  if (g_7 != dmat3x2(29.0LF, 30.0LF, 31.0LF, 32.0LF, 33.0LF, 34.0LF)) g_result = 0;" NL
   5489 			"  if (g_8 != dmat3(35.0LF, 36.0LF, 37.0LF, 38.0LF, 39.0LF, 40.0LF, 41.0LF, 42.0LF, 43.0LF)) g_result = "
   5490 			"0;" NL "  if (g_9 != dmat3x4(44.0LF, 45.0LF, 46.0LF, 47.0LF, 48.0LF, 49.0LF, 50.0LF, 51.0LF, 52.0LF, "
   5491 			"53.0LF, 54.0LF, 55.0LF)) g_result = 0;" NL NL
   5492 			"  if (g_10 != dmat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
   5493 			"  if (g_11 != dmat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
   5494 			"0;" NL "  if (g_12 != dmat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
   5495 			"88.0, 89.0, 90.0)) g_result = 0;" NL "}";
   5496 		m_program = CreateComputeProgram(glsl_cs);
   5497 		glLinkProgram(m_program);
   5498 		if (!CheckProgram(m_program))
   5499 			return ERROR;
   5500 
   5501 		glGenBuffers(1, &m_storage_buffer);
   5502 		/* create buffer */
   5503 		{
   5504 			const int data = 123;
   5505 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   5506 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
   5507 		}
   5508 
   5509 		glProgramUniform1d(m_program, glGetUniformLocation(m_program, "g_0"), 1.0);
   5510 		glProgramUniform2d(m_program, glGetUniformLocation(m_program, "g_1"), 2.0, 3.0);
   5511 		glProgramUniform3d(m_program, glGetUniformLocation(m_program, "g_2"), 4.0, 5.0, 6.0);
   5512 		glProgramUniform4d(m_program, glGetUniformLocation(m_program, "g_3"), 7.0, 8.0, 9.0, 10.0);
   5513 
   5514 		/* mat2 */
   5515 		{
   5516 			const GLdouble value[4] = { 11.0, 12.0, 13.0, 14.0 };
   5517 			glProgramUniformMatrix2dv(m_program, glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
   5518 		}
   5519 		/* mat2x3 */
   5520 		{
   5521 			const GLdouble value[6] = { 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 };
   5522 			glProgramUniformMatrix2x3dv(m_program, glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
   5523 		}
   5524 		/* mat2x4 */
   5525 		{
   5526 			const GLdouble value[8] = { 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0 };
   5527 			glProgramUniformMatrix2x4dv(m_program, glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
   5528 		}
   5529 
   5530 		/* mat3x2 */
   5531 		{
   5532 			const GLdouble value[6] = { 29.0, 30.0, 31.0, 32.0, 33.0, 34.0 };
   5533 			glProgramUniformMatrix3x2dv(m_program, glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
   5534 		}
   5535 		/* mat3 */
   5536 		{
   5537 			const GLdouble value[9] = { 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0 };
   5538 			glProgramUniformMatrix3dv(m_program, glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
   5539 		}
   5540 		/* mat3x4 */
   5541 		{
   5542 			const GLdouble value[12] = { 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0 };
   5543 			glProgramUniformMatrix3x4dv(m_program, glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
   5544 		}
   5545 
   5546 		/* mat4x2 */
   5547 		{
   5548 			const GLdouble value[8] = { 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0 };
   5549 			glProgramUniformMatrix4x2dv(m_program, glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
   5550 		}
   5551 		/* mat4x3 */
   5552 		{
   5553 			const GLdouble value[12] = { 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0 };
   5554 			glProgramUniformMatrix4x3dv(m_program, glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
   5555 		}
   5556 		/* mat4 */
   5557 		{
   5558 			const GLdouble value[16] = { 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0,
   5559 										 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0 };
   5560 			glProgramUniformMatrix4dv(m_program, glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
   5561 		}
   5562 
   5563 		glUseProgram(m_program);
   5564 		glDispatchCompute(1, 1, 1);
   5565 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5566 
   5567 		/* validate */
   5568 		{
   5569 			int data;
   5570 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5571 			if (data != 1)
   5572 			{
   5573 				m_context.getTestContext().getLog()
   5574 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   5575 				return ERROR;
   5576 			}
   5577 		}
   5578 
   5579 		// re-link program (all uniforms will be set to zero)
   5580 		glLinkProgram(m_program);
   5581 
   5582 		/* clear buffer */
   5583 		{
   5584 			const int data = 123;
   5585 			glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5586 		}
   5587 
   5588 		glUniform1d(glGetUniformLocation(m_program, "g_0"), 1.0);
   5589 		glUniform2d(glGetUniformLocation(m_program, "g_1"), 2.0, 3.0);
   5590 		glUniform3d(glGetUniformLocation(m_program, "g_2"), 4.0, 5.0, 6.0);
   5591 		glUniform4d(glGetUniformLocation(m_program, "g_3"), 7.0, 8.0, 9.0, 10.0);
   5592 
   5593 		/* mat2 */
   5594 		{
   5595 			const GLdouble value[4] = { 11.0, 12.0, 13.0, 14.0 };
   5596 			glUniformMatrix2dv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
   5597 		}
   5598 		/* mat2x3 */
   5599 		{
   5600 			const GLdouble value[6] = { 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 };
   5601 			glUniformMatrix2x3dv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
   5602 		}
   5603 		/* mat2x4 */
   5604 		{
   5605 			const GLdouble value[8] = { 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0 };
   5606 			glUniformMatrix2x4dv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
   5607 		}
   5608 
   5609 		/* mat3x2 */
   5610 		{
   5611 			const GLdouble value[6] = { 29.0, 30.0, 31.0, 32.0, 33.0, 34.0 };
   5612 			glUniformMatrix3x2dv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
   5613 		}
   5614 		/* mat3 */
   5615 		{
   5616 			const GLdouble value[9] = { 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0 };
   5617 			glUniformMatrix3dv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
   5618 		}
   5619 		/* mat3x4 */
   5620 		{
   5621 			const GLdouble value[12] = { 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0 };
   5622 			glUniformMatrix3x4dv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
   5623 		}
   5624 
   5625 		/* mat4x2 */
   5626 		{
   5627 			const GLdouble value[8] = { 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0 };
   5628 			glUniformMatrix4x2dv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
   5629 		}
   5630 		/* mat4x3 */
   5631 		{
   5632 			const GLdouble value[12] = { 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0 };
   5633 			glUniformMatrix4x3dv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
   5634 		}
   5635 		/* mat4 */
   5636 		{
   5637 			const GLdouble value[16] = { 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0,
   5638 										 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0 };
   5639 			glUniformMatrix4dv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
   5640 		}
   5641 
   5642 		glDispatchCompute(1, 1, 1);
   5643 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5644 
   5645 		/* validate */
   5646 		{
   5647 			int data;
   5648 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5649 			if (data != 1)
   5650 			{
   5651 				m_context.getTestContext().getLog()
   5652 					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
   5653 				return ERROR;
   5654 			}
   5655 		}
   5656 
   5657 		return NO_ERROR;
   5658 	}
   5659 	virtual long Cleanup()
   5660 	{
   5661 		glUseProgram(0);
   5662 		glDeleteProgram(m_program);
   5663 		glDeleteBuffers(1, &m_storage_buffer);
   5664 		return NO_ERROR;
   5665 	}
   5666 };
   5667 
   5668 class AdvancedFP64Case3 : public ComputeShaderBase
   5669 {
   5670 	virtual std::string Title()
   5671 	{
   5672 		return NL "FP64 support - subroutines";
   5673 	}
   5674 	virtual std::string Purpose()
   5675 	{
   5676 		return NL "Verify that subroutines that performs double precision computation works as expected in the CS.";
   5677 	}
   5678 	virtual std::string Method()
   5679 	{
   5680 		return NL
   5681 			"Create and dispatch CS that uses double precision math functions in subroutines to compute output values.";
   5682 	}
   5683 	virtual std::string PassCriteria()
   5684 	{
   5685 		return NL "Everything works as expected.";
   5686 	}
   5687 
   5688 	GLuint m_program;
   5689 	GLuint m_storage_buffer;
   5690 
   5691 	virtual long Setup()
   5692 	{
   5693 		m_program		 = 0;
   5694 		m_storage_buffer = 0;
   5695 		return NO_ERROR;
   5696 	}
   5697 	virtual long Run()
   5698 	{
   5699 		const char* const glsl_cs =
   5700 			NL "layout(local_size_x = 1) in;" NL "uniform double[4] g_input;" NL "uniform int index;" NL
   5701 			   "layout(std430, binding = 0) buffer Output {" NL "  double g_output[4];" NL "};" NL
   5702 			   "subroutine double MathFunc(double x);" NL "subroutine uniform MathFunc g_func[4];" NL
   5703 			   "subroutine(MathFunc)" NL "double Func0(double x) {" NL "  return abs(x);" // abs(-1.0LF) == 1.0LF
   5704 			NL "}" NL "subroutine(MathFunc)" NL "double Func1(double x) {" NL
   5705 			   "  return round(x);" // round(2.2LF) == 2.0LF
   5706 			NL "}" NL "subroutine(MathFunc)" NL "double Func2(double x) {" NL
   5707 			   "  return sign(x);" // sign(3.0LF) == 1.0LF
   5708 			NL "}" NL "subroutine(MathFunc)" NL "double Func3(double x) {" NL
   5709 			   "  return fract(x);" // fract(4.1LF) == 0.1LF
   5710 			NL "}" NL "void main() {" NL "  int i = index;" NL "  g_output[i] = g_func[i](g_input[i]);" NL "}";
   5711 		m_program = CreateComputeProgram(glsl_cs);
   5712 		glLinkProgram(m_program);
   5713 		if (!CheckProgram(m_program))
   5714 			return ERROR;
   5715 
   5716 		glGenBuffers(1, &m_storage_buffer);
   5717 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   5718 		glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(double), NULL, GL_STATIC_DRAW);
   5719 
   5720 		const GLuint index_compute0 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func0");
   5721 		const GLuint index_compute1 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func1");
   5722 		const GLuint index_compute2 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func2");
   5723 		const GLuint index_compute3 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func3");
   5724 		const GLint  loc_compute0   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[0]");
   5725 		const GLint  loc_compute1   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[1]");
   5726 		const GLint  loc_compute2   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[2]");
   5727 		const GLint  loc_compute3   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[3]");
   5728 
   5729 		glUseProgram(m_program);
   5730 
   5731 		// setup subroutines
   5732 		GLuint indices[4];
   5733 		indices[loc_compute0] = index_compute0;
   5734 		indices[loc_compute1] = index_compute1;
   5735 		indices[loc_compute2] = index_compute2;
   5736 		indices[loc_compute3] = index_compute3;
   5737 		glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 4, indices);
   5738 
   5739 		/* set uniforms */
   5740 		{
   5741 			const GLdouble data[4] = { -1.0, 2.2, 3.0, 4.1 };
   5742 			glUniform1dv(glGetUniformLocation(m_program, "g_input"), 4, data);
   5743 		}
   5744 		glUniform1i(glGetUniformLocation(m_program, "index"), 0);
   5745 		glDispatchCompute(1, 1, 1);
   5746 		glUniform1i(glGetUniformLocation(m_program, "index"), 1);
   5747 		glDispatchCompute(1, 1, 1);
   5748 		glUniform1i(glGetUniformLocation(m_program, "index"), 2);
   5749 		glDispatchCompute(1, 1, 1);
   5750 		glUniform1i(glGetUniformLocation(m_program, "index"), 3);
   5751 		glDispatchCompute(1, 1, 1);
   5752 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   5753 
   5754 		/* validate */
   5755 		{
   5756 			const GLdouble expected[4] = { 1.0, 2.0, 1.0, 0.1 };
   5757 			GLdouble	   data[4];
   5758 			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   5759 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5760 			for (int i = 0; i < 4; ++i)
   5761 			{
   5762 				if (fabs(data[i] - expected[i]) > g_color_eps.x())
   5763 				{
   5764 					m_context.getTestContext().getLog()
   5765 						<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be "
   5766 						<< expected[i] << "." << tcu::TestLog::EndMessage;
   5767 					return ERROR;
   5768 				}
   5769 			}
   5770 		}
   5771 		return NO_ERROR;
   5772 	}
   5773 	virtual long Cleanup()
   5774 	{
   5775 		glUseProgram(0);
   5776 		glDeleteProgram(m_program);
   5777 		glDeleteBuffers(1, &m_storage_buffer);
   5778 		return NO_ERROR;
   5779 	}
   5780 };
   5781 
   5782 class AdvancedConditionalDispatching : public ComputeShaderBase
   5783 {
   5784 	virtual std::string Title()
   5785 	{
   5786 		return NL "Conditional Dispatching";
   5787 	}
   5788 	virtual std::string Purpose()
   5789 	{
   5790 		return NL "Verify that DispatchCompute and DispatchComputeIndirect commands work as expected inside "
   5791 				  "conditional blocks.";
   5792 	}
   5793 	virtual std::string Method()
   5794 	{
   5795 		return NL "1. Render two quads. One will pass depth-test and the second one will not." NL
   5796 				  "2. Use GL_ANY_SAMPLES_PASSED query objects to 'remember' these results." NL
   5797 				  "3. Use DispatchCompute and DispatchComputeIndirect commands inside conditional blocks using both "
   5798 				  "query objects." NL
   5799 				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands are only executed in" NL
   5800 				  "    the conditional block that uses query object that has passed depth-test.";
   5801 	}
   5802 	virtual std::string PassCriteria()
   5803 	{
   5804 		return NL "Everything works as expected.";
   5805 	}
   5806 
   5807 	GLuint m_program_vsfs;
   5808 	GLuint m_program_cs;
   5809 	GLuint m_vertex_array;
   5810 	GLuint m_query[2];
   5811 	GLuint m_storage_buffer;
   5812 	GLuint m_dispatch_buffer;
   5813 
   5814 	virtual long Setup()
   5815 	{
   5816 		m_program_vsfs = 0;
   5817 		m_program_cs   = 0;
   5818 		m_vertex_array = 0;
   5819 		memset(m_query, 0, sizeof(m_query));
   5820 		m_storage_buffer  = 0;
   5821 		m_dispatch_buffer = 0;
   5822 		return NO_ERROR;
   5823 	}
   5824 	virtual long Run()
   5825 	{
   5826 		const char* const glsl_vs = NL
   5827 			"uniform float g_depth;" NL "uniform vec2[3] g_vertex = vec2[3](vec2(-1, -1), vec2(3, -1), vec2(-1, 3));" NL
   5828 			"void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], g_depth, 1);" NL "}";
   5829 
   5830 		const char* const glsl_fs =
   5831 			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(0, 1, 0, 1);" NL "}";
   5832 
   5833 		m_program_vsfs = CreateProgram(glsl_vs, glsl_fs);
   5834 		glLinkProgram(m_program_vsfs);
   5835 		if (!CheckProgram(m_program_vsfs))
   5836 			return ERROR;
   5837 
   5838 		const char* const glsl_cs =
   5839 			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
   5840 			   "void main() {" NL "  atomicAdd(g_output, 1);" NL "}";
   5841 		m_program_cs = CreateComputeProgram(glsl_cs);
   5842 		glLinkProgram(m_program_cs);
   5843 		if (!CheckProgram(m_program_cs))
   5844 			return ERROR;
   5845 
   5846 		/* create storage buffer */
   5847 		{
   5848 			const int data = 0;
   5849 			glGenBuffers(1, &m_storage_buffer);
   5850 			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   5851 			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_DYNAMIC_COPY);
   5852 		}
   5853 		/* create dispatch buffer */
   5854 		{
   5855 			const GLuint data[3] = { 2, 2, 2 };
   5856 			glGenBuffers(1, &m_dispatch_buffer);
   5857 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   5858 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
   5859 		}
   5860 
   5861 		glGenVertexArrays(1, &m_vertex_array);
   5862 		glGenQueries(2, m_query);
   5863 
   5864 		glEnable(GL_DEPTH_TEST);
   5865 		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
   5866 
   5867 		glUseProgram(m_program_vsfs);
   5868 		glBindVertexArray(m_vertex_array);
   5869 
   5870 		// this draw call will pass depth test
   5871 		glBeginQuery(GL_ANY_SAMPLES_PASSED, m_query[0]);
   5872 		glUniform1f(glGetUniformLocation(m_program_vsfs, "g_depth"), 0.0f);
   5873 		glDrawArrays(GL_TRIANGLES, 0, 3);
   5874 		glEndQuery(GL_ANY_SAMPLES_PASSED);
   5875 
   5876 		// this draw call will NOT pass depth test
   5877 		glBeginQuery(GL_ANY_SAMPLES_PASSED, m_query[1]);
   5878 		glUniform1f(glGetUniformLocation(m_program_vsfs, "g_depth"), 0.5f);
   5879 		glDrawArrays(GL_TRIANGLES, 0, 3);
   5880 		glEndQuery(GL_ANY_SAMPLES_PASSED);
   5881 
   5882 		glDisable(GL_DEPTH_TEST);
   5883 
   5884 		glUseProgram(m_program_cs);
   5885 
   5886 		// these commands should be executed normally
   5887 		glBeginConditionalRender(m_query[0], GL_QUERY_WAIT);
   5888 		glDispatchCompute(2, 2, 2);
   5889 		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   5890 		glDispatchComputeIndirect(0);
   5891 		glEndConditionalRender();
   5892 
   5893 		/* validate */
   5894 		{
   5895 			int data;
   5896 			glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   5897 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5898 			if (data != 16)
   5899 			{
   5900 				m_context.getTestContext().getLog()
   5901 					<< tcu::TestLog::Message << "Data is " << data << " should be 16." << tcu::TestLog::EndMessage;
   5902 				return ERROR;
   5903 			}
   5904 		}
   5905 
   5906 		// these commands should be discarded
   5907 		glBeginConditionalRender(m_query[1], GL_QUERY_WAIT);
   5908 		glDispatchCompute(2, 2, 2);
   5909 		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   5910 		glDispatchComputeIndirect(0);
   5911 		glEndConditionalRender();
   5912 
   5913 		/* validate */
   5914 		{
   5915 			int data;
   5916 			glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   5917 			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
   5918 			if (data != 16 && m_context.getRenderContext().getRenderTarget().getDepthBits() != 0)
   5919 			{
   5920 				m_context.getTestContext().getLog()
   5921 					<< tcu::TestLog::Message << "Data is " << data << " should be 16." << tcu::TestLog::EndMessage;
   5922 				return ERROR;
   5923 			}
   5924 			else if (data != 32 && m_context.getRenderContext().getRenderTarget().getDepthBits() == 0)
   5925 			{
   5926 				m_context.getTestContext().getLog()
   5927 					<< tcu::TestLog::Message << "Data is " << data << " should be 32." << tcu::TestLog::EndMessage;
   5928 				return ERROR;
   5929 			}
   5930 		}
   5931 
   5932 		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
   5933 		{
   5934 			return ERROR;
   5935 		}
   5936 
   5937 		return NO_ERROR;
   5938 	}
   5939 	virtual long Cleanup()
   5940 	{
   5941 		glUseProgram(0);
   5942 		glDeleteProgram(m_program_vsfs);
   5943 		glDeleteProgram(m_program_cs);
   5944 		glDeleteVertexArrays(1, &m_vertex_array);
   5945 		glDeleteQueries(2, m_query);
   5946 		glDeleteBuffers(1, &m_storage_buffer);
   5947 		glDeleteBuffers(1, &m_dispatch_buffer);
   5948 		return NO_ERROR;
   5949 	}
   5950 };
   5951 
   5952 class NegativeAPINoActiveProgram : public ComputeShaderBase
   5953 {
   5954 	virtual std::string Title()
   5955 	{
   5956 		return NL "API errors - no active program";
   5957 	}
   5958 	virtual std::string Purpose()
   5959 	{
   5960 		return NL "Verify that appropriate errors are generated by the OpenGL API.";
   5961 	}
   5962 	virtual std::string Method()
   5963 	{
   5964 		return NL "";
   5965 	}
   5966 	virtual std::string PassCriteria()
   5967 	{
   5968 		return NL "";
   5969 	}
   5970 
   5971 	GLuint m_program;
   5972 
   5973 	virtual long Setup()
   5974 	{
   5975 		m_program = 0;
   5976 		return NO_ERROR;
   5977 	}
   5978 	virtual long Run()
   5979 	{
   5980 		glDispatchCompute(1, 2, 3);
   5981 		if (glGetError() != GL_INVALID_OPERATION)
   5982 		{
   5983 			m_context.getTestContext().getLog()
   5984 				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
   5985 				<< "DispatchComputeIndirect if there is no active program for the compute\n"
   5986 				<< "shader stage." << tcu::TestLog::EndMessage;
   5987 			return ERROR;
   5988 		}
   5989 
   5990 		/* indirect dispatch */
   5991 		{
   5992 			GLuint		 buffer;
   5993 			const GLuint num_group[3] = { 3, 2, 1 };
   5994 			glGenBuffers(1, &buffer);
   5995 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
   5996 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
   5997 			glDispatchComputeIndirect(0);
   5998 			glDeleteBuffers(1, &buffer);
   5999 			if (glGetError() != GL_INVALID_OPERATION)
   6000 			{
   6001 				m_context.getTestContext().getLog()
   6002 					<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
   6003 					<< "DispatchComputeIndirect if there is no active program for the compute\n"
   6004 					<< "shader stage." << tcu::TestLog::EndMessage;
   6005 				return ERROR;
   6006 			}
   6007 		}
   6008 
   6009 		const char* const glsl_vs =
   6010 			NL "layout(location = 0) in vec4 g_position;" NL "void main() {" NL "  gl_Position = g_position;" NL "}";
   6011 
   6012 		const char* const glsl_fs =
   6013 			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
   6014 
   6015 		m_program = CreateProgram(glsl_vs, glsl_fs);
   6016 		glLinkProgram(m_program);
   6017 		if (!CheckProgram(m_program))
   6018 			return ERROR;
   6019 
   6020 		glUseProgram(m_program);
   6021 
   6022 		glDispatchCompute(1, 2, 3);
   6023 		if (glGetError() != GL_INVALID_OPERATION)
   6024 		{
   6025 			m_context.getTestContext().getLog()
   6026 				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
   6027 				<< "DispatchComputeIndirect if there is no active program for the compute\n"
   6028 				<< "shader stage." << tcu::TestLog::EndMessage;
   6029 			return ERROR;
   6030 		}
   6031 
   6032 		/* indirect dispatch */
   6033 		{
   6034 			GLuint		 buffer;
   6035 			const GLuint num_group[3] = { 3, 2, 1 };
   6036 			glGenBuffers(1, &buffer);
   6037 			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
   6038 			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
   6039 			glDispatchComputeIndirect(0);
   6040 			glDeleteBuffers(1, &buffer);
   6041 			if (glGetError() != GL_INVALID_OPERATION)
   6042 			{
   6043 				m_context.getTestContext().getLog()
   6044 					<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
   6045 					<< "DispatchComputeIndirect if there is no active program for the compute\n"
   6046 					<< "shader stage." << tcu::TestLog::EndMessage;
   6047 				return ERROR;
   6048 			}
   6049 		}
   6050 
   6051 		return NO_ERROR;
   6052 	}
   6053 	virtual long Cleanup()
   6054 	{
   6055 		glUseProgram(0);
   6056 		glDeleteProgram(m_program);
   6057 		return NO_ERROR;
   6058 	}
   6059 };
   6060 
   6061 class NegativeAPIWorkGroupCount : public ComputeShaderBase
   6062 {
   6063 	virtual std::string Title()
   6064 	{
   6065 		return NL "API errors - invalid work group count";
   6066 	}
   6067 	virtual std::string Purpose()
   6068 	{
   6069 		return NL "Verify that appropriate errors are generated by the OpenGL API.";
   6070 	}
   6071 	virtual std::string Method()
   6072 	{
   6073 		return NL "";
   6074 	}
   6075 	virtual std::string PassCriteria()
   6076 	{
   6077 		return NL "";
   6078 	}
   6079 
   6080 	GLuint m_program;
   6081 	GLuint m_storage_buffer;
   6082 
   6083 	virtual long Setup()
   6084 	{
   6085 		m_program		 = 0;
   6086 		m_storage_buffer = 0;
   6087 		return NO_ERROR;
   6088 	}
   6089 	virtual long Run()
   6090 	{
   6091 		const char* const glsl_cs =
   6092 			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   6093 			   "void main() {" NL
   6094 			   "  g_output[gl_GlobalInvocationID.x * gl_GlobalInvocationID.y * gl_GlobalInvocationID.z] = 0;" NL "}";
   6095 		m_program = CreateComputeProgram(glsl_cs);
   6096 		glLinkProgram(m_program);
   6097 		if (!CheckProgram(m_program))
   6098 			return ERROR;
   6099 
   6100 		glGenBuffers(1, &m_storage_buffer);
   6101 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   6102 		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
   6103 
   6104 		GLint x, y, z;
   6105 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &x);
   6106 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &y);
   6107 		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &z);
   6108 
   6109 		glUseProgram(m_program);
   6110 
   6111 		glDispatchCompute(x + 1, 1, 1);
   6112 		if (glGetError() != GL_INVALID_VALUE)
   6113 		{
   6114 			m_context.getTestContext().getLog()
   6115 				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
   6116 				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
   6117 				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
   6118 			return ERROR;
   6119 		}
   6120 
   6121 		glDispatchCompute(1, y + 1, 1);
   6122 		if (glGetError() != GL_INVALID_VALUE)
   6123 		{
   6124 			m_context.getTestContext().getLog()
   6125 				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
   6126 				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
   6127 				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
   6128 			return ERROR;
   6129 		}
   6130 
   6131 		glDispatchCompute(1, 1, z + 1);
   6132 		if (glGetError() != GL_INVALID_VALUE)
   6133 		{
   6134 			m_context.getTestContext().getLog()
   6135 				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
   6136 				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
   6137 				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
   6138 			return ERROR;
   6139 		}
   6140 
   6141 		return NO_ERROR;
   6142 	}
   6143 	virtual long Cleanup()
   6144 	{
   6145 		glUseProgram(0);
   6146 		glDeleteProgram(m_program);
   6147 		glDeleteBuffers(1, &m_storage_buffer);
   6148 		return NO_ERROR;
   6149 	}
   6150 };
   6151 
   6152 class NegativeAPIIndirect : public ComputeShaderBase
   6153 {
   6154 	virtual std::string Title()
   6155 	{
   6156 		return NL "API errors - incorrect DispatchComputeIndirect usage";
   6157 	}
   6158 	virtual std::string Purpose()
   6159 	{
   6160 		return NL "Verify that appropriate errors are generated by the OpenGL API.";
   6161 	}
   6162 	virtual std::string Method()
   6163 	{
   6164 		return NL "";
   6165 	}
   6166 	virtual std::string PassCriteria()
   6167 	{
   6168 		return NL "";
   6169 	}
   6170 
   6171 	GLuint m_program;
   6172 	GLuint m_storage_buffer;
   6173 	GLuint m_dispatch_buffer;
   6174 
   6175 	virtual long Setup()
   6176 	{
   6177 		m_program		  = 0;
   6178 		m_storage_buffer  = 0;
   6179 		m_dispatch_buffer = 0;
   6180 		return NO_ERROR;
   6181 	}
   6182 
   6183 	virtual long Run()
   6184 	{
   6185 		const char* const glsl_cs =
   6186 			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   6187 			   "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
   6188 		m_program = CreateComputeProgram(glsl_cs);
   6189 		glLinkProgram(m_program);
   6190 		if (!CheckProgram(m_program))
   6191 			return ERROR;
   6192 
   6193 		glGenBuffers(1, &m_storage_buffer);
   6194 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   6195 		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
   6196 
   6197 		const GLuint num_groups[6] = { 1, 1, 1, 1, 1, 1 };
   6198 		glGenBuffers(1, &m_dispatch_buffer);
   6199 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
   6200 		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_COPY);
   6201 
   6202 		glUseProgram(m_program);
   6203 
   6204 		glDispatchComputeIndirect(-2);
   6205 		if (glGetError() != GL_INVALID_VALUE)
   6206 		{
   6207 			m_context.getTestContext().getLog()
   6208 				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
   6209 				<< "less than zero or not a multiple of four." << tcu::TestLog::EndMessage;
   6210 			return ERROR;
   6211 		}
   6212 
   6213 		glDispatchComputeIndirect(3);
   6214 		if (glGetError() != GL_INVALID_VALUE)
   6215 		{
   6216 			m_context.getTestContext().getLog()
   6217 				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
   6218 				<< "less than zero or not a multiple of four." << tcu::TestLog::EndMessage;
   6219 			return ERROR;
   6220 		}
   6221 
   6222 		glDispatchComputeIndirect(16);
   6223 		if (glGetError() != GL_INVALID_OPERATION)
   6224 		{
   6225 			m_context.getTestContext().getLog()
   6226 				<< tcu::TestLog::Message
   6227 				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
   6228 				<< "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
   6229 				<< "beyond the end of the bound buffer object." << tcu::TestLog::EndMessage;
   6230 			return ERROR;
   6231 		}
   6232 
   6233 		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
   6234 		glDispatchComputeIndirect(0);
   6235 		if (glGetError() != GL_INVALID_OPERATION)
   6236 		{
   6237 			m_context.getTestContext().getLog()
   6238 				<< tcu::TestLog::Message
   6239 				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
   6240 				<< "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
   6241 				<< "beyond the end of the bound buffer object." << tcu::TestLog::EndMessage;
   6242 			return ERROR;
   6243 		}
   6244 
   6245 		return NO_ERROR;
   6246 	}
   6247 	virtual long Cleanup()
   6248 	{
   6249 		glUseProgram(0);
   6250 		glDeleteProgram(m_program);
   6251 		glDeleteBuffers(1, &m_storage_buffer);
   6252 		glDeleteBuffers(1, &m_dispatch_buffer);
   6253 		return NO_ERROR;
   6254 	}
   6255 };
   6256 
   6257 class NegativeAPIProgram : public ComputeShaderBase
   6258 {
   6259 	virtual std::string Title()
   6260 	{
   6261 		return NL "API errors - program state";
   6262 	}
   6263 	virtual std::string Purpose()
   6264 	{
   6265 		return NL "Verify that appropriate errors are generated by the OpenGL API.";
   6266 	}
   6267 	virtual std::string Method()
   6268 	{
   6269 		return NL "";
   6270 	}
   6271 	virtual std::string PassCriteria()
   6272 	{
   6273 		return NL "";
   6274 	}
   6275 
   6276 	GLuint m_program;
   6277 	GLuint m_storage_buffer;
   6278 
   6279 	virtual long Setup()
   6280 	{
   6281 		m_program		 = 0;
   6282 		m_storage_buffer = 0;
   6283 		return NO_ERROR;
   6284 	}
   6285 	virtual long Run()
   6286 	{
   6287 		const char* const glsl_vs =
   6288 			NL "layout(location = 0) in vec4 g_position;" NL "void main() {" NL "  gl_Position = g_position;" NL "}";
   6289 
   6290 		const char* const glsl_fs =
   6291 			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
   6292 		m_program = CreateProgram(glsl_vs, glsl_fs);
   6293 
   6294 		GLint v[3];
   6295 		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
   6296 		if (glGetError() != GL_INVALID_OPERATION)
   6297 		{
   6298 			m_context.getTestContext().getLog()
   6299 				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
   6300 				<< "COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
   6301 				<< "successfully, or has been linked but contains no compute shaders." << tcu::TestLog::EndMessage;
   6302 			return ERROR;
   6303 		}
   6304 
   6305 		glLinkProgram(m_program);
   6306 		if (!CheckProgram(m_program))
   6307 			return ERROR;
   6308 
   6309 		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
   6310 		if (glGetError() != GL_INVALID_OPERATION)
   6311 		{
   6312 			m_context.getTestContext().getLog()
   6313 				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
   6314 				<< "COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
   6315 				<< "successfully, or has been linked but contains no compute shaders." << tcu::TestLog::EndMessage;
   6316 			return ERROR;
   6317 		}
   6318 		glDeleteProgram(m_program);
   6319 
   6320 		const char* const glsl_cs =
   6321 			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
   6322 			"  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
   6323 		m_program = glCreateProgram();
   6324 
   6325 		GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
   6326 		glAttachShader(m_program, sh);
   6327 		glDeleteShader(sh);
   6328 		glShaderSource(sh, 1, &glsl_cs, NULL);
   6329 		glCompileShader(sh);
   6330 
   6331 		sh = glCreateShader(GL_VERTEX_SHADER);
   6332 		glAttachShader(m_program, sh);
   6333 		glDeleteShader(sh);
   6334 		glShaderSource(sh, 1, &glsl_vs, NULL);
   6335 		glCompileShader(sh);
   6336 
   6337 		sh = glCreateShader(GL_FRAGMENT_SHADER);
   6338 		glAttachShader(m_program, sh);
   6339 		glDeleteShader(sh);
   6340 		glShaderSource(sh, 1, &glsl_fs, NULL);
   6341 		glCompileShader(sh);
   6342 
   6343 		glLinkProgram(m_program);
   6344 		GLint status;
   6345 		glGetProgramiv(m_program, GL_LINK_STATUS, &status);
   6346 		if (status == GL_TRUE)
   6347 		{
   6348 			m_context.getTestContext().getLog()
   6349 				<< tcu::TestLog::Message << "LinkProgram will fail if <program> contains a combination"
   6350 				<< " of compute and\n non-compute shaders.\n"
   6351 				<< tcu::TestLog::EndMessage;
   6352 			return ERROR;
   6353 		}
   6354 
   6355 		return NO_ERROR;
   6356 	}
   6357 	virtual long Cleanup()
   6358 	{
   6359 		glUseProgram(0);
   6360 		glDeleteProgram(m_program);
   6361 		glDeleteBuffers(1, &m_storage_buffer);
   6362 		return NO_ERROR;
   6363 	}
   6364 };
   6365 
   6366 class NegativeGLSLCompileTimeErrors : public ComputeShaderBase
   6367 {
   6368 	virtual std::string Title()
   6369 	{
   6370 		return NL "Compile-time errors";
   6371 	}
   6372 	virtual std::string Purpose()
   6373 	{
   6374 		return NL "Verify that appropriate errors are generated by the GLSL compiler.";
   6375 	}
   6376 	virtual std::string Method()
   6377 	{
   6378 		return NL "";
   6379 	}
   6380 	virtual std::string PassCriteria()
   6381 	{
   6382 		return NL "";
   6383 	}
   6384 
   6385 	static std::string Shader1(int x, int y, int z)
   6386 	{
   6387 		std::stringstream ss;
   6388 		ss << "#version 430 core" NL "layout(local_size_x = " << x << ", local_size_y = " << y
   6389 		   << ", local_size_z = " << z << ") in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   6390 										  "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
   6391 		return ss.str();
   6392 	}
   6393 	virtual long Run()
   6394 	{
   6395 		// gl_GlobalInvocationID requires "#version 430" or later or GL_ARB_compute_shader
   6396 		// extension enabled
   6397 		if (!Compile("#version 420 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
   6398 					 "  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL
   6399 					 "}"))
   6400 			return ERROR;
   6401 
   6402 		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(local_size_x = 2) in;" NL
   6403 					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
   6404 					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
   6405 			return ERROR;
   6406 
   6407 		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "in uint x;" NL
   6408 					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
   6409 					 "  g_output[gl_GlobalInvocationID.x] = x;" NL "}"))
   6410 			return ERROR;
   6411 
   6412 		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "out uint x;" NL
   6413 					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
   6414 					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "  x = 0;" NL "}"))
   6415 			return ERROR;
   6416 
   6417 		{
   6418 			GLint x, y, z;
   6419 			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &x);
   6420 			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &y);
   6421 			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &z);
   6422 
   6423 			if (!Compile(Shader1(x + 1, 1, 1)))
   6424 				return ERROR;
   6425 			if (!Compile(Shader1(1, y + 1, 1)))
   6426 				return ERROR;
   6427 			if (!Compile(Shader1(1, 1, z + 1)))
   6428 				return ERROR;
   6429 		}
   6430 
   6431 		return NO_ERROR;
   6432 	}
   6433 
   6434 	bool Compile(const std::string& source)
   6435 	{
   6436 		const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
   6437 
   6438 		const char* const src = source.c_str();
   6439 		glShaderSource(sh, 1, &src, NULL);
   6440 		glCompileShader(sh);
   6441 
   6442 		GLchar log[1024];
   6443 		glGetShaderInfoLog(sh, sizeof(log), NULL, log);
   6444 		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Shader Info Log:\n"
   6445 											<< log << tcu::TestLog::EndMessage;
   6446 
   6447 		GLint status;
   6448 		glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
   6449 		glDeleteShader(sh);
   6450 
   6451 		if (status == GL_TRUE)
   6452 		{
   6453 			m_context.getTestContext().getLog()
   6454 				<< tcu::TestLog::Message << "Compilation should fail." << tcu::TestLog::EndMessage;
   6455 			return false;
   6456 		}
   6457 
   6458 		return true;
   6459 	}
   6460 };
   6461 
   6462 class NegativeGLSLLinkTimeErrors : public ComputeShaderBase
   6463 {
   6464 	virtual std::string Title()
   6465 	{
   6466 		return NL "Link-time errors";
   6467 	}
   6468 	virtual std::string Purpose()
   6469 	{
   6470 		return NL "Verify that appropriate errors are generated by the GLSL linker.";
   6471 	}
   6472 	virtual std::string Method()
   6473 	{
   6474 		return NL "";
   6475 	}
   6476 	virtual std::string PassCriteria()
   6477 	{
   6478 		return NL "";
   6479 	}
   6480 
   6481 	virtual long Run()
   6482 	{
   6483 		// no layout
   6484 		if (!Link("#version 430 core" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
   6485 				  "#version 430 core" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
   6486 				  "void Run() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
   6487 			return ERROR;
   6488 
   6489 		if (!Link("#version 430 core" NL "layout(local_size_x = 2) in;" NL "void Run();" NL "void main() {" NL
   6490 				  "  Run();" NL "}",
   6491 				  "#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
   6492 				  "  uint g_output[];" NL "};" NL "void Run() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
   6493 			return ERROR;
   6494 
   6495 		return NO_ERROR;
   6496 	}
   6497 
   6498 	bool Link(const std::string& cs0, const std::string& cs1)
   6499 	{
   6500 		const GLuint p = glCreateProgram();
   6501 
   6502 		/* shader 0 */
   6503 		{
   6504 			GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
   6505 			glAttachShader(p, sh);
   6506 			glDeleteShader(sh);
   6507 			const char* const src = cs0.c_str();
   6508 			glShaderSource(sh, 1, &src, NULL);
   6509 			glCompileShader(sh);
   6510 
   6511 			GLint status;
   6512 			glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
   6513 			if (status == GL_FALSE)
   6514 			{
   6515 				glDeleteProgram(p);
   6516 				m_context.getTestContext().getLog()
   6517 					<< tcu::TestLog::Message << "CS0 compilation should be ok." << tcu::TestLog::EndMessage;
   6518 				return false;
   6519 			}
   6520 		}
   6521 		/* shader 1 */
   6522 		{
   6523 			GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
   6524 			glAttachShader(p, sh);
   6525 			glDeleteShader(sh);
   6526 			const char* const src = cs1.c_str();
   6527 			glShaderSource(sh, 1, &src, NULL);
   6528 			glCompileShader(sh);
   6529 
   6530 			GLint status;
   6531 			glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
   6532 			if (status == GL_FALSE)
   6533 			{
   6534 				glDeleteProgram(p);
   6535 				m_context.getTestContext().getLog()
   6536 					<< tcu::TestLog::Message << "CS1 compilation should be ok." << tcu::TestLog::EndMessage;
   6537 				return false;
   6538 			}
   6539 		}
   6540 
   6541 		glLinkProgram(p);
   6542 
   6543 		GLchar log[1024];
   6544 		glGetProgramInfoLog(p, sizeof(log), NULL, log);
   6545 		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Program Info Log:\n"
   6546 											<< log << tcu::TestLog::EndMessage;
   6547 
   6548 		GLint status;
   6549 		glGetProgramiv(p, GL_LINK_STATUS, &status);
   6550 		glDeleteProgram(p);
   6551 
   6552 		if (status == GL_TRUE)
   6553 		{
   6554 			m_context.getTestContext().getLog()
   6555 				<< tcu::TestLog::Message << "Link operation should fail." << tcu::TestLog::EndMessage;
   6556 			return false;
   6557 		}
   6558 
   6559 		return true;
   6560 	}
   6561 };
   6562 
   6563 class BasicWorkGroupSizeIsConst : public ComputeShaderBase
   6564 {
   6565 	virtual std::string Title()
   6566 	{
   6567 		return NL "gl_WorkGroupSize is an constant";
   6568 	}
   6569 	virtual std::string Purpose()
   6570 	{
   6571 		return NL "Verify that gl_WorkGroupSize can be used as an constant expression.";
   6572 	}
   6573 	virtual std::string Method()
   6574 	{
   6575 		return NL "";
   6576 	}
   6577 	virtual std::string PassCriteria()
   6578 	{
   6579 		return NL "";
   6580 	}
   6581 
   6582 	GLuint m_program;
   6583 	GLuint m_storage_buffer;
   6584 
   6585 	virtual long Setup()
   6586 	{
   6587 		m_program		 = 0;
   6588 		m_storage_buffer = 0;
   6589 		return NO_ERROR;
   6590 	}
   6591 
   6592 	virtual long Run()
   6593 	{
   6594 		const char* const glsl_cs =
   6595 			NL "layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;" NL
   6596 			   "layout(std430, binding = 0) buffer Output {" NL "  uint g_buffer[22 + gl_WorkGroupSize.x];" NL "};" NL
   6597 			   "shared uint g_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];" NL
   6598 			   "uniform uint g_uniform[gl_WorkGroupSize.z + 20] = { "
   6599 			   "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24 };" NL "void main() {" NL
   6600 			   "  g_shared[gl_LocalInvocationIndex] = 1U;" NL "  groupMemoryBarrier();" NL "  barrier();" NL
   6601 			   "  uint sum = 0;" NL
   6602 			   "  for (uint i = 0; i < gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; ++i) {" NL
   6603 			   "    sum += g_shared[i];" NL "  }" NL "  sum += g_uniform[gl_LocalInvocationIndex];" NL
   6604 			   "  g_buffer[gl_LocalInvocationIndex] = sum;" NL "}";
   6605 		m_program = CreateComputeProgram(glsl_cs);
   6606 		glLinkProgram(m_program);
   6607 		if (!CheckProgram(m_program))
   6608 			return ERROR;
   6609 
   6610 		glGenBuffers(1, &m_storage_buffer);
   6611 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
   6612 		glBufferData(GL_SHADER_STORAGE_BUFFER, 24 * sizeof(GLuint), NULL, GL_STATIC_DRAW);
   6613 
   6614 		glUseProgram(m_program);
   6615 		glDispatchCompute(1, 1, 1);
   6616 		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
   6617 
   6618 		long	error = NO_ERROR;
   6619 		GLuint* data;
   6620 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
   6621 		data =
   6622 			static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 24, GL_MAP_READ_BIT));
   6623 		for (GLuint i = 0; i < 24; ++i)
   6624 		{
   6625 			if (data[i] != (i + 25))
   6626 			{
   6627 				m_context.getTestContext().getLog()
   6628 					<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be " << i + 25
   6629 					<< "." << tcu::TestLog::EndMessage;
   6630 				error = ERROR;
   6631 			}
   6632 		}
   6633 		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
   6634 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
   6635 		return error;
   6636 	}
   6637 
   6638 	virtual long Cleanup()
   6639 	{
   6640 		glUseProgram(0);
   6641 		glDeleteProgram(m_program);
   6642 		glDeleteBuffers(1, &m_storage_buffer);
   6643 		return NO_ERROR;
   6644 	}
   6645 };
   6646 
   6647 } // anonymous namespace
   6648 
   6649 ComputeShaderTests::ComputeShaderTests(deqp::Context& context) : TestCaseGroup(context, "compute_shader", "")
   6650 {
   6651 }
   6652 
   6653 ComputeShaderTests::~ComputeShaderTests(void)
   6654 {
   6655 }
   6656 
   6657 void ComputeShaderTests::init()
   6658 {
   6659 	using namespace deqp;
   6660 	addChild(new TestSubcase(m_context, "simple-compute", TestSubcase::Create<SimpleCompute>));
   6661 	addChild(new TestSubcase(m_context, "one-work-group", TestSubcase::Create<BasicOneWorkGroup>));
   6662 	addChild(new TestSubcase(m_context, "resource-ubo", TestSubcase::Create<BasicResourceUBO>));
   6663 	addChild(new TestSubcase(m_context, "resource-texture", TestSubcase::Create<BasicResourceTexture>));
   6664 	addChild(new TestSubcase(m_context, "resource-image", TestSubcase::Create<BasicResourceImage>));
   6665 	addChild(new TestSubcase(m_context, "resource-atomic-counter", TestSubcase::Create<BasicResourceAtomicCounter>));
   6666 	addChild(new TestSubcase(m_context, "resource-subroutine", TestSubcase::Create<BasicResourceSubroutine>));
   6667 	addChild(new TestSubcase(m_context, "resource-uniform", TestSubcase::Create<BasicResourceUniform>));
   6668 	addChild(new TestSubcase(m_context, "built-in-variables", TestSubcase::Create<BasicBuiltinVariables>));
   6669 	addChild(new TestSubcase(m_context, "max", TestSubcase::Create<BasicMax>));
   6670 	addChild(new TestSubcase(m_context, "work-group-size", TestSubcase::Create<BasicWorkGroupSizeIsConst>));
   6671 	addChild(new TestSubcase(m_context, "build-monolithic", TestSubcase::Create<BasicBuildMonolithic>));
   6672 	addChild(new TestSubcase(m_context, "build-separable", TestSubcase::Create<BasicBuildSeparable>));
   6673 	addChild(new TestSubcase(m_context, "shared-simple", TestSubcase::Create<BasicSharedSimple>));
   6674 	addChild(new TestSubcase(m_context, "shared-struct", TestSubcase::Create<BasicSharedStruct>));
   6675 	addChild(new TestSubcase(m_context, "dispatch-indirect", TestSubcase::Create<BasicDispatchIndirect>));
   6676 	addChild(new TestSubcase(m_context, "sso-compute-pipeline", TestSubcase::Create<BasicSSOComputePipeline>));
   6677 	addChild(new TestSubcase(m_context, "sso-case2", TestSubcase::Create<BasicSSOCase2>));
   6678 	addChild(new TestSubcase(m_context, "sso-case3", TestSubcase::Create<BasicSSOCase3>));
   6679 	addChild(new TestSubcase(m_context, "atomic-case1", TestSubcase::Create<BasicAtomicCase1>));
   6680 	addChild(new TestSubcase(m_context, "atomic-case2", TestSubcase::Create<BasicAtomicCase2>));
   6681 	addChild(new TestSubcase(m_context, "atomic-case3", TestSubcase::Create<BasicAtomicCase3>));
   6682 	addChild(new TestSubcase(m_context, "copy-image", TestSubcase::Create<AdvancedCopyImage>));
   6683 	addChild(new TestSubcase(m_context, "pipeline-pre-vs", TestSubcase::Create<AdvancedPipelinePreVS>));
   6684 	addChild(
   6685 		new TestSubcase(m_context, "pipeline-gen-draw-commands", TestSubcase::Create<AdvancedPipelineGenDrawCommands>));
   6686 	addChild(new TestSubcase(m_context, "pipeline-compute-chain", TestSubcase::Create<AdvancedPipelineComputeChain>));
   6687 	addChild(new TestSubcase(m_context, "pipeline-post-fs", TestSubcase::Create<AdvancedPipelinePostFS>));
   6688 	addChild(new TestSubcase(m_context, "pipeline-post-xfb", TestSubcase::Create<AdvancedPipelinePostXFB>));
   6689 	addChild(new TestSubcase(m_context, "shared-indexing", TestSubcase::Create<AdvancedSharedIndexing>));
   6690 	addChild(new TestSubcase(m_context, "shared-max", TestSubcase::Create<AdvancedSharedMax>));
   6691 	addChild(new TestSubcase(m_context, "dynamic-paths", TestSubcase::Create<AdvancedDynamicPaths>));
   6692 	addChild(new TestSubcase(m_context, "resources-max", TestSubcase::Create<AdvancedResourcesMax>));
   6693 	addChild(new TestSubcase(m_context, "fp64-case1", TestSubcase::Create<AdvancedFP64Case1>));
   6694 	addChild(new TestSubcase(m_context, "fp64-case2", TestSubcase::Create<AdvancedFP64Case2>));
   6695 	addChild(new TestSubcase(m_context, "fp64-case3", TestSubcase::Create<AdvancedFP64Case3>));
   6696 	addChild(
   6697 		new TestSubcase(m_context, "conditional-dispatching", TestSubcase::Create<AdvancedConditionalDispatching>));
   6698 	addChild(new TestSubcase(m_context, "api-no-active-program", TestSubcase::Create<NegativeAPINoActiveProgram>));
   6699 	addChild(new TestSubcase(m_context, "api-work-group-count", TestSubcase::Create<NegativeAPIWorkGroupCount>));
   6700 	addChild(new TestSubcase(m_context, "api-indirect", TestSubcase::Create<NegativeAPIIndirect>));
   6701 	addChild(new TestSubcase(m_context, "api-program", TestSubcase::Create<NegativeAPIProgram>));
   6702 	addChild(
   6703 		new TestSubcase(m_context, "glsl-compile-time-errors", TestSubcase::Create<NegativeGLSLCompileTimeErrors>));
   6704 	addChild(new TestSubcase(m_context, "glsl-link-time-errors", TestSubcase::Create<NegativeGLSLLinkTimeErrors>));
   6705 }
   6706 } // gl4cts namespace
   6707