1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.1 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Basic Compute Shader Tests. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "es31fBasicComputeShaderTests.hpp" 25 #include "gluShaderProgram.hpp" 26 #include "gluObjectWrapper.hpp" 27 #include "gluRenderContext.hpp" 28 #include "gluProgramInterfaceQuery.hpp" 29 #include "gluContextInfo.hpp" 30 #include "glwFunctions.hpp" 31 #include "glwEnums.hpp" 32 #include "tcuTestLog.hpp" 33 #include "deRandom.hpp" 34 #include "deStringUtil.hpp" 35 #include "deMemory.h" 36 37 namespace deqp 38 { 39 namespace gles31 40 { 41 namespace Functional 42 { 43 44 using std::string; 45 using std::vector; 46 using tcu::TestLog; 47 using namespace glu; 48 49 //! Utility for mapping buffers. 
50 class BufferMemMap 51 { 52 public: 53 BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access) 54 : m_gl (gl) 55 , m_target (target) 56 , m_ptr (DE_NULL) 57 { 58 m_ptr = gl.mapBufferRange(target, offset, size, access); 59 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); 60 TCU_CHECK(m_ptr); 61 } 62 63 ~BufferMemMap (void) 64 { 65 m_gl.unmapBuffer(m_target); 66 } 67 68 void* getPtr (void) const { return m_ptr; } 69 void* operator* (void) const { return m_ptr; } 70 71 private: 72 BufferMemMap (const BufferMemMap& other); 73 BufferMemMap& operator= (const BufferMemMap& other); 74 75 const glw::Functions& m_gl; 76 const deUint32 m_target; 77 void* m_ptr; 78 }; 79 80 namespace 81 { 82 83 class EmptyComputeShaderCase : public TestCase 84 { 85 public: 86 EmptyComputeShaderCase (Context& context) 87 : TestCase(context, "empty", "Empty shader") 88 { 89 } 90 91 IterateResult iterate (void) 92 { 93 const ShaderProgram program(m_context.getRenderContext(), 94 ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, 95 "#version 310 es\n" 96 "layout (local_size_x = 1) in;\n" 97 "void main (void) {}\n" 98 )); 99 100 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 101 102 m_testCtx.getLog() << program; 103 if (!program.isOk()) 104 TCU_FAIL("Compile failed"); 105 106 gl.useProgram(program.getProgram()); 107 gl.dispatchCompute(1, 1, 1); 108 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 109 110 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 111 return STOP; 112 } 113 }; 114 115 class UBOToSSBOInvertCase : public TestCase 116 { 117 public: 118 UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 119 : TestCase (context, name, description) 120 , m_numValues (numValues) 121 , m_localSize (localSize) 122 , m_workSize (workSize) 123 { 124 DE_ASSERT(m_numValues % 
(m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 125 } 126 127 IterateResult iterate (void) 128 { 129 std::ostringstream src; 130 src << "#version 310 es\n" 131 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 132 << "uniform Input {\n" 133 << " uint values[" << m_numValues << "];\n" 134 << "} ub_in;\n" 135 << "layout(binding = 1) buffer Output {\n" 136 << " uint values[" << m_numValues << "];\n" 137 << "} sb_out;\n" 138 << "void main (void) {\n" 139 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 140 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n" 141 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 142 << " uint offset = numValuesPerInv*groupNdx;\n" 143 << "\n" 144 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 145 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n" 146 << "}\n"; 147 148 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 149 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 150 const Buffer inputBuffer (m_context.getRenderContext()); 151 const Buffer outputBuffer (m_context.getRenderContext()); 152 std::vector<deUint32> inputValues (m_numValues); 153 154 // Compute input values. 
155 { 156 de::Random rnd(0x111223f); 157 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 158 inputValues[ndx] = rnd.getUint32(); 159 } 160 161 m_testCtx.getLog() << program; 162 if (!program.isOk()) 163 TCU_FAIL("Compile failed"); 164 165 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 166 167 gl.useProgram(program.getProgram()); 168 169 // Input buffer setup 170 { 171 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input"); 172 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex); 173 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values"); 174 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex); 175 176 gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer); 177 gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); 178 179 { 180 const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 181 182 for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++) 183 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 184 } 185 186 gl.uniformBlockBinding(program.getProgram(), blockIndex, 0); 187 gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer); 188 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 189 } 190 191 // Output buffer setup 192 { 193 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 194 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 195 196 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 197 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, 
DE_NULL, GL_STREAM_READ); 198 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer); 199 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 200 } 201 202 // Dispatch compute workload 203 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 204 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 205 206 // Read back and compare 207 { 208 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 209 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 210 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 211 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 212 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 213 214 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 215 for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) 216 { 217 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 218 const deUint32 ref = ~inputValues[ndx]; 219 220 if (res != ref) 221 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); 222 } 223 } 224 225 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 226 return STOP; 227 } 228 229 private: 230 const int m_numValues; 231 const tcu::IVec3 m_localSize; 232 const tcu::IVec3 m_workSize; 233 }; 234 235 class CopyInvertSSBOCase : public TestCase 236 { 237 public: 238 CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 239 : TestCase (context, name, description) 240 , m_numValues (numValues) 241 , m_localSize (localSize) 242 , m_workSize (workSize) 243 { 244 
DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 245 } 246 247 IterateResult iterate (void) 248 { 249 std::ostringstream src; 250 src << "#version 310 es\n" 251 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 252 << "layout(binding = 0) buffer Input {\n" 253 << " uint values[" << m_numValues << "];\n" 254 << "} sb_in;\n" 255 << "layout (binding = 1) buffer Output {\n" 256 << " uint values[" << m_numValues << "];\n" 257 << "} sb_out;\n" 258 << "void main (void) {\n" 259 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 260 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n" 261 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 262 << " uint offset = numValuesPerInv*groupNdx;\n" 263 << "\n" 264 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 265 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n" 266 << "}\n"; 267 268 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 269 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 270 const Buffer inputBuffer (m_context.getRenderContext()); 271 const Buffer outputBuffer (m_context.getRenderContext()); 272 std::vector<deUint32> inputValues (m_numValues); 273 274 // Compute input values. 
275 { 276 de::Random rnd(0x124fef); 277 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 278 inputValues[ndx] = rnd.getUint32(); 279 } 280 281 m_testCtx.getLog() << program; 282 if (!program.isOk()) 283 TCU_FAIL("Compile failed"); 284 285 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 286 287 gl.useProgram(program.getProgram()); 288 289 // Input buffer setup 290 { 291 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); 292 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 293 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); 294 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 295 296 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); 297 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); 298 299 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 300 301 { 302 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 303 304 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 305 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 306 } 307 308 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); 309 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 310 } 311 312 // Output buffer setup 313 { 314 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 315 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 316 317 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 318 
gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ); 319 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer); 320 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 321 } 322 323 // Dispatch compute workload 324 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 325 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 326 327 // Read back and compare 328 { 329 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 330 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 331 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 332 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 333 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 334 335 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 336 for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) 337 { 338 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 339 const deUint32 ref = ~inputValues[ndx]; 340 341 if (res != ref) 342 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); 343 } 344 } 345 346 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 347 return STOP; 348 } 349 350 private: 351 const int m_numValues; 352 const tcu::IVec3 m_localSize; 353 const tcu::IVec3 m_workSize; 354 }; 355 356 class InvertSSBOInPlaceCase : public TestCase 357 { 358 public: 359 InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 360 : TestCase (context, name, description) 361 
, m_numValues (numValues) 362 , m_isSized (isSized) 363 , m_localSize (localSize) 364 , m_workSize (workSize) 365 { 366 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 367 } 368 369 IterateResult iterate (void) 370 { 371 std::ostringstream src; 372 src << "#version 310 es\n" 373 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 374 << "layout(binding = 0) buffer InOut {\n" 375 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 376 << "} sb_inout;\n" 377 << "void main (void) {\n" 378 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 379 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n" 380 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 381 << " uint offset = numValuesPerInv*groupNdx;\n" 382 << "\n" 383 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 384 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n" 385 << "}\n"; 386 387 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 388 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 389 390 m_testCtx.getLog() << program; 391 if (!program.isOk()) 392 TCU_FAIL("Compile failed"); 393 394 const Buffer outputBuffer (m_context.getRenderContext()); 395 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values"); 396 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 397 const deUint32 blockSize = valueInfo.arrayStride*(deUint32)m_numValues; 398 std::vector<deUint32> inputValues (m_numValues); 399 400 // Compute input values. 
401 { 402 de::Random rnd(0x82ce7f); 403 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 404 inputValues[ndx] = rnd.getUint32(); 405 } 406 407 TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); 408 409 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 410 411 gl.useProgram(program.getProgram()); 412 413 // Output buffer setup 414 { 415 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 416 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW); 417 418 { 419 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT); 420 421 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 422 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 423 } 424 425 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 426 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 427 } 428 429 // Dispatch compute workload 430 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 431 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 432 433 // Read back and compare 434 { 435 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 436 437 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 438 { 439 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 440 const deUint32 ref = ~inputValues[ndx]; 441 442 if (res != ref) 443 throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]"); 444 } 445 } 446 447 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 448 return STOP; 449 } 450 451 private: 452 const int m_numValues; 453 const bool m_isSized; 454 const tcu::IVec3 m_localSize; 455 const tcu::IVec3 m_workSize; 456 }; 457 458 class WriteToMultipleSSBOCase : public TestCase 459 { 460 public: 461 
WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 462 : TestCase (context, name, description) 463 , m_numValues (numValues) 464 , m_isSized (isSized) 465 , m_localSize (localSize) 466 , m_workSize (workSize) 467 { 468 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 469 } 470 471 IterateResult iterate (void) 472 { 473 std::ostringstream src; 474 src << "#version 310 es\n" 475 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 476 << "layout(binding = 0) buffer Out0 {\n" 477 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 478 << "} sb_out0;\n" 479 << "layout(binding = 1) buffer Out1 {\n" 480 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 481 << "} sb_out1;\n" 482 << "void main (void) {\n" 483 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 484 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 485 << "\n" 486 << " {\n" 487 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n" 488 << " uint offset = numValuesPerInv*groupNdx;\n" 489 << "\n" 490 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 491 << " sb_out0.values[offset + ndx] = offset + ndx;\n" 492 << " }\n" 493 << " {\n" 494 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n" 495 << " uint offset = numValuesPerInv*groupNdx;\n" 496 << "\n" 497 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 498 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n" 499 << " }\n" 500 << "}\n"; 501 502 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 503 const 
ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 504 505 m_testCtx.getLog() << program; 506 if (!program.isOk()) 507 TCU_FAIL("Compile failed"); 508 509 const Buffer outputBuffer0 (m_context.getRenderContext()); 510 const deUint32 value0Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values"); 511 const InterfaceVariableInfo value0Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index); 512 const deUint32 block0Size = value0Info.arrayStride*(deUint32)m_numValues; 513 514 const Buffer outputBuffer1 (m_context.getRenderContext()); 515 const deUint32 value1Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values"); 516 const InterfaceVariableInfo value1Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index); 517 const deUint32 block1Size = value1Info.arrayStride*(deUint32)m_numValues; 518 519 TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); 520 TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? 
m_numValues : 0)); 521 522 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 523 524 gl.useProgram(program.getProgram()); 525 526 // Output buffer setup 527 { 528 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); 529 gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW); 530 531 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0); 532 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 533 } 534 { 535 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); 536 gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW); 537 538 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1); 539 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 540 } 541 542 // Dispatch compute workload 543 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 544 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 545 546 // Read back and compare 547 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); 548 { 549 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT); 550 551 for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) 552 { 553 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx)); 554 const deUint32 ref = ndx; 555 556 if (res != ref) 557 throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); 558 } 559 } 560 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); 561 { 562 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT); 563 564 for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) 565 { 566 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx)); 567 const deUint32 ref = m_numValues - ndx; 568 569 if (res != ref) 570 
throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); 571 } 572 } 573 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 574 return STOP; 575 } 576 577 private: 578 const int m_numValues; 579 const bool m_isSized; 580 const tcu::IVec3 m_localSize; 581 const tcu::IVec3 m_workSize; 582 }; 583 584 class SSBOLocalBarrierCase : public TestCase 585 { 586 public: 587 SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 588 : TestCase (context, name, description) 589 , m_localSize (localSize) 590 , m_workSize (workSize) 591 { 592 } 593 594 IterateResult iterate (void) 595 { 596 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 597 const Buffer outputBuffer (m_context.getRenderContext()); 598 const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; 599 const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; 600 const int numValues = workGroupSize*workGroupCount; 601 602 std::ostringstream src; 603 src << "#version 310 es\n" 604 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 605 << "layout(binding = 0) buffer Output {\n" 606 << " coherent uint values[" << numValues << "];\n" 607 << "} sb_out;\n\n" 608 << "shared uint offsets[" << workGroupSize << "];\n\n" 609 << "void main (void) {\n" 610 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" 611 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 612 << " uint globalOffs = localSize*globalNdx;\n" 613 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" 614 << "\n" 615 << " 
sb_out.values[globalOffs + localOffs] = globalOffs;\n" 616 << " memoryBarrierBuffer();\n" 617 << " barrier();\n" 618 << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" 619 << " memoryBarrierBuffer();\n" 620 << " barrier();\n" 621 << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n" 622 << "}\n"; 623 624 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); 625 626 m_testCtx.getLog() << program; 627 if (!program.isOk()) 628 TCU_FAIL("Compile failed"); 629 630 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 631 632 gl.useProgram(program.getProgram()); 633 634 // Output buffer setup 635 { 636 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 637 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 638 639 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 640 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 641 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 642 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 643 } 644 645 // Dispatch compute workload 646 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 647 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 648 649 // Read back and compare 650 { 651 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 652 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 653 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 654 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 655 
const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			// Expected value of slot i in a group: the group's base offset plus the local
			// indices of the invocations one and two positions before i (wrapping inside
			// the group), i.e. the two invocations whose "+=" targets slot i.
			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
			{
				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
				{
					const int globalOffs = groupNdx*workGroupSize;
					const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
					// The negative case adds workGroupSize before the modulo so the operand stays non-negative.
					const int offs0 = localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
					const int offs1 = localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
					const deUint32 ref = (deUint32)(globalOffs + offs0 + offs1);

					if (res != ref)
						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
				}
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec3 m_localSize;
	const tcu::IVec3 m_workSize;
};

//! SSBO barrier test across two dispatches.
//!
//! program0 writes u_baseVal+offset into an intermediate buffer (binding 1);
//! after glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT) program1 reads those
//! values back and accumulates them with atomicAdd() into a single "sum"
//! (binding 0). The host checks the sum against the closed-form expectation.
class SSBOBarrierCase : public TestCase
{
public:
	//! \param workSize	Number of work groups for both dispatches (local size is 1).
	SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
		: TestCase (context, name, description)
		, m_workSize (workSize)
	{
	}

	//! Runs both programs and verifies the accumulated sum.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{
		// Producer: one invocation per work group, each writing one element.
		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
			ComputeSource("#version 310 es\n"
						  "layout (local_size_x = 1) in;\n"
						  "uniform uint u_baseVal;\n"
						  "layout(binding = 1) buffer Output {\n"
						  " uint values[];\n"
						  "};\n"
						  "void main (void) {\n"
						  " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
						  " values[offset] = u_baseVal+offset;\n"
						  "}\n"));
		// Consumer: reads the element written by the producer and adds it to "sum".
		// \note u_baseVal is declared here as well but main() does not use it.
		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
			ComputeSource("#version 310 es\n"
						  "layout (local_size_x = 1) in;\n"
						  "uniform uint u_baseVal;\n"
						  "layout(binding = 1) buffer Input {\n"
						  " uint values[];\n"
						  "};\n"
						  "layout(binding = 0) buffer Output {\n"
						  " coherent uint sum;\n"
						  "};\n"
						  "void main (void) {\n"
						  " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
						  " uint value = values[offset];\n"
						  " atomicAdd(sum, value);\n"
						  "}\n"));

		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer tempBuffer (m_context.getRenderContext());
		const Buffer outputBuffer (m_context.getRenderContext());
		const deUint32 baseValue = 127;

		m_testCtx.getLog() << program0 << program1;
		if (!program0.isOk() || !program1.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

		// Temp buffer setup
		{
			// values[] is unsized in the shader, so the size is derived from the queried
			// array stride times the total number of work groups.
			const deUint32 valueIndex = gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const deUint32 bufferSize = valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
		}

		// Output buffer setup
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);

			// Zero the accumulator; the inner scope unmaps the buffer before it is bound for shader use.
			{
				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
				deMemset(bufMap.getPtr(), 0, blockSize);
			}

			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.useProgram(program0.getProgram());
		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
		// The barrier under test: issued between the producer and the consumer dispatch.
		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
		gl.useProgram(program1.getProgram());
		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");

		// Read back and compare
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
			deUint32 ref = 0;

			// Expected sum: (baseValue + ndx) over every work group index.
			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
				ref += baseValue + (deUint32)ndx;

			if (res != ref)
			{
				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
				throw tcu::TestError("Comparison failed");
			}
		}

m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec3 m_workSize;
};

//! Basic shared variable test.
//!
//! Each invocation writes globalOffs + localOffs*localOffs into the mirrored
//! slot (localSize-localOffs-1) of a shared array, and after barrier() copies
//! the value found in its own slot of the shared array into Output.values.
class BasicSharedVarCase : public TestCase
{
public:
	//! \param localSize	Work group size written into the shader's local_size_{x,y,z}.
	//! \param workSize		Number of work groups passed to glDispatchCompute().
	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_workSize (workSize)
	{
	}

	//! Builds the shader, dispatches it once and verifies the buffer contents.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{
		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer outputBuffer (m_context.getRenderContext());
		const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2];
		const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2];
		const int numValues = workGroupSize*workGroupCount; // one uint per invocation

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
			<< "layout(binding = 0) buffer Output {\n"
			<< " uint values[" << numValues << "];\n"
			<< "} sb_out;\n\n"
			<< "shared uint offsets[" << workGroupSize << "];\n\n"
			<< "void main (void) {\n"
			<< " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
			<< " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< " uint globalOffs = localSize*globalNdx;\n"
			<< " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
			<< "\n"
			// Write to the mirrored shared slot, then read the own slot after the barrier.
			<< " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
			<< " barrier();\n"
			<< " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
			<< "}\n";

		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Output buffer setup
		{
			// Size the buffer from the block's GL_BUFFER_DATA_SIZE as reported by the program interface query.
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); // matches "binding = 0" in the shader
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Read back and compare
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
			{
				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
				{
					const int globalOffs = groupNdx*workGroupSize;
					// Element address = mapped base + queried offset + queried stride * index.
					const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset +
valueInfo.arrayStride*(globalOffs + localOffs)));
					// Slot i was filled by the invocation with local index (workGroupSize-i-1),
					// which stored globalOffs plus the square of its own index.
					const deUint32 ref = (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));

					if (res != ref)
						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
				}
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec3 m_localSize;
	const tcu::IVec3 m_workSize;
};

//! Atomic operation on a shared variable.
//!
//! All invocations of a work group increment a shared counter with atomicAdd()
//! and use the returned previous value as the index of the Output.values slot
//! (relative to the group's base) into which they store previous value + 1.
class SharedVarAtomicOpCase : public TestCase
{
public:
	//! \param localSize	Work group size written into the shader's local_size_{x,y,z}.
	//! \param workSize		Number of work groups passed to glDispatchCompute().
	SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_workSize (workSize)
	{
	}

	//! Builds the shader, dispatches it once and verifies the buffer contents.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{
		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer outputBuffer (m_context.getRenderContext());
		const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2];
		const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2];
		const int numValues = workGroupSize*workGroupCount; // one uint per invocation

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
			<< "layout(binding = 0) buffer Output {\n"
			<< " uint values[" << numValues << "];\n"
			<< "} sb_out;\n\n"
			<< "shared uint count;\n\n"
			<< "void main (void) {\n"
			<< " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
			<< " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< " uint globalOffs = localSize*globalNdx;\n"
			<< "\n"
			// Every invocation stores 0 before the barrier; the atomic increments follow it.
			<< " count = 0u;\n"
			<< " barrier();\n"
			<< " uint oldVal = atomicAdd(count, 1u);\n"
			<< " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
			<< "}\n";

		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Output buffer setup
		{
			// Size the buffer from the block's GL_BUFFER_DATA_SIZE as reported by the program interface query.
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); // matches "binding = 0" in the shader
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Read back and compare
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
			{
				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
				{
					const int globalOffs =
groupNdx*workGroupSize;
					const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
					// The slot index is the counter value returned by atomicAdd(), and the stored
					// value is that counter value plus one, so slot i of each group must hold i+1.
					const deUint32 ref = (deUint32)(localOffs+1);

					if (res != ref)
						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
				}
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec3 m_localSize;
	const tcu::IVec3 m_workSize;
};

//! Copies an r32ui image into an SSBO with a compute shader.
//!
//! One invocation per texel loads the texel with imageLoad() from image unit 1
//! and stores it at row-major index (y*stride + x) of Output.values (binding 0).
class CopyImageToSSBOCase : public TestCase
{
public:
	//! \param localSize	Work group size (x, y) written into the shader.
	//! \param imageSize	Image size in texels; asserted to be a multiple of localSize per axis.
	CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_imageSize (imageSize)
	{
		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
	}

	//! Uploads random texel data, runs the copy shader and compares the SSBO against the input.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
			<< "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
			<< "layout(binding = 0) buffer Output {\n"
			<< " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
			<< "} sb_out;\n\n"
			<< "void main (void) {\n"
			// stride = total number of invocations along x.
			<< " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
			<< " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
			<< " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
			<< "}\n";

		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer outputBuffer (m_context.getRenderContext());
		const Texture inputTexture (m_context.getRenderContext());
		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
		const tcu::IVec2 workSize = m_imageSize / m_localSize; // work groups per axis
		de::Random rnd (0xab2c7); // fixed seed
		vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]);

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Input values
		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
			*i = rnd.getUint32();

		// Input image setup
		gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
		gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");

		// Bind to unit 1
		gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");

		// Output buffer setup
		{
			// Size the buffer from the block's GL_BUFFER_DATA_SIZE as reported by the program interface query.
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.dispatchCompute(workSize[0], workSize[1], 1);
GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Read back and compare
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			// The reported array size must match the number of uploaded texels.
			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());

			// The buffer is expected to hold the input values in the same order they were uploaded.
			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
			{
				const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
				const deUint32 ref = inputValues[ndx];

				if (res != ref)
					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec2 m_localSize;
	const tcu::IVec2 m_imageSize;
};

//! Copies SSBO contents into an r32ui image with a compute shader.
//!
//! One invocation per texel reads row-major index (y*stride + x) of Input.values
//! and writes it with imageStore() to the image bound to image unit 1. The image
//! is read back through a framebuffer with glReadPixels().
class CopySSBOToImageCase : public TestCase
{
public:
	//! \param localSize	Work group size (x, y) written into the shader.
	//! \param imageSize	Image size in texels; asserted to be a multiple of localSize per axis.
	CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_imageSize (imageSize)
	{
		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
	}

	//! Fills the input buffer with random data, runs the copy shader and compares the image against the input.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
			<< "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
			// \note No explicit binding on the block; the binding is queried below.
			<< "buffer Input {\n"
			<< " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
			<< "} sb_in;\n\n"
			<< "void main (void) {\n"
			<< " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
			<< " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
			<< " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
			<< "}\n";

		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer inputBuffer (m_context.getRenderContext());
		const Texture outputTexture (m_context.getRenderContext());
		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
		const tcu::IVec2 workSize = m_imageSize / m_localSize; // work groups per axis
		de::Random rnd (0x77238ac2); // fixed seed
		vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]);

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Input values
		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
			*i = rnd.getUint32();

		// Input buffer setup
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
			const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);

			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());

			// Write each value at its queried offset/stride; the inner scope unmaps the buffer afterwards.
			{
				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);

				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
			}

			// Bind to whichever binding point the program reports for the block.
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
		}

		// Output image setup
		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");

		// Bind to unit 1
		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");

		// Dispatch compute workload
		gl.dispatchCompute(workSize[0], workSize[1], 1);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Read back and compare
		{
			Framebuffer fbo (m_context.getRenderContext());
			vector<deUint32> pixels (inputValues.size()*4); // 4 components per texel, see the RGBA note below

			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);

			// \note In ES3 we have to use GL_RGBA_INTEGER
			gl.readBuffer(GL_COLOR_ATTACHMENT0);
			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1],
GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");

			// Only the first of the four components read per texel is compared.
			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
			{
				const deUint32 res = pixels[ndx*4];
				const deUint32 ref = inputValues[ndx];

				if (res != ref)
					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec2 m_localSize;
	const tcu::IVec2 m_imageSize;
};

//! Atomic operation on an r32ui image.
//!
//! One work group is dispatched per texel. Invocation 0 of the group clears the
//! texel with imageStore(), and after barrier() every invocation adds its input
//! value to the texel with imageAtomicAdd(). Requires GL_OES_shader_image_atomic.
class ImageAtomicOpCase : public TestCase
{
public:
	//! \param localSize	Work group size along x, i.e. number of values summed per texel.
	//! \param imageSize	Image size in texels; also used as the dispatch size.
	ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_imageSize (imageSize)
	{
	}

	//! Throws tcu::NotSupportedError when GL_OES_shader_image_atomic is not supported.
	void init (void)
	{
		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
			throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
	}

	//! Fills the input buffer with random data, runs the shader and compares per-texel sums.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "#extension GL_OES_shader_image_atomic : require\n"
			<< "layout (local_size_x = " << m_localSize << ") in;\n"
			<< "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
			<< "buffer Input {\n"
			<< " uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
			<< "} sb_in;\n\n"
			<< "void main (void) {\n"
			<< " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
			<< " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
			<< "\n"
			// Clear the group's texel once, then accumulate from all invocations.
			<< " if (gl_LocalInvocationIndex == 0u)\n"
			<< " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
			<< " barrier();\n"
			<< " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
			<< "}\n";

		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer inputBuffer (m_context.getRenderContext());
		const Texture outputTexture (m_context.getRenderContext());
		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
		de::Random rnd (0x77238ac2); // fixed seed
		vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]*m_localSize); // m_localSize values per texel

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Input values
		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
			*i = rnd.getUint32();

		// Input buffer setup
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
			const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);

			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());

			// Write each value at its queried offset/stride; the inner scope unmaps the buffer afterwards.
			{
				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);

				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
			}

			// Bind to whichever binding point the program reports for the block.
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
		}

		// Output image setup
		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");

		// Bind to unit 1
		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");

		// Dispatch compute workload
		gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1); // one work group per texel
		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Read back and compare
		{
			Framebuffer fbo (m_context.getRenderContext());
			vector<deUint32> pixels (m_imageSize[0]*m_imageSize[1]*4); // 4 components per texel

			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);

			// \note In ES3 we have to use GL_RGBA_INTEGER
			gl.readBuffer(GL_COLOR_ATTACHMENT0);
			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");

			// Texel p must hold the deUint32 sum of the m_localSize consecutive inputs starting at p*m_localSize.
			for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
			{
				const deUint32 res = pixels[pixelNdx*4];
				deUint32 ref = 0;

				for (int offs = 0; offs < m_localSize; offs++)
					ref += inputValues[pixelNdx*m_localSize + offs];

				if (res != ref)
					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
			}
		}

m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const int m_localSize;
	const tcu::IVec2 m_imageSize;
};

//! Image access barrier test across two dispatches.
//!
//! program0 stores offset+u_baseVal into one texel per work group of an r32ui
//! image (image unit 2); after glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)
//! program1 loads the texels and accumulates them with atomicAdd() into a single
//! "sum" in an SSBO (binding 0), which the host verifies.
class ImageBarrierCase : public TestCase
{
public:
	//! \param workSize	Number of work groups (x, y); also the size of the temporary image.
	ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
		: TestCase (context, name, description)
		, m_workSize (workSize)
	{
	}

	//! Runs both programs and verifies the accumulated sum.
	//! Fails via TCU_FAIL on compile failure and tcu::TestError on a mismatch.
	IterateResult iterate (void)
	{
		// Producer: writes one texel per work group.
		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
			ComputeSource("#version 310 es\n"
						  "layout (local_size_x = 1) in;\n"
						  "uniform uint u_baseVal;\n"
						  "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
						  "void main (void) {\n"
						  " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
						  " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
						  "}\n"));
		// Consumer: reads the texel of its work group and adds it to "sum".
		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
			ComputeSource("#version 310 es\n"
						  "layout (local_size_x = 1) in;\n"
						  "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
						  "layout(binding = 0) buffer Output {\n"
						  " coherent uint sum;\n"
						  "};\n"
						  "void main (void) {\n"
						  " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
						  " atomicAdd(sum, value);\n"
						  "}\n"));

		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Texture tempTexture (m_context.getRenderContext());
		const Buffer outputBuffer (m_context.getRenderContext());
		const deUint32 baseValue = 127;

		m_testCtx.getLog() << program0 << program1;
		if (!program0.isOk() || !program1.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

		// Temp texture setup
		gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");

		// Bind to unit 2
		// \note Bound once as GL_READ_WRITE; program0 declares the image writeonly, program1 readonly.
		gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");

		// Output buffer setup
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);

			// Zero the accumulator; the inner scope unmaps the buffer before it is bound for shader use.
			{
				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
				deMemset(bufMap.getPtr(), 0, blockSize);
			}

			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.useProgram(program0.getProgram());
		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
		// The barrier under test: issued between the producer and the consumer dispatch.
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		gl.useProgram(program1.getProgram());
		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");

		// Read back and compare
		{
			const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
			const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
			deUint32 ref = 0;

			// Expected sum: (baseValue + ndx) over every work group index of the 2D dispatch.
			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
				ref += baseValue + (deUint32)ndx;

			if (res != ref)
			{
				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
				throw tcu::TestError("Comparison failed");
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec2 m_workSize;
};

class AtomicCounterCase : public TestCase
{
public:
	AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
		: TestCase (context, name, description)
		, m_localSize (localSize)
		, m_workSize (workSize)
	{
	}

	IterateResult iterate (void)
	{
		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
		const Buffer outputBuffer (m_context.getRenderContext());
		const Buffer counterBuffer (m_context.getRenderContext());
		const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2];
		const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2];
		const int numValues = workGroupSize*workGroupCount;

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " <<
m_localSize[2] << ") in;\n" 1466 << "layout(binding = 0) buffer Output {\n" 1467 << " uint values[" << numValues << "];\n" 1468 << "} sb_out;\n\n" 1469 << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n" 1470 << "void main (void) {\n" 1471 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" 1472 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 1473 << " uint globalOffs = localSize*globalNdx;\n" 1474 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" 1475 << "\n" 1476 << " uint oldVal = atomicCounterIncrement(u_count);\n" 1477 << " sb_out.values[globalOffs+localOffs] = oldVal;\n" 1478 << "}\n"; 1479 1480 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); 1481 1482 m_testCtx.getLog() << program; 1483 if (!program.isOk()) 1484 TCU_FAIL("Compile failed"); 1485 1486 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 1487 1488 gl.useProgram(program.getProgram()); 1489 1490 // Atomic counter buffer setup 1491 { 1492 const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); 1493 const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); 1494 const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); 1495 1496 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer); 1497 gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ); 1498 1499 { 1500 const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT); 1501 deMemset(memMap.getPtr(), 0, (int)bufferSize); 1502 } 1503 1504 
gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer); 1505 GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed"); 1506 } 1507 1508 // Output buffer setup 1509 { 1510 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1511 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1512 1513 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 1514 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 1515 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 1516 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 1517 } 1518 1519 // Dispatch compute workload 1520 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 1521 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1522 1523 // Read back and compare atomic counter 1524 { 1525 const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); 1526 const deUint32 uniformOffset = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET); 1527 const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); 1528 const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); 1529 const BufferMemMap bufMap (gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT); 1530 1531 const deUint32 resVal = *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset)); 1532 1533 if (resVal != (deUint32)numValues) 1534 throw tcu::TestError("Invalid atomic counter value"); 1535 } 1536 1537 // Read back and compare SSBO 1538 { 1539 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1540 const int blockSize = 
getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1541 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 1542 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1543 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 1544 deUint32 valSum = 0; 1545 deUint32 refSum = 0; 1546 1547 for (int valNdx = 0; valNdx < numValues; valNdx++) 1548 { 1549 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx)); 1550 1551 valSum += res; 1552 refSum += (deUint32)valNdx; 1553 1554 if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues)) 1555 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]"); 1556 } 1557 1558 if (valSum != refSum) 1559 throw tcu::TestError("Total sum of values in Output.values doesn't match"); 1560 } 1561 1562 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1563 return STOP; 1564 } 1565 1566 private: 1567 const tcu::IVec3 m_localSize; 1568 const tcu::IVec3 m_workSize; 1569 }; 1570 1571 } // anonymous 1572 1573 BasicComputeShaderTests::BasicComputeShaderTests (Context& context) 1574 : TestCaseGroup(context, "basic", "Basic Compute Shader Tests") 1575 { 1576 } 1577 1578 BasicComputeShaderTests::~BasicComputeShaderTests (void) 1579 { 1580 } 1581 1582 void BasicComputeShaderTests::init (void) 1583 { 1584 addChild(new EmptyComputeShaderCase(m_context)); 1585 1586 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1587 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1))); 1588 addChild(new 
UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); 1589 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1590 1591 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1592 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); 1593 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1594 1595 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1596 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1597 1598 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1599 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1600 1601 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1602 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1603 1604 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), 
tcu::IVec3(1,1,1))); 1605 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1606 1607 addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1608 addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1609 addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1610 1611 addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1))); 1612 addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7))); 1613 1614 addChild(new BasicSharedVarCase (m_context, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1615 addChild(new BasicSharedVarCase (m_context, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1616 addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1617 addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1618 1619 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1620 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1621 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", 
tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1622 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1623 1624 addChild(new CopyImageToSSBOCase (m_context, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); 1625 addChild(new CopyImageToSSBOCase (m_context, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); 1626 1627 addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); 1628 addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); 1629 1630 addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64))); 1631 addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64))); 1632 1633 addChild(new ImageBarrierCase (m_context, "image_barrier_single", "Image barrier", tcu::IVec2(1,1))); 1634 addChild(new ImageBarrierCase (m_context, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64))); 1635 1636 addChild(new AtomicCounterCase (m_context, "atomic_counter_single_invocation", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1637 addChild(new AtomicCounterCase (m_context, "atomic_counter_single_group", "Basic atomic counter test", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1638 addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_invocations", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1639 addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_groups", "Basic atomic counter test", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1640 } 1641 1642 } // Functional 1643 } // gles31 1644 } // deqp 1645