1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.1 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Indirect compute dispatch tests. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "es31fIndirectComputeDispatchTests.hpp" 25 #include "gluObjectWrapper.hpp" 26 #include "gluRenderContext.hpp" 27 #include "gluShaderProgram.hpp" 28 #include "glwFunctions.hpp" 29 #include "glwEnums.hpp" 30 #include "tcuVector.hpp" 31 #include "tcuStringTemplate.hpp" 32 #include "tcuTestLog.hpp" 33 #include "deStringUtil.hpp" 34 35 #include <vector> 36 #include <string> 37 #include <map> 38 39 namespace deqp 40 { 41 namespace gles31 42 { 43 namespace Functional 44 { 45 46 using tcu::UVec3; 47 using tcu::TestLog; 48 using std::vector; 49 using std::string; 50 using std::map; 51 52 // \todo [2014-02-17 pyry] Should be extended with following: 53 54 // Negative: 55 // - no active shader program 56 // - indirect negative or not aligned 57 // - indirect + size outside buffer bounds 58 // - no buffer bound to DRAW_INDIRECT_BUFFER 59 // - (implict) buffer mapped 60 61 // Robustness: 62 // - lot of small work group launches 63 // - very large work group size 64 // - no synchronization, touched by gpu 65 // - compute program overwiting buffer 66 67 namespace 68 { 69 70 enum 71 { 72 RESULT_BLOCK_BASE_SIZE = (3+1)*(int)sizeof(deUint32), // uvec3 + uint 73 RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0, 74 RESULT_BLOCK_NUM_PASSED_OFFSET = 3*(int)sizeof(deUint32), 75 76 INDIRECT_COMMAND_SIZE = 3*(int)sizeof(deUint32) 77 }; 78 79 enum GenBuffer 80 { 81 GEN_BUFFER_UPLOAD = 0, 82 GEN_BUFFER_COMPUTE, 83 84 GEN_BUFFER_LAST 85 }; 86 87 glu::ProgramSources genVerifySources (const UVec3& workGroupSize) 88 { 89 static const char* s_verifyDispatchTmpl = 90 "#version 310 es\n" 91 "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n" 92 "layout(binding = 0, std430) buffer Result\n" 93 "{\n" 94 " uvec3 expectedGroupCount;\n" 95 " coherent uint numPassed;\n" 96 "} result;\n" 97 "void main (void)\n" 98 "{\n" 99 " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n" 100 " atomicAdd(result.numPassed, 1u);\n" 101 "}\n"; 102 103 map<string, string> args; 104 105 args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x()); 106 args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y()); 107 args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z()); 108 109 return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args)); 110 } 111 112 class IndirectDispatchCase : public TestCase 113 { 114 public: 115 IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer); 116 ~IndirectDispatchCase (void); 117 118 IterateResult iterate (void); 119 120 protected: 121 struct DispatchCommand 122 { 123 deIntptr offset; 124 UVec3 numWorkGroups; 125 126 DispatchCommand (void) : offset(0) {} 127 DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {} 128 }; 129 130 GenBuffer m_genBuffer; 131 deUintptr m_bufferSize; 132 UVec3 m_workGroupSize; 133 vector<DispatchCommand> m_commands; 134 135 void createCommandBuffer (deUint32 buffer) const; 136 void createResultBuffer (deUint32 buffer) const; 137 138 bool verifyResultBuffer (deUint32 buffer); 139 140 void createCmdBufferUpload (deUint32 buffer) const; 141 void createCmdBufferCompute (deUint32 buffer) const; 142 143 private: 144 IndirectDispatchCase (const IndirectDispatchCase&); 145 IndirectDispatchCase& operator= (const IndirectDispatchCase&); 146 }; 147 148 IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer) 149 : TestCase (context, name, description) 150 , m_genBuffer (genBuffer) 151 , m_bufferSize (0) 152 { 153 } 154 155 IndirectDispatchCase::~IndirectDispatchCase (void) 156 { 157 } 158 159 static int getResultBlockAlignedSize (const glw::Functions& gl) 160 { 161 const int baseSize = RESULT_BLOCK_BASE_SIZE; 162 int alignment = 0; 163 gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment); 164 165 if (alignment == 0 || (baseSize % alignment == 0)) 166 return baseSize; 167 else 168 return (baseSize/alignment + 1)*alignment; 169 } 170 171 void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const 172 { 173 switch (m_genBuffer) 174 { 175 case GEN_BUFFER_UPLOAD: createCmdBufferUpload (buffer); break; 176 case GEN_BUFFER_COMPUTE: createCmdBufferCompute (buffer); break; 177 default: 178 DE_ASSERT(false); 179 } 180 } 181 182 void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const 183 { 184 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 185 vector<deUint8> data (m_bufferSize); 186 187 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 188 { 189 DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3); 190 DE_ASSERT(cmdIter->offset >= 0); 191 DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0); 192 DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize); 193 194 deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset]; 195 196 dstPtr[0] = cmdIter->numWorkGroups[0]; 197 dstPtr[1] = cmdIter->numWorkGroups[1]; 198 dstPtr[2] = cmdIter->numWorkGroups[2]; 199 } 200 201 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); 202 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW); 203 } 204 205 void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const 206 { 207 std::ostringstream src; 208 209 // Header 210 src << 211 "#version 310 es\n" 212 "layout(local_size_x = 1) in;\n" 213 "layout(std430, binding = 1) buffer Out\n" 214 "{\n" 215 " highp uint data[];\n" 216 "};\n" 217 "void writeCmd (uint offset, uvec3 numWorkGroups)\n" 218 "{\n" 219 " data[offset+0u] = numWorkGroups.x;\n" 220 " data[offset+1u] = numWorkGroups.y;\n" 221 " data[offset+2u] = numWorkGroups.z;\n" 222 "}\n" 223 "void main (void)\n" 224 "{\n"; 225 226 // Commands 227 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 228 { 229 const deUint32 offs = (deUint32)(cmdIter->offset/4); 230 DE_ASSERT((deIntptr)offs*4 == cmdIter->offset); 231 232 src << "\twriteCmd(" << offs << "u, uvec3(" 233 << cmdIter->numWorkGroups.x() << "u, " 234 << cmdIter->numWorkGroups.y() << "u, " 235 << cmdIter->numWorkGroups.z() << "u));\n"; 236 } 237 238 src << "}\n"; 239 240 { 241 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 242 glu::ShaderProgram program (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str())); 243 244 m_testCtx.getLog() << program; 245 if (!program.isOk()) 246 TCU_FAIL("Compile failed"); 247 248 gl.useProgram(program.getProgram()); 249 250 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); 251 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW); 252 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer); 253 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 254 255 gl.dispatchCompute(1,1,1); 256 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed"); 257 258 gl.memoryBarrier(GL_COMMAND_BARRIER_BIT); 259 GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed"); 260 } 261 } 262 263 void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const 264 { 265 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 266 const int resultBlockSize = getResultBlockAlignedSize(gl); 267 const int resultBufferSize = resultBlockSize*(int)m_commands.size(); 268 vector<deUint8> data (resultBufferSize); 269 270 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 271 { 272 deUint8* const dstPtr = &data[resultBlockSize*cmdNdx]; 273 274 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0]; 275 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1]; 276 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2]; 277 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0; 278 } 279 280 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 281 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ); 282 } 283 284 deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups) 285 { 286 const int numInvocationsPerGroup = workGroupSize[0]*workGroupSize[1]*workGroupSize[2]; 287 const int numGroups = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2]; 288 289 return numInvocationsPerGroup*numGroups; 290 } 291 292 bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer) 293 { 294 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 295 296 const int resultBlockSize = getResultBlockAlignedSize(gl); 297 const int resultBufferSize = resultBlockSize*(int)m_commands.size(); 298 299 void* mapPtr = DE_NULL; 300 bool allOk = true; 301 302 try 303 { 304 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 305 mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT); 306 307 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed"); 308 TCU_CHECK(mapPtr); 309 310 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 311 { 312 const DispatchCommand& cmd = m_commands[cmdNdx]; 313 const deUint8* const srcPtr = (const deUint8*)mapPtr + cmdNdx*resultBlockSize; 314 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET); 315 const deUint32 expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups); 316 317 // Verify numPassed. 318 if (numPassed != expectedCount) 319 { 320 m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx 321 << ": got numPassed = " << numPassed << ", expected " << expectedCount 322 << TestLog::EndMessage; 323 allOk = false; 324 } 325 } 326 } 327 catch (...) 328 { 329 if (mapPtr) 330 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 331 } 332 333 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 334 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed"); 335 336 return allOk; 337 } 338 339 IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void) 340 { 341 const glu::RenderContext& renderCtx = m_context.getRenderContext(); 342 const glw::Functions& gl = renderCtx.getFunctions(); 343 344 const glu::ShaderProgram program (renderCtx, genVerifySources(m_workGroupSize)); 345 346 glu::Buffer cmdBuffer (renderCtx); 347 glu::Buffer resultBuffer (renderCtx); 348 349 m_testCtx.getLog() << program; 350 TCU_CHECK_MSG(program.isOk(), "Compile failed"); 351 352 m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage; 353 { 354 tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)"); 355 356 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 357 m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset 358 << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups 359 << TestLog::EndMessage; 360 } 361 362 createResultBuffer(*resultBuffer); 363 createCommandBuffer(*cmdBuffer); 364 365 gl.useProgram(program.getProgram()); 366 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer); 367 GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed"); 368 369 { 370 const int resultBlockAlignedSize = getResultBlockAlignedSize(gl); 371 deIntptr curOffset = 0; 372 373 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 374 { 375 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize); 376 gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset); 377 378 curOffset += resultBlockAlignedSize; 379 } 380 } 381 382 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed"); 383 384 if (verifyResultBuffer(*resultBuffer)) 385 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 386 else 387 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer"); 388 389 return STOP; 390 } 391 392 class SingleDispatchCase : public IndirectDispatchCase 393 { 394 public: 395 SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups) 396 : IndirectDispatchCase(context, name, description, genBuffer) 397 { 398 m_bufferSize = bufferSize; 399 m_workGroupSize = workGroupSize; 400 m_commands.push_back(DispatchCommand(offset, numWorkGroups)); 401 } 402 }; 403 404 class MultiDispatchCase : public IndirectDispatchCase 405 { 406 public: 407 MultiDispatchCase (Context& context, GenBuffer genBuffer) 408 : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer) 409 { 410 m_bufferSize = 1<<10; 411 m_workGroupSize = UVec3(3,1,2); 412 413 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 414 m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE, UVec3(2,1,1))); 415 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 416 m_commands.push_back(DispatchCommand(40, UVec3(1,1,7))); 417 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 418 } 419 }; 420 421 class MultiDispatchReuseCommandCase : public IndirectDispatchCase 422 { 423 public: 424 MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer) 425 : IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer) 426 { 427 m_bufferSize = 1<<10; 428 m_workGroupSize = UVec3(3,1,2); 429 430 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 431 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 432 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 433 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 434 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 435 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 436 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 437 } 438 }; 439 440 } // anonymous 441 442 IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context) 443 : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests") 444 { 445 } 446 447 IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void) 448 { 449 } 450 451 void IndirectComputeDispatchTests::init (void) 452 { 453 static const struct 454 { 455 const char* name; 456 GenBuffer gen; 457 } s_genBuffer[] = 458 { 459 { "upload_buffer", GEN_BUFFER_UPLOAD }, 460 { "gen_in_compute", GEN_BUFFER_COMPUTE } 461 }; 462 463 static const struct 464 { 465 const char* name; 466 const char* description; 467 deUintptr bufferSize; 468 deUintptr offset; 469 UVec3 workGroupSize; 470 UVec3 numWorkGroups; 471 } s_singleDispatchCases[] = 472 { 473 // Name Desc BufferSize Offs WorkGroupSize NumWorkGroups 474 { "single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(1,1,1) }, 475 { "multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(2,3,5) }, 476 { "multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(2,3,1), UVec3(1,2,3) }, 477 { "small_offset", "Small offset", 16+INDIRECT_COMMAND_SIZE, 16, UVec3(1,1,1), UVec3(1,1,1) }, 478 { "large_offset", "Large offset", (2<<20), (1<<20) + 12, UVec3(1,1,1), UVec3(1,1,1) }, 479 { "large_offset_multiple_invocations", "Large offset, multiple invocations", (2<<20), (1<<20) + 12, UVec3(2,3,1), UVec3(1,2,3) }, 480 { "empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(0,0,0) }, 481 }; 482 483 for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++) 484 { 485 const GenBuffer genBuf = s_genBuffer[genNdx].gen; 486 tcu::TestCaseGroup* const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, ""); 487 addChild(genGroup); 488 489 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++) 490 genGroup->addChild(new SingleDispatchCase(m_context, 491 s_singleDispatchCases[ndx].name, 492 s_singleDispatchCases[ndx].description, 493 genBuf, 494 s_singleDispatchCases[ndx].bufferSize, 495 s_singleDispatchCases[ndx].offset, 496 s_singleDispatchCases[ndx].workGroupSize, 497 s_singleDispatchCases[ndx].numWorkGroups)); 498 499 genGroup->addChild(new MultiDispatchCase (m_context, genBuf)); 500 genGroup->addChild(new MultiDispatchReuseCommandCase (m_context, genBuf)); 501 } 502 } 503 504 } // Functional 505 } // gles31 506 } // deqp 507