1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL (ES) Module 3 * ----------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Shader execution utilities. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "glsShaderExecUtil.hpp" 25 #include "gluRenderContext.hpp" 26 #include "gluDrawUtil.hpp" 27 #include "gluObjectWrapper.hpp" 28 #include "gluShaderProgram.hpp" 29 #include "gluTextureUtil.hpp" 30 #include "gluProgramInterfaceQuery.hpp" 31 #include "gluPixelTransfer.hpp" 32 #include "gluStrUtil.hpp" 33 #include "tcuTestLog.hpp" 34 #include "glwFunctions.hpp" 35 #include "glwEnums.hpp" 36 #include "deSTLUtil.hpp" 37 #include "deStringUtil.hpp" 38 #include "deUniquePtr.hpp" 39 #include "deMemory.h" 40 41 #include <map> 42 43 namespace deqp 44 { 45 namespace gls 46 { 47 48 namespace ShaderExecUtil 49 { 50 51 using std::vector; 52 53 static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension) 54 { 55 const glw::Functions& gl = renderCtx.getFunctions(); 56 int numExts = 0; 57 58 gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts); 59 60 for (int ndx = 0; ndx < numExts; ndx++) 61 { 62 const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx); 63 64 if (extension == curExt) 65 return true; 66 } 67 68 return false; 69 } 70 71 static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension) 72 { 73 if (!isExtensionSupported(renderCtx, extension)) 74 throw tcu::NotSupportedError(extension + " is not supported"); 75 } 76 77 static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required) 78 { 79 const glw::Functions& gl = renderCtx.getFunctions(); 80 int implementationLimit = -1; 81 deUint32 error; 82 83 gl.getIntegerv(pname, &implementationLimit); 84 error = gl.getError(); 85 86 if (error != GL_NO_ERROR) 87 throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error))); 88 if (implementationLimit < required) 89 throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit)); 90 } 91 92 // Shader utilities 93 94 static std::string generateVertexShader (const ShaderSpec& shaderSpec) 95 { 96 const bool usesInout = glu::glslVersionUsesInOutQualifiers(shaderSpec.version); 97 const char* in = usesInout ? "in" : "attribute"; 98 const char* out = usesInout ? "out" : "varying"; 99 std::ostringstream src; 100 101 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"; 102 103 if (!shaderSpec.globalDeclarations.empty()) 104 src << shaderSpec.globalDeclarations << "\n"; 105 106 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 107 src << in << " " << glu::declare(input->varType, input->name) << ";\n"; 108 109 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 110 { 111 DE_ASSERT(output->varType.isBasicType()); 112 113 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 114 { 115 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType()); 116 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 117 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP); 118 119 src << "flat " << out << " " << glu::declare(intType, "o_" + output->name) << ";\n"; 120 } 121 else 122 src << "flat " << out << " " << glu::declare(output->varType, output->name) << ";\n"; 123 } 124 125 src << "\n" 126 << "void main (void)\n" 127 << "{\n" 128 << " gl_Position = vec4(0.0);\n" 129 << " gl_PointSize = 1.0;\n\n"; 130 131 // Declare necessary output variables (bools). 132 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 133 { 134 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 135 src << "\t" << glu::declare(output->varType, output->name) << ";\n"; 136 } 137 138 // Operation - indented to correct level. 139 { 140 std::istringstream opSrc (shaderSpec.source); 141 std::string line; 142 143 while (std::getline(opSrc, line)) 144 src << "\t" << line << "\n"; 145 } 146 147 // Assignments to outputs. 148 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 149 { 150 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 151 { 152 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType()); 153 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 154 155 src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n"; 156 } 157 } 158 159 src << "}\n"; 160 161 return src.str(); 162 } 163 164 static std::string generateGeometryShader (const ShaderSpec& shaderSpec) 165 { 166 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version)); 167 168 std::ostringstream src; 169 170 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"; 171 172 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES) 173 src << "#extension GL_EXT_geometry_shader : require\n"; 174 175 if (!shaderSpec.globalDeclarations.empty()) 176 src << shaderSpec.globalDeclarations << "\n"; 177 178 src << "layout(points) in;\n" 179 << "layout(points, max_vertices = 1) out;\n"; 180 181 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 182 src << "flat in " << glu::declare(input->varType, "geom_" + input->name) << "[];\n"; 183 184 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 185 { 186 DE_ASSERT(output->varType.isBasicType()); 187 188 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 189 { 190 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType()); 191 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 192 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP); 193 194 src << "flat out " << glu::declare(intType, "o_" + output->name) << ";\n"; 195 } 196 else 197 src << "flat out " << glu::declare(output->varType, output->name) << ";\n"; 198 } 199 200 src << "\n" 201 << "void main (void)\n" 202 << "{\n" 203 << " gl_Position = gl_in[0].gl_Position;\n\n"; 204 205 // Fetch input variables 206 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 207 src << "\t" << glu::declare(input->varType, input->name) << " = geom_" << input->name << "[0];\n"; 208 209 // Declare necessary output variables (bools). 210 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 211 { 212 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 213 src << "\t" << glu::declare(output->varType, output->name) << ";\n"; 214 } 215 216 src << "\n"; 217 218 // Operation - indented to correct level. 219 { 220 std::istringstream opSrc (shaderSpec.source); 221 std::string line; 222 223 while (std::getline(opSrc, line)) 224 src << "\t" << line << "\n"; 225 } 226 227 // Assignments to outputs. 228 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 229 { 230 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 231 { 232 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType()); 233 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 234 235 src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n"; 236 } 237 } 238 239 src << " EmitVertex();\n" 240 << " EndPrimitive();\n" 241 << "}\n"; 242 243 return src.str(); 244 } 245 246 static std::string generateEmptyFragmentSource (glu::GLSLVersion version) 247 { 248 const bool customOut = glu::glslVersionUsesInOutQualifiers(version); 249 std::ostringstream src; 250 251 src << glu::getGLSLVersionDeclaration(version) << "\n"; 252 253 // \todo [2013-08-05 pyry] Do we need one dummy output? 254 255 src << "void main (void)\n{\n"; 256 if (!customOut) 257 src << " gl_FragColor = vec4(0.0);\n"; 258 src << "}\n"; 259 260 return src.str(); 261 } 262 263 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix) 264 { 265 // flat qualifier is not present in earlier versions? 266 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version)); 267 268 std::ostringstream src; 269 270 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n" 271 << "in highp vec4 a_position;\n"; 272 273 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 274 { 275 src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n" 276 << "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n"; 277 } 278 279 src << "\nvoid main (void)\n{\n" 280 << " gl_Position = a_position;\n" 281 << " gl_PointSize = 1.0;\n"; 282 283 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 284 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n"; 285 286 src << "}\n"; 287 288 return src.str(); 289 } 290 291 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap) 292 { 293 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version)); 294 295 std::ostringstream src; 296 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"; 297 298 if (!shaderSpec.globalDeclarations.empty()) 299 src << shaderSpec.globalDeclarations << "\n"; 300 301 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input) 302 src << "flat in " << glu::declare(input->varType, input->name) << ";\n"; 303 304 for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx) 305 { 306 const Symbol& output = shaderSpec.outputs[outNdx]; 307 const int location = de::lookup(outLocationMap, output.name); 308 const std::string outVarName = "o_" + output.name; 309 glu::VariableDeclaration decl (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location)); 310 311 TCU_CHECK_INTERNAL(output.varType.isBasicType()); 312 313 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType())) 314 { 315 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType()); 316 const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT; 317 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP); 318 319 decl.varType = uintType; 320 src << decl << ";\n"; 321 } 322 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType())) 323 { 324 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType()); 325 const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 326 const glu::VarType intType (intBasicType, glu::PRECISION_HIGHP); 327 328 decl.varType = intType; 329 src << decl << ";\n"; 330 } 331 else if (glu::isDataTypeMatrix(output.varType.getBasicType())) 332 { 333 const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType()); 334 const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType()); 335 const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize); 336 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP); 337 338 decl.varType = uintType; 339 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx) 340 { 341 decl.name = outVarName + "_" + de::toString(vecNdx); 342 decl.layout.location = location + vecNdx; 343 src << decl << ";\n"; 344 } 345 } 346 else 347 src << glu::VariableDeclaration(output.varType, output.name, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, location) << ";\n"; 348 } 349 350 src << "\nvoid main (void)\n{\n"; 351 352 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 353 { 354 if ((useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType())) || 355 glu::isDataTypeBoolOrBVec(output->varType.getBasicType()) || 356 glu::isDataTypeMatrix(output->varType.getBasicType())) 357 src << "\t" << glu::declare(output->varType, output->name) << ";\n"; 358 } 359 360 // Operation - indented to correct level. 361 { 362 std::istringstream opSrc (shaderSpec.source); 363 std::string line; 364 365 while (std::getline(opSrc, line)) 366 src << "\t" << line << "\n"; 367 } 368 369 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output) 370 { 371 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType())) 372 src << " o_" << output->name << " = floatBitsToUint(" << output->name << ");\n"; 373 else if (glu::isDataTypeMatrix(output->varType.getBasicType())) 374 { 375 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType()); 376 377 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx) 378 if (useIntOutputs) 379 src << "\to_" << output->name << "_" << vecNdx << " = floatBitsToUint(" << output->name << "[" << vecNdx << "]);\n"; 380 else 381 src << "\to_" << output->name << "_" << vecNdx << " = " << output->name << "[" << vecNdx << "];\n"; 382 } 383 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType())) 384 { 385 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType()); 386 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT; 387 388 src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n"; 389 } 390 } 391 392 src << "}\n"; 393 394 return src.str(); 395 } 396 397 // ShaderExecutor 398 399 ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 400 : m_renderCtx (renderCtx) 401 , m_inputs (shaderSpec.inputs) 402 , m_outputs (shaderSpec.outputs) 403 { 404 } 405 406 ShaderExecutor::~ShaderExecutor (void) 407 { 408 } 409 410 void ShaderExecutor::useProgram (void) 411 { 412 DE_ASSERT(isOk()); 413 m_renderCtx.getFunctions().useProgram(getProgram()); 414 } 415 416 // VertexProcessorExecutor (base class for vertex and geometry executors) 417 418 class VertexProcessorExecutor : public ShaderExecutor 419 { 420 public: 421 VertexProcessorExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources); 422 ~VertexProcessorExecutor(void); 423 424 bool isOk (void) const { return m_program.isOk(); } 425 void log (tcu::TestLog& dst) const { dst << m_program; } 426 deUint32 getProgram (void) const { return m_program.getProgram(); } 427 428 void execute (int numValues, const void* const* inputs, void* const* outputs); 429 430 protected: 431 glu::ShaderProgram m_program; 432 }; 433 434 template<typename Iterator> 435 struct SymbolNameIterator 436 { 437 Iterator symbolIter; 438 439 SymbolNameIterator (Iterator symbolIter_) : symbolIter(symbolIter_) {} 440 441 inline SymbolNameIterator& operator++ (void) { ++symbolIter; return *this; } 442 443 inline bool operator== (const SymbolNameIterator& other) { return symbolIter == other.symbolIter; } 444 inline bool operator!= (const SymbolNameIterator& other) { return symbolIter != other.symbolIter; } 445 446 inline std::string operator* (void) const 447 { 448 if (glu::isDataTypeBoolOrBVec(symbolIter->varType.getBasicType())) 449 return "o_" + symbolIter->name; 450 else 451 return symbolIter->name; 452 } 453 }; 454 455 template<typename Iterator> 456 inline glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> > getTFVaryings (Iterator begin, Iterator end) 457 { 458 return glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> >(SymbolNameIterator<Iterator>(begin), SymbolNameIterator<Iterator>(end)); 459 } 460 461 VertexProcessorExecutor::VertexProcessorExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources) 462 : ShaderExecutor (renderCtx, shaderSpec) 463 , m_program (renderCtx, 464 glu::ProgramSources(sources) << getTFVaryings(shaderSpec.outputs.begin(), shaderSpec.outputs.end()) 465 << glu::TransformFeedbackMode(GL_INTERLEAVED_ATTRIBS)) 466 { 467 } 468 469 VertexProcessorExecutor::~VertexProcessorExecutor (void) 470 { 471 } 472 473 template<typename Iterator> 474 static int computeTotalScalarSize (Iterator begin, Iterator end) 475 { 476 int size = 0; 477 for (Iterator cur = begin; cur != end; ++cur) 478 size += cur->varType.getScalarSize(); 479 return size; 480 } 481 482 void VertexProcessorExecutor::execute (int numValues, const void* const* inputs, void* const* outputs) 483 { 484 const glw::Functions& gl = m_renderCtx.getFunctions(); 485 const bool useTFObject = isContextTypeES(m_renderCtx.getType()) || (isContextTypeGLCore(m_renderCtx.getType()) && m_renderCtx.getType().getMajorVersion() >= 4); 486 vector<glu::VertexArrayBinding> vertexArrays; 487 de::UniquePtr<glu::TransformFeedback> transformFeedback (useTFObject ? new glu::TransformFeedback(m_renderCtx) : DE_NULL); 488 glu::Buffer outputBuffer (m_renderCtx); 489 const int outputBufferStride = computeTotalScalarSize(m_outputs.begin(), m_outputs.end())*sizeof(deUint32); 490 491 // Setup inputs. 492 for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++) 493 { 494 const Symbol& symbol = m_inputs[inputNdx]; 495 const void* ptr = inputs[inputNdx]; 496 const glu::DataType basicType = symbol.varType.getBasicType(); 497 const int vecSize = glu::getDataTypeScalarSize(basicType); 498 499 if (glu::isDataTypeFloatOrVec(basicType)) 500 vertexArrays.push_back(glu::va::Float(symbol.name, vecSize, numValues, 0, (const float*)ptr)); 501 else if (glu::isDataTypeIntOrIVec(basicType)) 502 vertexArrays.push_back(glu::va::Int32(symbol.name, vecSize, numValues, 0, (const deInt32*)ptr)); 503 else if (glu::isDataTypeUintOrUVec(basicType)) 504 vertexArrays.push_back(glu::va::Uint32(symbol.name, vecSize, numValues, 0, (const deUint32*)ptr)); 505 else if (glu::isDataTypeMatrix(basicType)) 506 { 507 int numRows = glu::getDataTypeMatrixNumRows(basicType); 508 int numCols = glu::getDataTypeMatrixNumColumns(basicType); 509 int stride = numRows * numCols * sizeof(float); 510 511 for (int colNdx = 0; colNdx < numCols; ++colNdx) 512 vertexArrays.push_back(glu::va::Float(symbol.name, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows)); 513 } 514 else 515 DE_ASSERT(false); 516 } 517 518 // Setup TF outputs. 519 if (useTFObject) 520 gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, **transformFeedback); 521 gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, *outputBuffer); 522 gl.bufferData(GL_TRANSFORM_FEEDBACK_BUFFER, outputBufferStride*numValues, DE_NULL, GL_STREAM_READ); 523 gl.bindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, *outputBuffer); 524 GLU_EXPECT_NO_ERROR(gl.getError(), "Error in TF setup"); 525 526 // Draw with rasterization disabled. 527 gl.beginTransformFeedback(GL_POINTS); 528 gl.enable(GL_RASTERIZER_DISCARD); 529 glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), vertexArrays.empty() ? DE_NULL : &vertexArrays[0], 530 glu::pr::Points(numValues)); 531 gl.disable(GL_RASTERIZER_DISCARD); 532 gl.endTransformFeedback(); 533 GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw"); 534 535 // Read back data. 536 { 537 const void* srcPtr = gl.mapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, outputBufferStride*numValues, GL_MAP_READ_BIT); 538 int curOffset = 0; // Offset in buffer in bytes. 539 540 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER)"); 541 TCU_CHECK(srcPtr != DE_NULL); 542 543 for (int outputNdx = 0; outputNdx < (int)m_outputs.size(); outputNdx++) 544 { 545 const Symbol& symbol = m_outputs[outputNdx]; 546 void* dstPtr = outputs[outputNdx]; 547 const int scalarSize = symbol.varType.getScalarSize(); 548 549 for (int ndx = 0; ndx < numValues; ndx++) 550 deMemcpy((deUint32*)dstPtr + scalarSize*ndx, (const deUint8*)srcPtr + curOffset + ndx*outputBufferStride, scalarSize*sizeof(deUint32)); 551 552 curOffset += scalarSize*sizeof(deUint32); 553 } 554 555 gl.unmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER); 556 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()"); 557 } 558 559 if (useTFObject) 560 gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0); 561 gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); 562 GLU_EXPECT_NO_ERROR(gl.getError(), "Restore state"); 563 } 564 565 // VertexShaderExecutor 566 567 class VertexShaderExecutor : public VertexProcessorExecutor 568 { 569 public: 570 VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 571 }; 572 573 VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 574 : VertexProcessorExecutor (renderCtx, shaderSpec, 575 glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec)) 576 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version))) 577 { 578 } 579 580 // GeometryShaderExecutor 581 582 class CheckGeomSupport 583 { 584 public: 585 inline CheckGeomSupport (const glu::RenderContext& renderCtx) 586 { 587 if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES) 588 checkExtension(renderCtx, "GL_EXT_geometry_shader"); 589 } 590 }; 591 592 class GeometryShaderExecutor : private CheckGeomSupport, public VertexProcessorExecutor 593 { 594 public: 595 GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 596 }; 597 598 GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 599 : CheckGeomSupport (renderCtx) 600 , VertexProcessorExecutor (renderCtx, shaderSpec, 601 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "", "geom_")) 602 << glu::GeometrySource(generateGeometryShader(shaderSpec)) 603 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version))) 604 { 605 } 606 607 // FragmentShaderExecutor 608 609 class FragmentShaderExecutor : public ShaderExecutor 610 { 611 public: 612 FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 613 ~FragmentShaderExecutor (void); 614 615 bool isOk (void) const { return m_program.isOk(); } 616 void log (tcu::TestLog& dst) const { dst << m_program; } 617 deUint32 getProgram (void) const { return m_program.getProgram(); } 618 619 void execute (int numValues, const void* const* inputs, void* const* outputs); 620 621 protected: 622 std::vector<const Symbol*> m_outLocationSymbols; 623 std::map<std::string, int> m_outLocationMap; 624 glu::ShaderProgram m_program; 625 }; 626 627 static std::map<std::string, int> generateLocationMap (const std::vector<Symbol>& symbols, std::vector<const Symbol*>& locationSymbols) 628 { 629 std::map<std::string, int> ret; 630 int location = 0; 631 632 locationSymbols.clear(); 633 634 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it) 635 { 636 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType()); 637 638 TCU_CHECK_INTERNAL(!de::contains(ret, it->name)); 639 de::insert(ret, it->name, location); 640 location += numLocations; 641 642 for (int ndx = 0; ndx < numLocations; ++ndx) 643 locationSymbols.push_back(&*it); 644 } 645 646 return ret; 647 } 648 649 inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx) 650 { 651 glu::ContextType type = renderCtx.getType(); 652 return glu::isContextTypeGLCore(type); 653 } 654 655 FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 656 : ShaderExecutor (renderCtx, shaderSpec) 657 , m_outLocationSymbols () 658 , m_outLocationMap (generateLocationMap(m_outputs, m_outLocationSymbols)) 659 , m_program (renderCtx, 660 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "")) 661 << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outLocationMap))) 662 { 663 } 664 665 FragmentShaderExecutor::~FragmentShaderExecutor (void) 666 { 667 } 668 669 inline int queryInt (const glw::Functions& gl, deUint32 pname) 670 { 671 int value = 0; 672 gl.getIntegerv(pname, &value); 673 return value; 674 } 675 676 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs) 677 { 678 const tcu::TextureFormat::ChannelOrder channelOrderMap[] = 679 { 680 tcu::TextureFormat::R, 681 tcu::TextureFormat::RG, 682 tcu::TextureFormat::RGBA, // No RGB variants available. 683 tcu::TextureFormat::RGBA 684 }; 685 686 const glu::DataType basicType = outputType.getBasicType(); 687 const int numComps = glu::getDataTypeNumComponents(basicType); 688 tcu::TextureFormat::ChannelType channelType; 689 690 switch (glu::getDataTypeScalarType(basicType)) 691 { 692 case glu::TYPE_UINT: channelType = tcu::TextureFormat::UNSIGNED_INT32; break; 693 case glu::TYPE_INT: channelType = tcu::TextureFormat::SIGNED_INT32; break; 694 case glu::TYPE_BOOL: channelType = tcu::TextureFormat::SIGNED_INT32; break; 695 case glu::TYPE_FLOAT: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT; break; 696 default: 697 throw tcu::InternalError("Invalid output type"); 698 } 699 700 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap))); 701 702 return tcu::TextureFormat(channelOrderMap[numComps-1], channelType); 703 } 704 705 void FragmentShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs) 706 { 707 const glw::Functions& gl = m_renderCtx.getFunctions(); 708 const bool useIntOutputs = !hasFloatRenderTargets(m_renderCtx); 709 const int maxRenderbufferSize = queryInt(gl, GL_MAX_RENDERBUFFER_SIZE); 710 const int framebufferW = de::min(maxRenderbufferSize, numValues); 711 const int framebufferH = (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0); 712 713 glu::Framebuffer framebuffer (m_renderCtx); 714 glu::RenderbufferVector renderbuffers (m_renderCtx, m_outLocationSymbols.size()); 715 716 vector<glu::VertexArrayBinding> vertexArrays; 717 vector<tcu::Vec2> positions (numValues); 718 719 if (framebufferH > maxRenderbufferSize) 720 throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size"); 721 722 // Compute positions - 1px points are used to drive fragment shading. 723 for (int valNdx = 0; valNdx < numValues; valNdx++) 724 { 725 const int ix = valNdx % framebufferW; 726 const int iy = valNdx / framebufferW; 727 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW)); 728 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH)); 729 730 positions[valNdx] = tcu::Vec2(fx, fy); 731 } 732 733 // Vertex inputs. 734 vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0])); 735 736 for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++) 737 { 738 const Symbol& symbol = m_inputs[inputNdx]; 739 const std::string attribName = "a_" + symbol.name; 740 const void* ptr = inputs[inputNdx]; 741 const glu::DataType basicType = symbol.varType.getBasicType(); 742 const int vecSize = glu::getDataTypeScalarSize(basicType); 743 744 if (glu::isDataTypeFloatOrVec(basicType)) 745 vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr)); 746 else if (glu::isDataTypeIntOrIVec(basicType)) 747 vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr)); 748 else if (glu::isDataTypeUintOrUVec(basicType)) 749 vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr)); 750 else if (glu::isDataTypeMatrix(basicType)) 751 { 752 int numRows = glu::getDataTypeMatrixNumRows(basicType); 753 int numCols = glu::getDataTypeMatrixNumColumns(basicType); 754 int stride = numRows * numCols * sizeof(float); 755 756 for (int colNdx = 0; colNdx < numCols; ++colNdx) 757 vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows)); 758 } 759 else 760 DE_ASSERT(false); 761 } 762 763 // Construct framebuffer. 764 gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer); 765 766 for (int outNdx = 0; outNdx < (int)m_outLocationSymbols.size(); ++outNdx) 767 { 768 const Symbol& output = *m_outLocationSymbols[outNdx]; 769 const deUint32 renderbuffer = renderbuffers[outNdx]; 770 const deUint32 format = glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs)); 771 772 gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer); 773 gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH); 774 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer); 775 } 776 gl.bindRenderbuffer(GL_RENDERBUFFER, 0); 777 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object"); 778 TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); 779 780 { 781 vector<deUint32> drawBuffers(m_outLocationSymbols.size()); 782 for (int ndx = 0; ndx < (int)m_outLocationSymbols.size(); ndx++) 783 drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx; 784 gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]); 785 GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()"); 786 } 787 788 // Render 789 gl.viewport(0, 0, framebufferW, framebufferH); 790 glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), &vertexArrays[0], 791 glu::pr::Points(numValues)); 792 GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw"); 793 794 // Read back pixels. 795 { 796 tcu::TextureLevel tmpBuf; 797 798 // \todo [2013-08-07 pyry] Some fast-paths could be added here. 799 800 for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx) 801 { 802 const Symbol& output = m_outputs[outNdx]; 803 const int outSize = output.varType.getScalarSize(); 804 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType()); 805 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType()); 806 deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]); 807 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, useIntOutputs); 808 const tcu::TextureFormat readFormat (tcu::TextureFormat::RGBA, format.type); 809 const int outLocation = de::lookup(m_outLocationMap, output.name); 810 811 tmpBuf.setStorage(readFormat, framebufferW, framebufferH); 812 813 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx) 814 { 815 gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx); 816 glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess()); 817 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels"); 818 819 if (outSize == 4 && outNumLocs == 1) 820 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32)); 821 else 822 { 823 for (int valNdx = 0; valNdx < numValues; valNdx++) 824 { 825 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4; 826 deUint32* dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx]; 827 deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32)); 828 } 829 } 830 } 831 } 832 } 833 834 // \todo [2013-08-07 pyry] Clear draw buffers & viewport? 835 gl.bindFramebuffer(GL_FRAMEBUFFER, 0); 836 } 837 838 // Shared utilities for compute and tess executors 839 840 static deUint32 getVecStd430ByteAlignment (glu::DataType type) 841 { 842 switch (glu::getDataTypeScalarSize(type)) 843 { 844 case 1: return 4u; 845 case 2: return 8u; 846 case 3: return 16u; 847 case 4: return 16u; 848 default: 849 DE_ASSERT(false); 850 return 0u; 851 } 852 } 853 854 class BufferIoExecutor : public ShaderExecutor 855 { 856 public: 857 BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources); 858 ~BufferIoExecutor (void); 859 860 bool isOk (void) const { return m_program.isOk(); } 861 void log (tcu::TestLog& dst) const { dst << m_program; } 862 deUint32 getProgram (void) const { return m_program.getProgram(); } 863 864 protected: 865 enum 866 { 867 INPUT_BUFFER_BINDING = 0, 868 OUTPUT_BUFFER_BINDING = 1, 869 }; 870 871 void initBuffers (int numValues); 872 deUint32 getInputBuffer (void) const { return *m_inputBuffer; } 873 deUint32 getOutputBuffer (void) const { return *m_outputBuffer; } 874 deUint32 getInputStride (void) const { return getLayoutStride(m_inputLayout); } 875 deUint32 getOutputStride (void) const { return getLayoutStride(m_outputLayout); } 876 877 void uploadInputBuffer (const void* const* inputPtrs, int numValues); 878 void readOutputBuffer (void* const* outputPtrs, int numValues); 879 880 static void declareBufferBlocks (std::ostream& src, const ShaderSpec& spec); 881 static void generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName); 882 883 glu::ShaderProgram m_program; 884 885 private: 886 struct VarLayout 887 { 888 deUint32 offset; 889 deUint32 stride; 890 deUint32 matrixStride; 891 892 VarLayout (void) : offset(0), stride(0), matrixStride(0) {} 893 }; 894 895 void resizeInputBuffer (int newSize); 896 void resizeOutputBuffer (int newSize); 897 898 static void computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout); 899 static deUint32 getLayoutStride (const vector<VarLayout>& layout); 900 901 static void copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr); 902 static void copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr); 903 904 glu::Buffer m_inputBuffer; 905 glu::Buffer m_outputBuffer; 906 907 vector<VarLayout> m_inputLayout; 908 vector<VarLayout> m_outputLayout; 909 }; 910 911 BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources) 912 : ShaderExecutor (renderCtx, shaderSpec) 913 , m_program (renderCtx, sources) 914 , m_inputBuffer (renderCtx) 915 , m_outputBuffer (renderCtx) 916 { 917 computeVarLayout(m_inputs, &m_inputLayout); 918 computeVarLayout(m_outputs, &m_outputLayout); 919 } 920 921 BufferIoExecutor::~BufferIoExecutor (void) 922 { 923 } 924 925 void BufferIoExecutor::resizeInputBuffer (int newSize) 926 { 927 const glw::Functions& gl = m_renderCtx.getFunctions(); 928 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer); 929 gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW); 930 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer"); 931 } 932 933 void BufferIoExecutor::resizeOutputBuffer (int newSize) 934 { 935 const glw::Functions& gl = m_renderCtx.getFunctions(); 936 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer); 937 gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW); 938 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer"); 939 } 940 941 void BufferIoExecutor::initBuffers (int numValues) 942 { 943 const deUint32 inputStride = getLayoutStride(m_inputLayout); 944 const deUint32 outputStride = getLayoutStride(m_outputLayout); 945 const int inputBufferSize = numValues * inputStride; 946 const int outputBufferSize = numValues * outputStride; 947 948 resizeInputBuffer(inputBufferSize); 949 resizeOutputBuffer(outputBufferSize); 950 } 951 952 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout) 953 { 954 deUint32 maxAlignment = 0; 955 deUint32 curOffset = 0; 956 957 DE_ASSERT(layout->empty()); 958 layout->resize(symbols.size()); 959 960 for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++) 961 { 962 const Symbol& symbol = symbols[varNdx]; 963 const glu::DataType basicType = symbol.varType.getBasicType(); 964 VarLayout& layoutEntry = (*layout)[varNdx]; 965 966 if (glu::isDataTypeScalarOrVector(basicType)) 967 { 968 const deUint32 alignment = getVecStd430ByteAlignment(basicType); 969 const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType)*sizeof(deUint32); 970 971 curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment); 972 maxAlignment = de::max(maxAlignment, alignment); 973 974 layoutEntry.offset = curOffset; 975 layoutEntry.matrixStride = 0; 976 977 curOffset += size; 978 } 979 else if (glu::isDataTypeMatrix(basicType)) 980 { 981 const int numVecs = glu::getDataTypeMatrixNumColumns(basicType); 982 const glu::DataType vecType = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType)); 983 const deUint32 vecAlignment = getVecStd430ByteAlignment(vecType); 984 985 curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment); 986 maxAlignment = de::max(maxAlignment, vecAlignment); 987 988 layoutEntry.offset = curOffset; 989 layoutEntry.matrixStride = vecAlignment; 990 991 curOffset += vecAlignment*numVecs; 992 } 993 else 994 DE_ASSERT(false); 995 } 996 997 { 998 const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment); 999 1000 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter) 1001 varIter->stride = totalSize; 1002 } 1003 } 1004 1005 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout) 1006 { 1007 return layout.empty() ? 0 : layout[0].stride; 1008 } 1009 1010 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr) 1011 { 1012 if (varType.isBasicType()) 1013 { 1014 const glu::DataType basicType = varType.getBasicType(); 1015 const bool isMatrix = glu::isDataTypeMatrix(basicType); 1016 const int scalarSize = glu::getDataTypeScalarSize(basicType); 1017 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1; 1018 const int numComps = scalarSize / numVecs; 1019 1020 for (int elemNdx = 0; elemNdx < numValues; elemNdx++) 1021 { 1022 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++) 1023 { 1024 const int srcOffset = sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps); 1025 const int dstOffset = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0); 1026 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset; 1027 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset; 1028 1029 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps); 1030 } 1031 } 1032 } 1033 else 1034 throw tcu::InternalError("Unsupported type"); 1035 } 1036 1037 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr) 1038 { 1039 if (varType.isBasicType()) 1040 { 1041 const glu::DataType basicType = varType.getBasicType(); 1042 const bool isMatrix = glu::isDataTypeMatrix(basicType); 1043 const int scalarSize = glu::getDataTypeScalarSize(basicType); 1044 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1; 1045 const int numComps = scalarSize / numVecs; 1046 1047 for (int elemNdx = 0; elemNdx < numValues; elemNdx++) 1048 { 1049 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++) 1050 { 1051 const int srcOffset = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0); 1052 const int dstOffset = sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps); 1053 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset; 1054 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset; 1055 1056 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps); 1057 } 1058 } 1059 } 1060 else 1061 throw tcu::InternalError("Unsupported type"); 1062 } 1063 1064 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues) 1065 { 1066 const glw::Functions& gl = m_renderCtx.getFunctions(); 1067 const deUint32 buffer = *m_inputBuffer; 1068 const deUint32 inputStride = getLayoutStride(m_inputLayout); 1069 const int inputBufferSize = inputStride*numValues; 1070 1071 if (inputBufferSize == 0) 1072 return; // No inputs 1073 1074 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 1075 void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT); 1076 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); 1077 TCU_CHECK(mapPtr); 1078 1079 try 1080 { 1081 DE_ASSERT(m_inputs.size() == m_inputLayout.size()); 1082 for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx) 1083 { 1084 const glu::VarType& varType = m_inputs[inputNdx].varType; 1085 const VarLayout& layout = m_inputLayout[inputNdx]; 1086 1087 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr); 1088 } 1089 } 1090 catch (...) 1091 { 1092 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 1093 throw; 1094 } 1095 1096 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 1097 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()"); 1098 } 1099 1100 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues) 1101 { 1102 const glw::Functions& gl = m_renderCtx.getFunctions(); 1103 const deUint32 buffer = *m_outputBuffer; 1104 const deUint32 outputStride = getLayoutStride(m_outputLayout); 1105 const int outputBufferSize = numValues*outputStride; 1106 1107 DE_ASSERT(outputBufferSize > 0); // At least some outputs are required. 1108 1109 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 1110 void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT); 1111 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); 1112 TCU_CHECK(mapPtr); 1113 1114 try 1115 { 1116 DE_ASSERT(m_outputs.size() == m_outputLayout.size()); 1117 for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx) 1118 { 1119 const glu::VarType& varType = m_outputs[outputNdx].varType; 1120 const VarLayout& layout = m_outputLayout[outputNdx]; 1121 1122 copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]); 1123 } 1124 } 1125 catch (...) 1126 { 1127 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 1128 throw; 1129 } 1130 1131 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 1132 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()"); 1133 } 1134 1135 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec) 1136 { 1137 // Input struct 1138 if (!spec.inputs.empty()) 1139 { 1140 glu::StructType inputStruct("Inputs"); 1141 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter) 1142 inputStruct.addMember(symIter->name.c_str(), symIter->varType); 1143 src << glu::declare(&inputStruct) << ";\n"; 1144 } 1145 1146 // Output struct 1147 { 1148 glu::StructType outputStruct("Outputs"); 1149 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter) 1150 outputStruct.addMember(symIter->name.c_str(), symIter->varType); 1151 src << glu::declare(&outputStruct) << ";\n"; 1152 } 1153 1154 src << "\n"; 1155 1156 if (!spec.inputs.empty()) 1157 { 1158 src << "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n" 1159 << "{\n" 1160 << " Inputs inputs[];\n" 1161 << "};\n"; 1162 } 1163 1164 src << "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n" 1165 << "{\n" 1166 << " Outputs outputs[];\n" 1167 << "};\n" 1168 << "\n"; 1169 } 1170 1171 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName) 1172 { 1173 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter) 1174 src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n"; 1175 1176 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter) 1177 src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n"; 1178 1179 src << "\n"; 1180 1181 { 1182 std::istringstream opSrc (spec.source); 1183 std::string line; 1184 1185 while (std::getline(opSrc, line)) 1186 src << "\t" << line << "\n"; 1187 } 1188 1189 src << "\n"; 1190 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter) 1191 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n"; 1192 } 1193 1194 // ComputeShaderExecutor 1195 1196 class ComputeShaderExecutor : public BufferIoExecutor 1197 { 1198 public: 1199 ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 1200 ~ComputeShaderExecutor (void); 1201 1202 void execute (int numValues, const void* const* inputs, void* const* outputs); 1203 1204 protected: 1205 static std::string generateComputeShader (const ShaderSpec& spec); 1206 1207 tcu::IVec3 m_maxWorkSize; 1208 }; 1209 1210 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec) 1211 { 1212 std::ostringstream src; 1213 1214 src << glu::getGLSLVersionDeclaration(spec.version) << "\n"; 1215 1216 if (!spec.globalDeclarations.empty()) 1217 src << spec.globalDeclarations << "\n"; 1218 1219 src << "layout(local_size_x = 1) in;\n" 1220 << "\n"; 1221 1222 declareBufferBlocks(src, spec); 1223 1224 src << "void main (void)\n" 1225 << "{\n" 1226 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n" 1227 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"; 1228 1229 generateExecBufferIo(src, spec, "invocationNdx"); 1230 1231 src << "}\n"; 1232 1233 return src.str(); 1234 } 1235 1236 ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 1237 : BufferIoExecutor (renderCtx, shaderSpec, 1238 glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec))) 1239 { 1240 m_maxWorkSize = tcu::IVec3(128,128,64); // Minimum in 3plus 1241 } 1242 1243 ComputeShaderExecutor::~ComputeShaderExecutor (void) 1244 { 1245 } 1246 1247 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs) 1248 { 1249 const glw::Functions& gl = m_renderCtx.getFunctions(); 1250 const int maxValuesPerInvocation = m_maxWorkSize[0]; 1251 const deUint32 inputStride = getInputStride(); 1252 const deUint32 outputStride = getOutputStride(); 1253 1254 initBuffers(numValues); 1255 1256 // Setup input buffer & copy data 1257 uploadInputBuffer(inputs, numValues); 1258 1259 // Perform compute invocations 1260 { 1261 int curOffset = 0; 1262 while (curOffset < numValues) 1263 { 1264 const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset); 1265 1266 if (inputStride > 0) 1267 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride); 1268 1269 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride); 1270 GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)"); 1271 1272 gl.dispatchCompute(numToExec, 1, 1); 1273 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1274 1275 curOffset += numToExec; 1276 } 1277 } 1278 1279 // Read back data 1280 readOutputBuffer(outputs, numValues); 1281 } 1282 1283 // Tessellation utils 1284 1285 static std::string generateVertexShaderForTess (glu::GLSLVersion version) 1286 { 1287 std::ostringstream src; 1288 1289 src << glu::getGLSLVersionDeclaration(version) << "\n"; 1290 1291 src << "void main (void)\n{\n" 1292 << " gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n" 1293 << "}\n"; 1294 1295 return src.str(); 1296 } 1297 1298 class CheckTessSupport 1299 { 1300 public: 1301 enum Stage 1302 { 1303 STAGE_CONTROL = 0, 1304 STAGE_EVAL, 1305 }; 1306 1307 inline CheckTessSupport (const glu::RenderContext& renderCtx, Stage stage) 1308 { 1309 const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2 1310 1311 if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES) 1312 checkExtension(renderCtx, "GL_EXT_tessellation_shader"); 1313 1314 if (stage == STAGE_CONTROL) 1315 checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired); 1316 else if (stage == STAGE_EVAL) 1317 checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired); 1318 else 1319 DE_ASSERT(false); 1320 } 1321 }; 1322 1323 // TessControlExecutor 1324 1325 class TessControlExecutor : private CheckTessSupport, public BufferIoExecutor 1326 { 1327 public: 1328 TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 1329 ~TessControlExecutor (void); 1330 1331 void execute (int numValues, const void* const* inputs, void* const* outputs); 1332 1333 protected: 1334 static std::string generateTessControlShader (const ShaderSpec& shaderSpec); 1335 }; 1336 1337 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec) 1338 { 1339 std::ostringstream src; 1340 1341 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"; 1342 1343 if (shaderSpec.version == glu::GLSL_VERSION_310_ES) 1344 src << "#extension GL_EXT_tessellation_shader : require\n"; 1345 1346 if (!shaderSpec.globalDeclarations.empty()) 1347 src << shaderSpec.globalDeclarations << "\n"; 1348 1349 src << "\nlayout(vertices = 1) out;\n\n"; 1350 1351 declareBufferBlocks(src, shaderSpec); 1352 1353 src << "void main (void)\n{\n"; 1354 1355 for (int ndx = 0; ndx < 2; ndx++) 1356 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n"; 1357 1358 for (int ndx = 0; ndx < 4; ndx++) 1359 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n"; 1360 1361 src << "\n" 1362 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n"; 1363 1364 generateExecBufferIo(src, shaderSpec, "invocationId"); 1365 1366 src << "}\n"; 1367 1368 return src.str(); 1369 } 1370 1371 static std::string generateEmptyTessEvalShader (glu::GLSLVersion version) 1372 { 1373 std::ostringstream src; 1374 1375 src << glu::getGLSLVersionDeclaration(version) << "\n"; 1376 1377 if (version == glu::GLSL_VERSION_310_ES) 1378 src << "#extension GL_EXT_tessellation_shader : require\n\n"; 1379 1380 src << "layout(triangles, ccw) in;\n"; 1381 1382 src << "\nvoid main (void)\n{\n" 1383 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n" 1384 << "}\n"; 1385 1386 return src.str(); 1387 } 1388 1389 TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 1390 : CheckTessSupport (renderCtx, STAGE_CONTROL) 1391 , BufferIoExecutor (renderCtx, shaderSpec, glu::ProgramSources() 1392 << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version)) 1393 << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) 1394 << glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version)) 1395 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version))) 1396 { 1397 } 1398 1399 TessControlExecutor::~TessControlExecutor (void) 1400 { 1401 } 1402 1403 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs) 1404 { 1405 const glw::Functions& gl = m_renderCtx.getFunctions(); 1406 1407 initBuffers(numValues); 1408 1409 // Setup input buffer & copy data 1410 uploadInputBuffer(inputs, numValues); 1411 1412 if (!m_inputs.empty()) 1413 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer()); 1414 1415 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer()); 1416 1417 // Render patches 1418 gl.patchParameteri(GL_PATCH_VERTICES, 3); 1419 gl.drawArrays(GL_PATCHES, 0, 3*numValues); 1420 1421 // Read back data 1422 readOutputBuffer(outputs, numValues); 1423 } 1424 1425 // TessEvaluationExecutor 1426 1427 class TessEvaluationExecutor : private CheckTessSupport, public BufferIoExecutor 1428 { 1429 public: 1430 TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec); 1431 ~TessEvaluationExecutor (void); 1432 1433 void execute (int numValues, const void* const* inputs, void* const* outputs); 1434 1435 protected: 1436 static std::string generateTessEvalShader (const ShaderSpec& shaderSpec); 1437 }; 1438 1439 static std::string generatePassthroughTessControlShader (glu::GLSLVersion version) 1440 { 1441 std::ostringstream src; 1442 1443 src << glu::getGLSLVersionDeclaration(version) << "\n"; 1444 1445 if (version == glu::GLSL_VERSION_310_ES) 1446 src << "#extension GL_EXT_tessellation_shader : require\n\n"; 1447 1448 src << "layout(vertices = 1) out;\n\n"; 1449 1450 src << "void main (void)\n{\n"; 1451 1452 for (int ndx = 0; ndx < 2; ndx++) 1453 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n"; 1454 1455 for (int ndx = 0; ndx < 4; ndx++) 1456 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n"; 1457 1458 src << "}\n"; 1459 1460 return src.str(); 1461 } 1462 1463 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec) 1464 { 1465 std::ostringstream src; 1466 1467 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"; 1468 1469 if (shaderSpec.version == glu::GLSL_VERSION_310_ES) 1470 src << "#extension GL_EXT_tessellation_shader : require\n"; 1471 1472 if (!shaderSpec.globalDeclarations.empty()) 1473 src << shaderSpec.globalDeclarations << "\n"; 1474 1475 src << "\n"; 1476 1477 src << "layout(isolines, equal_spacing) in;\n\n"; 1478 1479 declareBufferBlocks(src, shaderSpec); 1480 1481 src << "void main (void)\n{\n" 1482 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n" 1483 << "\thighp uint invocationId = uint(gl_PrimitiveID) + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n"; 1484 1485 generateExecBufferIo(src, shaderSpec, "invocationId"); 1486 1487 src << "}\n"; 1488 1489 return src.str(); 1490 } 1491 1492 TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec) 1493 : CheckTessSupport (renderCtx, STAGE_EVAL) 1494 , BufferIoExecutor (renderCtx, shaderSpec, glu::ProgramSources() 1495 << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version)) 1496 << glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version)) 1497 << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) 1498 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version))) 1499 { 1500 } 1501 1502 TessEvaluationExecutor::~TessEvaluationExecutor (void) 1503 { 1504 } 1505 1506 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs) 1507 { 1508 const glw::Functions& gl = m_renderCtx.getFunctions(); 1509 const int alignedValues = deAlign32(numValues, 2); 1510 1511 // Initialize buffers with aligned value count to make room for padding 1512 initBuffers(alignedValues); 1513 1514 // Setup input buffer & copy data 1515 uploadInputBuffer(inputs, numValues); 1516 1517 // \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example? 1518 1519 if (!m_inputs.empty()) 1520 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer()); 1521 1522 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer()); 1523 1524 // Render patches 1525 gl.patchParameteri(GL_PATCH_VERTICES, 2); 1526 gl.drawArrays(GL_PATCHES, 0, 2*alignedValues); 1527 1528 // Read back data 1529 readOutputBuffer(outputs, numValues); 1530 } 1531 1532 // Utilities 1533 1534 ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec) 1535 { 1536 switch (shaderType) 1537 { 1538 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (renderCtx, shaderSpec); 1539 case glu::SHADERTYPE_TESSELLATION_CONTROL: return new TessControlExecutor (renderCtx, shaderSpec); 1540 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return new TessEvaluationExecutor (renderCtx, shaderSpec); 1541 case glu::SHADERTYPE_GEOMETRY: return new GeometryShaderExecutor (renderCtx, shaderSpec); 1542 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (renderCtx, shaderSpec); 1543 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (renderCtx, shaderSpec); 1544 default: 1545 throw tcu::InternalError("Unsupported shader type"); 1546 } 1547 } 1548 1549 } // ShaderExecUtil 1550 } // gls 1551 } // deqp 1552