1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.0 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Depth buffer performance tests. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "es3pDepthTests.hpp" 25 26 #include "glsCalibration.hpp" 27 28 #include "gluShaderProgram.hpp" 29 #include "gluObjectWrapper.hpp" 30 #include "gluPixelTransfer.hpp" 31 32 #include "glwFunctions.hpp" 33 #include "glwEnums.hpp" 34 35 #include "tcuTestLog.hpp" 36 #include "tcuStringTemplate.hpp" 37 #include "tcuCPUWarmup.hpp" 38 #include "tcuCommandLine.hpp" 39 40 #include "deClock.h" 41 #include "deString.h" 42 #include "deMath.h" 43 #include "deStringUtil.hpp" 44 #include "deRandom.hpp" 45 #include "deUniquePtr.hpp" 46 47 #include <vector> 48 #include <algorithm> 49 50 namespace deqp 51 { 52 namespace gles3 53 { 54 namespace Performance 55 { 56 namespace 57 { 58 using namespace glw; 59 using de::MovePtr; 60 using tcu::TestContext; 61 using tcu::TestLog; 62 using tcu::Vec4; 63 using tcu::Vec3; 64 using tcu::Vec2; 65 using glu::RenderContext; 66 using glu::ProgramSources; 67 using glu::ShaderSource; 68 using std::vector; 69 using std::string; 70 using std::map; 71 72 struct Sample 73 { 74 deInt64 nullTime; 75 
deInt64 baseTime; 76 deInt64 testTime; 77 int order; 78 int workload; 79 }; 80 81 struct SampleParams 82 { 83 int step; 84 int measurement; 85 86 SampleParams(int step_, int measurement_) : step(step_), measurement(measurement_) {} 87 }; 88 89 typedef vector<float> Geometry; 90 91 struct ObjectData 92 { 93 ProgramSources shader; 94 Geometry geometry; 95 96 ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {} 97 }; 98 99 class RenderData 100 { 101 public: 102 RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log); 103 ~RenderData (void) {}; 104 105 const glu::ShaderProgram m_program; 106 const glu::VertexArray m_vao; 107 const glu::Buffer m_vbo; 108 109 const int m_numVertices; 110 }; 111 112 RenderData::RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log) 113 : m_program (renderCtx, object.shader) 114 , m_vao (renderCtx.getFunctions()) 115 , m_vbo (renderCtx.getFunctions()) 116 , m_numVertices (int(object.geometry.size())/4) 117 { 118 const glw::Functions& gl = renderCtx.getFunctions(); 119 120 if (!m_program.isOk()) 121 log << m_program; 122 123 gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo); 124 gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW); 125 gl.bindAttribLocation(m_program.getProgram(), 0, "a_position"); 126 127 gl.bindVertexArray(*m_vao); 128 gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 129 gl.enableVertexAttribArray(0); 130 gl.bindVertexArray(0); 131 } 132 133 namespace Utils 134 { 135 vector<float> getFullscreenQuad (float depth) 136 { 137 const float data[] = 138 { 139 +1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own 140 +1.0f, -1.0f, depth, 1.0f, 141 -1.0f, -1.0f, depth, 2.0f, 142 -1.0f, -1.0f, depth, 0.0f, 143 -1.0f, +1.0f, depth, 1.0f, 144 +1.0f, +1.0f, depth, 2.0f, 145 }; 146 147 return 
vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); 148 } 149 150 vector<float> getFullscreenQuadWithGradient (float depth0, float depth1) 151 { 152 const float data[] = 153 { 154 +1.0f, +1.0f, depth0, 0.0f, 155 +1.0f, -1.0f, depth0, 1.0f, 156 -1.0f, -1.0f, depth1, 2.0f, 157 -1.0f, -1.0f, depth1, 0.0f, 158 -1.0f, +1.0f, depth1, 1.0f, 159 +1.0f, +1.0f, depth0, 2.0f, 160 }; 161 162 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); 163 } 164 165 vector<float> getPartScreenQuad (float coverage, float depth) 166 { 167 const float xMax = -1.0f + 2.0f*coverage; 168 const float data[] = 169 { 170 xMax, +1.0f, depth, 0.0f, 171 xMax, -1.0f, depth, 1.0f, 172 -1.0f, -1.0f, depth, 2.0f, 173 -1.0f, -1.0f, depth, 0.0f, 174 -1.0f, +1.0f, depth, 1.0f, 175 xMax, +1.0f, depth, 2.0f, 176 }; 177 178 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); 179 } 180 181 // Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise 182 vector<float> getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise) 183 { 184 const int gridsize = resolution+1; 185 vector<Vec3> vertices (gridsize*gridsize); 186 vector<float> retval; 187 de::Random rng (seed); 188 189 for (int y = 0; y < gridsize; y++) 190 for (int x = 0; x < gridsize; x++) 191 { 192 const bool isEdge = x == 0 || y == 0 || x == resolution || y == resolution; 193 const float x_ = float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); 194 const float y_ = float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 
0.0f : rng.getFloat(-xyNoise, +xyNoise)); 195 const float z_ = baseDepth + rng.getFloat(-depthNoise, +depthNoise); 196 197 vertices[y*gridsize + x] = Vec3(x_, y_, z_); 198 } 199 200 retval.reserve(resolution*resolution*6); 201 202 for (int y = 0; y < resolution; y++) 203 for (int x = 0; x < resolution; x++) 204 { 205 const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)]; 206 const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)]; 207 const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)]; 208 const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)]; 209 210 const float temp[6*4] = 211 { 212 p0.x(), p0.y(), p0.z(), 0.0f, 213 p2.x(), p2.y(), p2.z(), 1.0f, 214 p1.x(), p1.y(), p1.z(), 2.0f, 215 216 p3.x(), p3.y(), p3.z(), 0.0f, 217 p1.x(), p1.y(), p1.z(), 1.0f, 218 p2.x(), p2.y(), p2.z(), 2.0f, 219 }; 220 221 retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp)); 222 } 223 224 return retval; 225 } 226 227 // Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader 228 string getBaseVertexShader (void) 229 { 230 return "#version 300 es\n" 231 "in highp vec4 a_position;\n" 232 "out mediump vec3 v_bcoords;\n" 233 "void main()\n" 234 "{\n" 235 " v_bcoords = vec3(0, 0, 0);\n" 236 " v_bcoords[int(a_position.w)] = 1.0;\n" 237 " gl_Position = vec4(a_position.xyz, 1.0);\n" 238 "}\n"; 239 } 240 241 // Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords 242 string getInstanceNoiseVertexShader (void) 243 { 244 return "#version 300 es\n" 245 "in highp vec4 a_position;\n" 246 "out mediump vec3 v_bcoords;\n" 247 "void main()\n" 248 "{\n" 249 " v_bcoords = vec3(0, 0, 0);\n" 250 " v_bcoords[int(a_position.w)] = 1.0;\n" 251 " vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n" 252 " gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n" 253 "}\n"; 254 } 255 256 // Renders green triangles with edges highlighted. Exact shade depends on depth. 
string getDepthAsGreenFragmentShader (void)
{
	// Interior fragments are (0, d, 0) where d is the fragment depth; fragments with any barycentric
	// coordinate below 0.02 (i.e. close to a triangle edge) get the lighter (d, 1, d).
	string source;

	source += "#version 300 es\n";
	source += "in mediump vec3 v_bcoords;\n";
	source += "out mediump vec4 fragColor;\n";
	source += "void main()\n";
	source += "{\n";
	source += " mediump float d = gl_FragCoord.z;\n";
	source += " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n";
	source += " fragColor = vec4(d,1,d,1);\n";
	source += " else\n";
	source += " fragColor = vec4(0,d,0,1);\n";
	source += "}\n";

	return source;
}

// Renders red triangles with edges highlighted. Exact shade depends on depth:
// interior fragments are (d, 0, 0), edge fragments are (1, d, d).
string getDepthAsRedFragmentShader (void)
{
	string source;

	source += "#version 300 es\n";
	source += "in mediump vec3 v_bcoords;\n";
	source += "out mediump vec4 fragColor;\n";
	source += "void main()\n";
	source += "{\n";
	source += " mediump float d = gl_FragCoord.z;\n";
	source += " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n";
	source += " fragColor = vec4(1,d,d,1);\n";
	source += " else\n";
	source += " fragColor = vec4(d,0,0,1);\n";
	source += "}\n";

	return source;
}

// Basic time waster: runs u_iterations rounds of arithmetic on the depth value before colouring.
// Renders red triangles with edges highlighted. Exact shade depends on depth.
string getArithmeticWorkloadFragmentShader (void)
{
	string source;

	source += "#version 300 es\n";
	source += "in mediump vec3 v_bcoords;\n";
	source += "out mediump vec4 fragColor;\n";
	source += "uniform mediump int u_iterations;\n";
	source += "void main()\n";
	source += "{\n";
	source += " mediump float d = gl_FragCoord.z;\n";
	source += " for (int i = 0; i<u_iterations; i++)\n";
	// cos(a)^2 + sin(a)^2 == 1. Since d is in range [0,1] this will lose a few ULPs of precision per iteration
	// but should not significantly change the value of d without extreme iteration counts
	source += " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n";
	source += " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n";
	source += " fragColor = vec4(1,d,d,1);\n";
	source += " else\n";
	source += " fragColor = vec4(d,0,0,1);\n";
	source += "}\n";

	return source;
}

// Same as the arithmetic workload shader, but fragments whose final d is below 0.5 are discarded
string getArithmeticWorkloadDiscardFragmentShader (void)
{
	string source;

	source += "#version 300 es\n";
	source += "in mediump vec3 v_bcoords;\n";
	source += "out mediump vec4 fragColor;\n";
	source += "uniform mediump int u_iterations;\n";
	source += "void main()\n";
	source += "{\n";
	source += " mediump float d = gl_FragCoord.z;\n";
	source += " for (int i = 0; i<u_iterations; i++)\n";
	source += " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n";
	source += " if (d < 0.5) discard;\n";
	source += " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n";
	source += " fragColor = vec4(1,d,d,1);\n";
	source += " else\n";
	source += " fragColor = vec4(d,0,0,1);\n";
	source += "}\n";

	return source;
}

// Texture fetch based time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
string getTextureWorkloadFragmentShader (void)
{
	// d is multiplied by the red channel of u_texture once per iteration, so a fully white texture
	// leaves d unchanged. Edge fragments are plain white in this shader.
	return "#version 300 es\n"
		   "in mediump vec3 v_bcoords;\n"
		   "out mediump vec4 fragColor;\n"
		   "uniform mediump int u_iterations;\n"
		   "uniform sampler2D u_texture;\n"
		   "void main()\n"
		   "{\n"
		   " mediump float d = gl_FragCoord.z;\n"
		   " for (int i = 0; i<u_iterations; i++)\n"
		   " d *= texture(u_texture, (gl_FragCoord.xy+vec2(i))/512.0).r;\n" // Texture is expected to be fully white
		   " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		   " fragColor = vec4(1,1,1,1);\n"
		   " else\n"
		   " fragColor = vec4(d,0,0,1);\n"
		   "}\n";
}

// Discard fragments in a grid pattern. gridsize is the side length of one grid cell in pixels;
// it is substituted for ${GRIDRENDER_SIZE} in the shader template.
string getGridDiscardFragmentShader (int gridsize)
{
	const string		fragSrc = "#version 300 es\n"
								  "in mediump vec3 v_bcoords;\n"
								  "out mediump vec4 fragColor;\n"
								  "void main()\n"
								  "{\n"
								  " mediump float d = gl_FragCoord.z;\n"
								  " if ((int(gl_FragCoord.x)/${GRIDRENDER_SIZE} + int(gl_FragCoord.y)/${GRIDRENDER_SIZE})%2 == 0)\n"
								  " discard;\n"
								  " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
								  " fragColor = vec4(d,1,d,1);\n"
								  " else\n"
								  " fragColor = vec4(0,d,0,1);\n"
								  "}\n";
	map<string, string>	params;

	params["GRIDRENDER_SIZE"] = de::toString(gridsize);

	return tcu::StringTemplate(fragSrc).specialize(params);
}

// A static increment to frag depth. Green output, coloured by the unmodified gl_FragCoord.z
string getStaticFragDepthFragmentShader (void)
{
	return "#version 300 es\n"
		   "in mediump vec3 v_bcoords;\n"
		   "out mediump vec4 fragColor;\n"
		   "void main()\n"
		   "{\n"
		   " mediump float d = gl_FragCoord.z;\n"
		   " gl_FragDepth = gl_FragCoord.z + 0.1;\n"
		   " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		   " fragColor = vec4(d,1,d,1);\n"
		   " else\n"
		   " fragColor = vec4(0,d,0,1);\n"
		   "}\n";
}

// A trivial dynamic change to frag depth. Green output, coloured by the unmodified gl_FragCoord.z
string getDynamicFragDepthFragmentShader (void)
{
	return "#version 300 es\n"
		   "in mediump vec3 v_bcoords;\n"
		   "out mediump vec4 fragColor;\n"
		   "void main()\n"
		   "{\n"
		   " mediump float d = gl_FragCoord.z;\n"
		   " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is always 1
		   " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		   " fragColor = vec4(d,1,d,1);\n"
		   " else\n"
		   " fragColor = vec4(0,d,0,1);\n"
		   "}\n";
}

// A static increment to frag depth combined with the u_iterations arithmetic workload. Red output
string getStaticFragDepthArithmeticWorkloadFragmentShader (void)
{
	return "#version 300 es\n"
		   "in mediump vec3 v_bcoords;\n"
		   "out mediump vec4 fragColor;\n"
		   "uniform mediump int u_iterations;\n"
		   "void main()\n"
		   "{\n"
		   " mediump float d = gl_FragCoord.z;\n"
		   " gl_FragDepth = gl_FragCoord.z + 0.1;\n"
		   " for (int i = 0; i<u_iterations; i++)\n"
		   " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
		   " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		   " fragColor = vec4(1,d,d,1);\n"
		   " else\n"
		   " fragColor = vec4(d,0,0,1);\n"
		   "}\n";
}

// A trivial dynamic change to frag depth combined with the u_iterations arithmetic workload. Red output
string getDynamicFragDepthArithmeticWorkloadFragmentShader (void)
{
	return "#version 300 es\n"
		   "in mediump vec3 v_bcoords;\n"
		   "out mediump vec4 fragColor;\n"
		   "uniform mediump int u_iterations;\n"
		   "void main()\n"
		   "{\n"
		   " mediump float d = gl_FragCoord.z;\n"
		   " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is always 1
		   " for (int i = 0; i<u_iterations; i++)\n"
		   " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
		   " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		   " fragColor = vec4(1,d,d,1);\n"
		   " else\n"
		   " fragColor = vec4(d,0,0,1);\n"
		   "}\n";
}

// Base vertex shader + depth-as-green fragment shader
glu::ProgramSources getBaseShader (void)
{
	return glu::makeVtxFragSources(getBaseVertexShader(), getDepthAsGreenFragmentShader());
}

// Base vertex shader + arithmetic workload fragment shader
glu::ProgramSources getArithmeticWorkloadShader (void)
{
	return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadFragmentShader());
}

// Base vertex shader + arithmetic workload fragment shader with discard
glu::ProgramSources getArithmeticWorkloadDiscardShader (void)
{
	return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadDiscardFragmentShader());
}

// Base vertex shader + texture workload fragment shader
glu::ProgramSources getTextureWorkloadShader (void)
{
	return glu::makeVtxFragSources(getBaseVertexShader(), getTextureWorkloadFragmentShader());
}

// Base vertex shader + grid discard fragment shader with the given cell size
glu::ProgramSources getGridDiscardShader (int gridsize)
{
	return glu::makeVtxFragSources(getBaseVertexShader(), getGridDiscardFragmentShader(gridsize));
}

// Fullscreen quad at the given depth rendered with the given program sources
inline ObjectData quadWith (const glu::ProgramSources& shader, float depth)
{
	return ObjectData(shader, getFullscreenQuad(depth));
}

// Fullscreen quad at the given depth rendered with the base vertex shader and the given fragment shader
inline ObjectData quadWith (const string& fragShader, float depth)
{
	return ObjectData(glu::makeVtxFragSources(getBaseVertexShader(), fragShader), getFullscreenQuad(depth));
}

// Red fullscreen quad whose vertices are perturbed per instance (instance noise vertex shader)
inline ObjectData variableQuad (float depth)
{
	return ObjectData(glu::makeVtxFragSources(getInstanceNoiseVertexShader(), getDepthAsRedFragmentShader()), getFullscreenQuad(depth));
}

// Green fullscreen quad with the plain (no workload) shader
inline ObjectData fastQuad (float depth)
{
	return ObjectData(getBaseShader(), getFullscreenQuad(depth));
}

// Red fullscreen quad with the arithmetic workload shader
inline ObjectData slowQuad (float depth)
{
	return ObjectData(getArithmeticWorkloadShader(), getFullscreenQuad(depth));
}

// Green fullscreen quad with the plain shader and a horizontal depth gradient
inline ObjectData fastQuadWithGradient (float depth0, float depth1)
{
	return ObjectData(getBaseShader(), getFullscreenQuadWithGradient(depth0, depth1));
}
} // Utils

// Shared base. iterate() sets up an offscreen framebuffer, calibrates a maximum workload, renders a shuffled
// series of samples at different workloads and hands the measurements to logAnalysis().
class BaseCase : public tcu::TestCase
{
public:
	enum {RENDER_SIZE = 512};	// Width and height of the offscreen render target in pixels

							BaseCase			(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
	virtual					~BaseCase			(void) {}

	virtual IterateResult	iterate				(void);

protected:
	void					logSamples			(const vector<Sample>& samples, const string& name, const string& desc);
	void					logGeometry			(const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg);
	virtual void			logAnalysis			(const vector<Sample>& samples) = 0;
	virtual void			logDescription		(void) = 0;

	virtual ObjectData		genOccluderGeometry	(void) const = 0;
	virtual ObjectData		genOccludedGeometry	(void) const = 0;

	// Returns the maximum workload; iterate() samples workloads maxWorkload*step/ITERATION_STEPS.
	virtual int				calibrate			(void) const = 0;
	// Renders and times one sample. iterate() overwrites the workload and order fields of the result.
	virtual Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const = 0;

	void					render				(const RenderData& data) const;
	void					render				(const RenderData& data, int instances) const;

	const RenderContext&	m_renderCtx;
	tcu::ResultCollector	m_results;

	// Number of distinct workload levels and number of samples taken at each level
	enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16};
};

BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
	: TestCase		(testCtx, tcu::NODETYPE_PERFORMANCE, name, desc)
	, m_renderCtx	(renderCtx)
{
}

// Runs the whole measurement in a single call and always returns STOP.
BaseCase::IterateResult BaseCase::iterate (void)
{
	typedef de::MovePtr<RenderData> RenderDataP;

	const glw::Functions&	gl				= m_renderCtx.getFunctions();
	TestLog&				log				= m_testCtx.getLog();

	const glu::Framebuffer	framebuffer		(gl);
	const glu::Renderbuffer	renderbuffer	(gl);
	const glu::Renderbuffer	depthbuffer		(gl);

	vector<Sample>			results;
	vector<int>				params;
	RenderDataP				occluderData;
	RenderDataP				occludedData;
	tcu::TextureLevel		resultTex		(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE);
	int						maxWorkload		= 0;
	// Seed depends on the case name and the base seed given on the command line
	de::Random				rng				(deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed());

	logDescription();

	// Offscreen render target: RGBA8 colour + DEPTH24_STENCIL8 renderbuffers, RENDER_SIZE x RENDER_SIZE
	gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer);
	gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE);
	gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer);
	gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE);

	gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer);
	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer);
	gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE);
	gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f);

	// Calibration renders into the framebuffer bound above
	maxWorkload = calibrate();

	// Setup data
	occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log));
	occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log));

	TCU_CHECK(occluderData->m_program.isOk());
	TCU_CHECK(occludedData->m_program.isOk());

	// Force initialization of GPU resources
	gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
	gl.enable(GL_DEPTH_TEST);

	render(*occluderData);
	render(*occludedData);
	glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess());

	// The image read back above doubles as the logged picture of the test geometry
	logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program);

	params.reserve(ITERATION_STEPS*ITERATION_SAMPLES);

	// Setup parameters
	for (int step = 0; step < ITERATION_STEPS; step++)
	{
		const int workload = maxWorkload*step/ITERATION_STEPS;

		for (int count = 0; count < ITERATION_SAMPLES; count++)
			params.push_back(workload);
	}

	// Randomize the order in which the workloads are measured
	rng.shuffle(params.begin(), params.end());

	// Render samples
	for (size_t ndx = 0; ndx < params.size(); ndx++)
	{
		const int	workload	= params[ndx];
		Sample		sample		= renderSample(*occluderData, *occludedData, workload);

		sample.workload = workload;
		sample.order = int(ndx);

		results.push_back(sample);
	}

	logSamples(results, "Samples", "Samples");
	logAnalysis(results);

	m_results.setTestContextResult(m_testCtx);

	return STOP;
}

// Writes the samples to the log as a sample list. If every sample has zero base and null times, only
// workload, order and test time are logged; otherwise null and base times are logged as well.
void BaseCase::logSamples (const vector<Sample>& samples, const string& name, const string& desc)
{
	TestLog& log = m_testCtx.getLog();

	bool testOnly = true;

	for (size_t ndx = 0; ndx < samples.size(); ndx++)
	{
		if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0)
		{
			testOnly = false;
			break;
		}
	}

	log << TestLog::SampleList(name, desc);

	if (testOnly)
	{
		log << TestLog::SampleInfo
			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
			<< TestLog::EndSampleInfo;

		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
		{
			const Sample& sample = samples[sampleNdx];

			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample;
		}
	}
	else
	{
		log << TestLog::SampleInfo
			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
			<< TestLog::ValueInfo("NullTime",	"Read pixels time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
			<< TestLog::ValueInfo("BaseTime",	"Base render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
			<< TestLog::EndSampleInfo;

		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
		{
			const Sample& sample = samples[sampleNdx];

			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample;
		}
	}

	log << TestLog::EndSampleList;
}

// Logs an image of the rendered scene with a colour legend, followed by the occluder and occluded programs.
void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg)
{
	TestLog& log = m_testCtx.getLog();

	log << TestLog::Section("Geometry", "Geometry");
	log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage;
	log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage;
	log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage;

	log << TestLog::Image("Test Geometry", "Test Geometry", sample);
	log << TestLog::EndSection;

	log << TestLog::Section("Occluder", "Occluder");
	log << occluderProg;
	log << TestLog::EndSection;

	log << TestLog::Section("Occluded", "Occluded");
	log << occludedProg;
	log << TestLog::EndSection;
}

// Draws the object once as triangles. Leaves its program in use and unbinds the vertex array.
void BaseCase::render (const RenderData& data) const
{
	const glw::Functions& gl = m_renderCtx.getFunctions();

	gl.useProgram(data.m_program.getProgram());

	gl.bindVertexArray(*data.m_vao);
	gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices);
	gl.bindVertexArray(0);
}

// Draws the given number of instances of the object with a single instanced draw call.
void BaseCase::render (const RenderData& data, int instances) const
{
	const glw::Functions& gl = m_renderCtx.getFunctions();

	gl.useProgram(data.m_program.getProgram());

	gl.bindVertexArray(*data.m_vao);
	gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances);
	gl.bindVertexArray(0);
}

// Render occluder once, then repeatedly render occluded geometry.
Sample with multiple repetition counts & establish time per call with linear regression 721 class RenderCountCase : public BaseCase 722 { 723 public: 724 RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 725 ~RenderCountCase (void) {} 726 727 protected: 728 virtual void logAnalysis (const vector<Sample>& samples); 729 730 private: 731 virtual int calibrate (void) const; 732 virtual Sample renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const; 733 }; 734 735 RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 736 : BaseCase (testCtx, renderCtx, name, desc) 737 { 738 } 739 740 void RenderCountCase::logAnalysis (const vector<Sample>& samples) 741 { 742 using namespace gls; 743 744 TestLog& log = m_testCtx.getLog(); 745 int maxWorkload = 0; 746 vector<Vec2> testSamples (samples.size()); 747 748 for (size_t ndx = 0; ndx < samples.size(); ndx++) 749 { 750 const Sample& sample = samples[ndx]; 751 752 testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); 753 754 maxWorkload = de::max(maxWorkload, sample.workload); 755 } 756 757 { 758 const float confidence = 0.60f; 759 const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); 760 const float usPerCall = testParam.coefficient; 761 const float pxPerCall = RENDER_SIZE*RENDER_SIZE; 762 const float pxPerUs = pxPerCall/usPerCall; 763 const float mpxPerS = pxPerUs; 764 765 log << TestLog::Section("Linear Regression", "Linear Regression"); 766 log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. 
Reported confidence interval for this test is " << confidence << TestLog::EndMessage; 767 log << TestLog::Message << "Render time for scene with depth test was\n\t" 768 << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" 769 << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" 770 << "us/workload" << TestLog::EndMessage; 771 log << TestLog::EndSection; 772 773 log << TestLog::Section("Result", "Result"); 774 775 if (testParam.coefficientConfidenceLower < 0.0f) 776 { 777 log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage; 778 m_results.addResult(QP_TEST_RESULT_PASS, "Pass"); 779 } 780 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) 781 { 782 log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; 783 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 784 } 785 else 786 { 787 log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage; 788 m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2)); 789 } 790 791 log << TestLog::EndSection; 792 } 793 } 794 795 Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const 796 { 797 const glw::Functions& gl = m_renderCtx.getFunctions(); 798 Sample sample; 799 deUint64 now = 0; 800 deUint64 prev = 0; 801 deUint8 buffer[4]; 802 803 // Stabilize 804 { 805 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 806 gl.enable(GL_DEPTH_TEST); 807 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 808 } 809 810 prev = deGetMicroseconds(); 811 812 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | 
GL_STENCIL_BUFFER_BIT); 813 gl.enable(GL_DEPTH_TEST); 814 815 render(occluder); 816 render(occluded, callcount); 817 818 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 819 820 now = deGetMicroseconds(); 821 822 sample.testTime = now - prev; 823 sample.baseTime = 0; 824 sample.nullTime = 0; 825 sample.workload = callcount; 826 827 return sample; 828 } 829 830 int RenderCountCase::calibrate (void) const 831 { 832 using namespace gls; 833 834 const glw::Functions& gl = m_renderCtx.getFunctions(); 835 TestLog& log = m_testCtx.getLog(); 836 837 const RenderData occluderGeometry (genOccluderGeometry(), m_renderCtx, log); 838 const RenderData occludedGeometry (genOccludedGeometry(), m_renderCtx, log); 839 840 TheilSenCalibrator calibrator (CalibratorParameters(20, // Initial workload 841 10, // Max iteration frames 842 20.0f, // Iteration shortcut threshold ms 843 20, // Max iterations 844 33.0f, // Target frame time 845 40.0f, // Frame time cap 846 1000.0f // Target measurement duration 847 )); 848 849 while (true) 850 { 851 switch(calibrator.getState()) 852 { 853 case TheilSenCalibrator::STATE_FINISHED: 854 logCalibrationInfo(m_testCtx.getLog(), calibrator); 855 return calibrator.getCallCount(); 856 857 case TheilSenCalibrator::STATE_MEASURE: 858 { 859 deUint8 buffer[4]; 860 deInt64 now; 861 deInt64 prev; 862 863 prev = deGetMicroseconds(); 864 865 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 866 gl.disable(GL_DEPTH_TEST); 867 868 render(occluderGeometry); 869 render(occludedGeometry, calibrator.getCallCount()); 870 871 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 872 873 now = deGetMicroseconds(); 874 875 calibrator.recordIteration(now - prev); 876 break; 877 } 878 879 case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: 880 calibrator.recomputeParameters(); 881 break; 882 default: 883 DE_ASSERT(false); 884 return 1; 885 } 886 } 887 } 888 889 // Compares time/workload gradients of same geometry with and 
// without depth testing
class RelativeChangeCase : public BaseCase
{
public:
					RelativeChangeCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
	virtual			~RelativeChangeCase	(void) {}

protected:
	Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const;

	virtual void	logAnalysis			(const vector<Sample>& samples);

private:
	int				calibrate			(void) const;
};

RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
	: BaseCase		(testCtx, renderCtx, name, desc)
{
}

// Runs the Theil-Sen calibrator until it finishes and returns the value it settled on. Here the calibrated
// value is fed to the occluded geometry's "u_iterations" uniform; only the occluded geometry is rendered,
// with depth test disabled.
int RelativeChangeCase::calibrate (void) const
{
	using namespace gls;

	const glw::Functions&	gl		= m_renderCtx.getFunctions();
	TestLog&				log		= m_testCtx.getLog();

	const RenderData		geom	(genOccludedGeometry(), m_renderCtx, log);

	TheilSenCalibrator calibrator(CalibratorParameters(	20, // Initial workload
														10, // Max iteration frames
														20.0f, // Iteration shortcut threshold ms
														20, // Max iterations
														10.0f, // Target frame time
														15.0f, // Frame time cap
														1000.0f // Target measurement duration
														));

	while (true)
	{
		switch(calibrator.getState())
		{
			case TheilSenCalibrator::STATE_FINISHED:
				logCalibrationInfo(m_testCtx.getLog(), calibrator);
				return calibrator.getCallCount();

			case TheilSenCalibrator::STATE_MEASURE:
			{
				deUint8			buffer[4];
				const GLuint	program		= geom.m_program.getProgram();

				// The uniform is set before the timer starts, so only clear + draw + read back are timed
				gl.useProgram(program);
				gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount());

				const deInt64 prev = deGetMicroseconds();

				gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
				gl.disable(GL_DEPTH_TEST);

				render(geom);

				gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

				const deInt64 now = deGetMicroseconds();

				calibrator.recordIteration(now - prev);
				break;
			}

			case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
				calibrator.recomputeParameters();
				break;
			default:
				DE_ASSERT(false);
				return 1;
		}
	}
}

// Takes three timings for one workload value, which is passed to the occluded program as "u_iterations":
//  - nullTime: clear + 1x1 read back only
//  - testTime: occluder + occluded with depth test enabled
//  - baseTime: occluder + occluded with depth test disabled
// The workload field is set to 0 here; BaseCase::iterate() overwrites it (and sets order) on the returned sample.
Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const
{
	const glw::Functions&	gl		= m_renderCtx.getFunctions();
	const GLuint			program	= occluded.m_program.getProgram();
	Sample					sample;
	deUint64				now		= 0;
	deUint64				prev	= 0;
	deUint8					buffer[4];

	gl.useProgram(program);
	gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);

	// Warmup (this workload seems to reduce variation in following workloads)
	{
		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
		gl.disable(GL_DEPTH_TEST);

		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
	}

	// Null time
	{
		prev = deGetMicroseconds();

		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
		gl.disable(GL_DEPTH_TEST);

		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

		now = deGetMicroseconds();

		sample.nullTime = now - prev;
	}

	// Test time
	{
		prev = deGetMicroseconds();

		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
		gl.enable(GL_DEPTH_TEST);

		render(occluder);
		render(occluded);

		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

		now = deGetMicroseconds();

		sample.testTime = now - prev;
	}

	// Base time
	{
		prev = deGetMicroseconds();

		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
		gl.disable(GL_DEPTH_TEST);

		render(occluder);
		render(occluded);

		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

		now = deGetMicroseconds();

		sample.baseTime = now - prev;
	}

	sample.workload = 0;

	return sample;
}

// Fits separate lines to the null, base and test times as functions of workload and logs them.
void RelativeChangeCase::logAnalysis (const vector<Sample>& samples)
{
	using namespace gls;

	TestLog&		log			= m_testCtx.getLog();

	int				maxWorkload	= 0;

	vector<Vec2>	nullSamples	(samples.size());
	vector<Vec2>	baseSamples	(samples.size());
	vector<Vec2>	testSamples	(samples.size());

	for (size_t ndx = 0; ndx < samples.size(); ndx++)
	{
		const Sample& sample = samples[ndx];

		nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime);
		baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime);
		testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);

		maxWorkload = de::max(maxWorkload, sample.workload);
	}

	{
		const float							confidence	= 0.60f;

		const LineParametersWithConfidence	nullParam	= theilSenSiegelLinearRegression(nullSamples, confidence);
		const LineParametersWithConfidence	baseParam	= theilSenSiegelLinearRegression(baseSamples, confidence);
		const LineParametersWithConfidence	testParam	= theilSenSiegelLinearRegression(testSamples, confidence);

		// The null sequence does not depend on workload, so a slope of 0 is expected to lie within its confidence bounds
		if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper))
		{
			m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant");
			log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage;
		}

		// Conversely, the base (no depth test) time must depend on workload for the comparison to be meaningful
		if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper))
		{
			m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration");
			log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage;
		}

		log << TestLog::Section("Linear Regression", "Linear Regression");
		log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;

		log << TestLog::Message << "Render time for empty scene was\n\t"
			<< "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset << ", " << nullParam.offsetConfidenceUpper << "]us +"
			<< "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]"
			<< "us/workload" << TestLog::EndMessage;

		log << TestLog::Message << "Render time for scene without depth test was\n\t"
			<< "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset << ", " << baseParam.offsetConfidenceUpper << "]us +"
			<< "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]"
			<< "us/workload" << TestLog::EndMessage;

		log << TestLog::Message << "Render time for scene with depth test was\n\t"
			<< "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +"
			<< "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
			<< "us/workload" << TestLog::EndMessage;

		log << TestLog::EndSection;

		if (de::inRange(0.0f, testParam.coefficientConfidenceLower,
testParam.coefficientConfidenceUpper)) 1105 { 1106 log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage; 1107 m_results.addResult(QP_TEST_RESULT_PASS, "0.0"); 1108 } 1109 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) 1110 { 1111 log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; 1112 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 1113 } 1114 else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25) 1115 { 1116 log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage; 1117 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 1118 } 1119 else 1120 { 1121 log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage; 1122 m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2)); 1123 } 1124 } 1125 } 1126 1127 // Speed of trivial culling 1128 class BaseCostCase : public RenderCountCase 1129 { 1130 public: 1131 BaseCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1132 : RenderCountCase (testCtx, renderCtx, name, desc) {} 1133 1134 ~BaseCostCase (void) {} 1135 1136 private: 1137 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1138 virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } 1139 1140 virtual void logDescription (void) 1141 { 1142 TestLog& log = m_testCtx.getLog(); 1143 1144 log << TestLog::Section("Description", "Test description"); 1145 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1146 log << TestLog::Message << "Geometry consists of two 
fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1147 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1148 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1149 log << TestLog::EndSection; 1150 } 1151 }; 1152 1153 // Gradient 1154 class GradientCostCase : public RenderCountCase 1155 { 1156 public: 1157 GradientCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance) 1158 : RenderCountCase (testCtx, renderCtx, name, desc) 1159 , m_gradientDistance (gradientDistance) 1160 { 1161 } 1162 1163 ~GradientCostCase (void) {} 1164 1165 private: 1166 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); } 1167 virtual ObjectData genOccludedGeometry (void) const 1168 { 1169 return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f)); 1170 } 1171 1172 virtual void logDescription (void) 1173 { 1174 TestLog& log = m_testCtx.getLog(); 1175 1176 log << TestLog::Section("Description", "Test description"); 1177 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1178 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1179 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1180 log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." << TestLog::EndMessage; 1181 log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage; 1182 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1183 log << TestLog::EndSection; 1184 } 1185 1186 const float m_gradientDistance; 1187 }; 1188 1189 // Constant offset to frag depth in occluder 1190 class OccluderStaticFragDepthCostCase : public RenderCountCase 1191 { 1192 public: 1193 OccluderStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1194 : RenderCountCase(testCtx, renderCtx, name, desc) 1195 { 1196 } 1197 1198 ~OccluderStaticFragDepthCostCase (void) {} 1199 1200 private: 1201 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1202 virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } 1203 1204 virtual void logDescription (void) 1205 { 1206 TestLog& log = m_testCtx.getLog(); 1207 1208 log << TestLog::Section("Description", "Test description"); 1209 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1210 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1211 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1212 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1213 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1214 log << TestLog::EndSection; 1215 } 1216 }; 1217 1218 // Dynamic offset to frag depth in occluder 1219 class OccluderDynamicFragDepthCostCase : public RenderCountCase 1220 { 1221 public: 1222 OccluderDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1223 : RenderCountCase(testCtx, renderCtx, name, desc) 1224 { 1225 } 1226 1227 ~OccluderDynamicFragDepthCostCase (void) {} 1228 1229 private: 1230 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1231 virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } 1232 1233 virtual void logDescription (void) 1234 { 1235 TestLog& log = m_testCtx.getLog(); 1236 1237 log << TestLog::Section("Description", "Test description"); 1238 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1239 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1240 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1241 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1242 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1243 log << TestLog::EndSection; 1244 } 1245 }; 1246 1247 // Constant offset to frag depth in occluder 1248 class OccludedStaticFragDepthCostCase : public RenderCountCase 1249 { 1250 public: 1251 OccludedStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1252 : RenderCountCase(testCtx, renderCtx, name, desc) 1253 { 1254 } 1255 1256 ~OccludedStaticFragDepthCostCase (void) {} 1257 1258 private: 1259 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1260 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1261 1262 virtual void logDescription (void) 1263 { 1264 TestLog& log = m_testCtx.getLog(); 1265 1266 log << TestLog::Section("Description", "Test description"); 1267 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1268 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1269 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1270 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1271 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1272 log << TestLog::EndSection; 1273 } 1274 }; 1275 1276 // Dynamic offset to frag depth in occluder 1277 class OccludedDynamicFragDepthCostCase : public RenderCountCase 1278 { 1279 public: 1280 OccludedDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1281 : RenderCountCase(testCtx, renderCtx, name, desc) 1282 { 1283 } 1284 1285 ~OccludedDynamicFragDepthCostCase (void) {} 1286 1287 private: 1288 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1289 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1290 1291 virtual void logDescription (void) 1292 { 1293 TestLog& log = m_testCtx.getLog(); 1294 1295 log << TestLog::Section("Description", "Test description"); 1296 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1297 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1298 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1299 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1300 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1301 log << TestLog::EndSection; 1302 } 1303 }; 1304 1305 // Culling speed with slightly less trivial geometry 1306 class OccludingGeometryComplexityCostCase : public RenderCountCase 1307 { 1308 public: 1309 OccludingGeometryComplexityCostCase (TestContext& testCtx, 1310 const RenderContext& renderCtx, 1311 const char* name, 1312 const char* desc, 1313 int resolution, 1314 float xyNoise, 1315 float zNoise) 1316 : RenderCountCase (testCtx, renderCtx, name, desc) 1317 , m_resolution (resolution) 1318 , m_xyNoise (xyNoise) 1319 , m_zNoise (zNoise) 1320 { 1321 } 1322 1323 ~OccludingGeometryComplexityCostCase (void) {} 1324 1325 private: 1326 virtual ObjectData genOccluderGeometry (void) const 1327 { 1328 return ObjectData(Utils::getBaseShader(), 1329 Utils::getFullScreenGrid(m_resolution, 1330 deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(), 1331 0.2f, 1332 m_zNoise, 1333 m_xyNoise)); 1334 } 1335 1336 virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } 1337 1338 virtual void logDescription (void) 1339 { 1340 TestLog& log = m_testCtx.getLog(); 1341 1342 log << TestLog::Section("Description", "Test description"); 1343 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1344 log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. 
The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage; 1345 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1346 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1347 log << TestLog::EndSection; 1348 } 1349 1350 const int m_resolution; 1351 const float m_xyNoise; 1352 const float m_zNoise; 1353 }; 1354 1355 1356 // Cases with varying workloads in the fragment shader 1357 class FragmentWorkloadCullCase : public RelativeChangeCase 1358 { 1359 public: 1360 FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 1361 virtual ~FragmentWorkloadCullCase (void) {} 1362 1363 private: 1364 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1365 1366 virtual void logDescription (void); 1367 }; 1368 1369 FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1370 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1371 { 1372 } 1373 1374 void FragmentWorkloadCullCase::logDescription (void) 1375 { 1376 TestLog& log = m_testCtx.getLog(); 1377 1378 log << TestLog::Section("Description", "Test description"); 1379 log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; 1380 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad uses a trivial shader," 1381 "the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1382 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1383 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1384 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1385 log << TestLog::EndSection; 1386 } 1387 1388 // Additional workload consists of texture lookups 1389 class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase 1390 { 1391 public: 1392 FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 1393 virtual ~FragmentTextureWorkloadCullCase (void) {} 1394 1395 virtual void init (void); 1396 virtual void deinit (void); 1397 1398 private: 1399 typedef MovePtr<glu::Texture> TexPtr; 1400 1401 virtual ObjectData genOccludedGeometry (void) const 1402 { 1403 return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f)); 1404 } 1405 1406 TexPtr m_texture; 1407 }; 1408 1409 FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1410 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1411 { 1412 } 1413 1414 void FragmentTextureWorkloadCullCase::init (void) 1415 { 1416 const glw::Functions& gl = m_renderCtx.getFunctions(); 1417 const int size = 128; 1418 const vector<deUint8> data (size*size*4, 255); 1419 1420 m_texture = MovePtr<glu::Texture>(new glu::Texture(gl)); 1421 1422 gl.bindTexture(GL_TEXTURE_2D, m_texture); 1423 gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]); 1424 
gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 1425 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1426 } 1427 1428 void FragmentTextureWorkloadCullCase::deinit (void) 1429 { 1430 m_texture.clear(); 1431 } 1432 1433 // Additional workload consists of arithmetic 1434 class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase 1435 { 1436 public: 1437 FragmentArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1438 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1439 { 1440 } 1441 virtual ~FragmentArithmeticWorkloadCullCase (void) {} 1442 1443 private: 1444 virtual ObjectData genOccludedGeometry (void) const 1445 { 1446 return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f)); 1447 } 1448 }; 1449 1450 // Contains dynamicly unused discard after a series of calculations 1451 class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase 1452 { 1453 public: 1454 FragmentDiscardArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1455 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1456 { 1457 } 1458 1459 virtual ~FragmentDiscardArithmeticWorkloadCullCase (void) {} 1460 1461 private: 1462 virtual ObjectData genOccludedGeometry (void) const 1463 { 1464 return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f)); 1465 } 1466 1467 virtual void logDescription (void) 1468 { 1469 TestLog& log = m_testCtx.getLog(); 1470 1471 log << TestLog::Section("Description", "Test description"); 1472 log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; 1473 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad uses a trivial shader," 1474 "the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage; 1475 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1476 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1477 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1478 log << TestLog::EndSection; 1479 } 1480 }; 1481 1482 // Discards fragments from the occluder in a grid pattern 1483 class PartialOccluderDiscardCullCase : public RelativeChangeCase 1484 { 1485 public: 1486 PartialOccluderDiscardCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize) 1487 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1488 , m_gridsize (gridsize) 1489 { 1490 } 1491 virtual ~PartialOccluderDiscardCullCase (void) {} 1492 1493 private: 1494 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); } 1495 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1496 1497 virtual void logDescription (void) 1498 { 1499 TestLog& log = m_testCtx.getLog(); 1500 1501 log << TestLog::Section("Description", "Test description"); 1502 log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage; 1503 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad discards half the " 1504 "fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage; 1505 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1506 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1507 log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time" << TestLog::EndMessage; 1508 log << TestLog::EndSection; 1509 } 1510 1511 const int m_gridsize; 1512 }; 1513 1514 // Trivial occluder covering part of screen 1515 class PartialOccluderCullCase : public RelativeChangeCase 1516 { 1517 public: 1518 PartialOccluderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage) 1519 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1520 , m_coverage (coverage) 1521 { 1522 } 1523 ~PartialOccluderCullCase (void) {} 1524 1525 private: 1526 virtual ObjectData genOccluderGeometry (void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); } 1527 virtual ObjectData genOccludedGeometry (void) const {return Utils::slowQuad(0.8f); } 1528 1529 virtual void logDescription (void) 1530 { 1531 TestLog& log = m_testCtx.getLog(); 1532 1533 log << TestLog::Section("Description", "Test description"); 1534 log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage; 1535 log << TestLog::Message << "Geometry consists of two quads. 
The first (occluding) quad covers " << m_coverage*100.0f 1536 << "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage; 1537 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1538 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1539 log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area" << TestLog::EndMessage; 1540 log << TestLog::EndSection; 1541 } 1542 1543 const float m_coverage; 1544 }; 1545 1546 // Constant offset to frag depth in occluder 1547 class StaticOccluderFragDepthCullCase : public RelativeChangeCase 1548 { 1549 public: 1550 StaticOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1551 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1552 { 1553 } 1554 1555 ~StaticOccluderFragDepthCullCase (void) {} 1556 1557 private: 1558 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1559 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1560 1561 virtual void logDescription (void) 1562 { 1563 TestLog& log = m_testCtx.getLog(); 1564 1565 log << TestLog::Section("Description", "Test description"); 1566 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; 1567 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1568 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1569 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1570 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1571 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1572 log << TestLog::EndSection; 1573 } 1574 }; 1575 1576 // Dynamic offset to frag depth in occluder 1577 class DynamicOccluderFragDepthCullCase : public RelativeChangeCase 1578 { 1579 public: 1580 DynamicOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1581 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1582 { 1583 } 1584 1585 ~DynamicOccluderFragDepthCullCase (void) {} 1586 1587 private: 1588 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1589 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1590 1591 virtual void logDescription (void) 1592 { 1593 TestLog& log = m_testCtx.getLog(); 1594 1595 log << TestLog::Section("Description", "Test description"); 1596 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; 1597 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1598 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1599 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1600 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1601 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1602 log << TestLog::EndSection; 1603 } 1604 }; 1605 1606 // Constant offset to frag depth in occluded 1607 class StaticOccludedFragDepthCullCase : public RelativeChangeCase 1608 { 1609 public: 1610 StaticOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1611 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1612 { 1613 } 1614 1615 ~StaticOccludedFragDepthCullCase (void) {} 1616 1617 private: 1618 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1619 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } 1620 1621 virtual void logDescription (void) 1622 { 1623 TestLog& log = m_testCtx.getLog(); 1624 1625 log << TestLog::Section("Description", "Test description"); 1626 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; 1627 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1628 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1629 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1630 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1631 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1632 log << TestLog::EndSection; 1633 } 1634 }; 1635 1636 // Dynamic offset to frag depth in occluded 1637 class DynamicOccludedFragDepthCullCase : public RelativeChangeCase 1638 { 1639 public: 1640 DynamicOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1641 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1642 { 1643 } 1644 1645 ~DynamicOccludedFragDepthCullCase (void) {} 1646 1647 private: 1648 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1649 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } 1650 1651 virtual void logDescription (void) 1652 { 1653 TestLog& log = m_testCtx.getLog(); 1654 1655 log << TestLog::Section("Description", "Test description"); 1656 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; 1657 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1658 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1659 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1660 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1661 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1662 log << TestLog::EndSection; 1663 } 1664 }; 1665 1666 // Dynamic offset to frag depth in occluded 1667 class ReversedDepthOrderCullCase : public RelativeChangeCase 1668 { 1669 public: 1670 ReversedDepthOrderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1671 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1672 { 1673 } 1674 1675 ~ReversedDepthOrderCullCase (void) {} 1676 1677 private: 1678 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1679 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1680 1681 virtual void logDescription (void) 1682 { 1683 TestLog& log = m_testCtx.getLog(); 1684 1685 log << TestLog::Section("Description", "Test description"); 1686 log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage; 1687 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1688 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1689 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1690 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1691 log << TestLog::EndSection; 1692 } 1693 1694 // Rendering order of occluder & occluded is reversed, otherwise identical to parent version 1695 Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const 1696 { 1697 const glw::Functions& gl = m_renderCtx.getFunctions(); 1698 const GLuint program = occluded.m_program.getProgram(); 1699 Sample sample; 1700 deUint64 now = 0; 1701 deUint64 prev = 0; 1702 deUint8 buffer[4]; 1703 1704 gl.useProgram(program); 1705 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); 1706 1707 // Warmup (this workload seems to reduce variation in following workloads) 1708 { 1709 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1710 gl.disable(GL_DEPTH_TEST); 1711 1712 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1713 } 1714 1715 // Null time 1716 { 1717 prev = deGetMicroseconds(); 1718 1719 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1720 gl.disable(GL_DEPTH_TEST); 1721 1722 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1723 1724 now = deGetMicroseconds(); 1725 1726 sample.nullTime = now - prev; 1727 } 1728 1729 // Test time 1730 { 1731 prev = deGetMicroseconds(); 1732 1733 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1734 gl.enable(GL_DEPTH_TEST); 1735 1736 render(occluded); 1737 
render(occluder); 1738 1739 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1740 1741 now = deGetMicroseconds(); 1742 1743 sample.testTime = now - prev; 1744 } 1745 1746 // Base time 1747 { 1748 prev = deGetMicroseconds(); 1749 1750 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1751 gl.disable(GL_DEPTH_TEST); 1752 1753 render(occluded); 1754 render(occluder); 1755 1756 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1757 1758 now = deGetMicroseconds(); 1759 1760 sample.baseTime = now - prev; 1761 } 1762 1763 sample.workload = 0; 1764 1765 return sample; 1766 } 1767 }; 1768 1769 } // Anonymous 1770 1771 DepthTests::DepthTests (Context& context) 1772 : TestCaseGroup (context, "depth", "Depth culling performance") 1773 { 1774 } 1775 1776 void DepthTests::init (void) 1777 { 1778 TestContext& testCtx = m_context.getTestContext(); 1779 const RenderContext& renderCtx = m_context.getRenderContext(); 1780 1781 { 1782 tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency"); 1783 1784 addChild(cullEfficiencyGroup); 1785 1786 { 1787 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload"); 1788 1789 cullEfficiencyGroup->addChild(group); 1790 1791 group->addChild(new FragmentTextureWorkloadCullCase( testCtx, renderCtx, "workload_texture", "Fragment shader with texture lookup workload")); 1792 group->addChild(new FragmentArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic", "Fragment shader with arithmetic workload")); 1793 group->addChild(new FragmentDiscardArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic_discard", "Fragment shader that may discard with arithmetic workload")); 1794 } 1795 1796 { 1797 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard"); 1798 1799 cullEfficiencyGroup->addChild(group); 1800 1801 group->addChild(new 
PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256", "Parts of occluder geometry discarded", 256)); 1802 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128", "Parts of occluder geometry discarded", 128)); 1803 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64", "Parts of occluder geometry discarded", 64)); 1804 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32", "Parts of occluder geometry discarded", 32)); 1805 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16", "Parts of occluder geometry discarded", 16)); 1806 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8", "Parts of occluder geometry discarded", 8)); 1807 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4", "Parts of occluder geometry discarded", 4)); 1808 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2", "Parts of occluder geometry discarded", 2)); 1809 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1", "Parts of occluder geometry discarded", 1)); 1810 } 1811 1812 { 1813 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage"); 1814 1815 cullEfficiencyGroup->addChild(group); 1816 1817 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f)); 1818 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f)); 1819 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f)); 1820 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f)); 1821 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering 
only part of occluded geometry", 0.80f)); 1822 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f)); 1823 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f)); 1824 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f)); 1825 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f)); 1826 } 1827 1828 { 1829 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage"); 1830 1831 cullEfficiencyGroup->addChild(group); 1832 1833 group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", "")); 1834 group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", "")); 1835 group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", "")); 1836 group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", "")); 1837 } 1838 1839 { 1840 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order"); 1841 1842 cullEfficiencyGroup->addChild(group); 1843 1844 group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order")); 1845 } 1846 } 1847 1848 { 1849 tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency"); 1850 1851 addChild(testCostGroup); 1852 1853 { 1854 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences"); 1855 1856 testCostGroup->addChild(group); 1857 1858 group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", "")); 1859 group->addChild(new GradientCostCase(testCtx, renderCtx, 
"gradient_050", "", 0.50f)); 1860 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f)); 1861 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f)); 1862 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f)); 1863 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f)); 1864 } 1865 1866 { 1867 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity"); 1868 1869 testCostGroup->addChild(group); 1870 1871 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5", "", 5, 0.0f, 0.0f)); 1872 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15", "", 15, 0.0f, 0.0f)); 1873 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25", "", 25, 0.0f, 0.0f)); 1874 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50", "", 50, 0.0f, 0.0f)); 1875 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f)); 1876 1877 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5", "", 5, 1.0f/5.0f, 0.0f)); 1878 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15", "", 15, 1.0f/15.0f, 0.0f)); 1879 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25", "", 25, 1.0f/25.0f, 0.0f)); 1880 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50", "", 50, 1.0f/50.0f, 0.0f)); 1881 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f)); 1882 1883 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, 
"uneven_uniform_grid_5", "", 5, 0.0f, 0.2f)); 1884 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15", "", 15, 0.0f, 0.2f)); 1885 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25", "", 25, 0.0f, 0.2f)); 1886 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50", "", 50, 0.0f, 0.2f)); 1887 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f)); 1888 1889 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5", "", 5, 1.0f/5.0f, 0.2f)); 1890 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15", "", 15, 1.0f/15.0f, 0.2f)); 1891 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25", "", 25, 1.0f/25.0f, 0.2f)); 1892 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50", "", 50, 1.0f/50.0f, 0.2f)); 1893 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f)); 1894 } 1895 1896 { 1897 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth"); 1898 1899 testCostGroup->addChild(group); 1900 1901 group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", "")); 1902 group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", "")); 1903 group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", "")); 1904 group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", "")); 1905 } 1906 } 1907 } 1908 1909 } // Performance 1910 } // gles3 1911 } // deqp 1912