1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.0 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Buffer data upload performance tests. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "es3pBufferDataUploadTests.hpp" 25 #include "glsCalibration.hpp" 26 #include "tcuTestLog.hpp" 27 #include "tcuVectorUtil.hpp" 28 #include "tcuSurface.hpp" 29 #include "tcuCPUWarmup.hpp" 30 #include "tcuRenderTarget.hpp" 31 #include "gluRenderContext.hpp" 32 #include "gluShaderProgram.hpp" 33 #include "gluStrUtil.hpp" 34 #include "gluPixelTransfer.hpp" 35 #include "gluObjectWrapper.hpp" 36 #include "glwFunctions.hpp" 37 #include "glwEnums.hpp" 38 #include "deClock.h" 39 #include "deMath.h" 40 #include "deStringUtil.hpp" 41 #include "deRandom.hpp" 42 #include "deMemory.h" 43 #include "deThread.h" 44 45 #include <algorithm> 46 #include <iomanip> 47 #include <limits> 48 49 namespace deqp 50 { 51 namespace gles3 52 { 53 namespace Performance 54 { 55 namespace 56 { 57 58 using gls::theilSenSiegelLinearRegression; 59 using gls::LineParametersWithConfidence; 60 61 static const char* const s_dummyVertexShader = "#version 300 es\n" 62 "in highp vec4 a_position;\n" 63 "void main (void)\n" 64 "{\n" 65 " gl_Position = a_position;\n" 66 "}\n"; 67 68 static const char* const s_dummyFragnentShader = "#version 300 es\n" 69 "layout(location = 0) out mediump vec4 dEQP_FragColor;\n" 70 "void main (void)\n" 71 "{\n" 72 " dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n" 73 "}\n"; 74 75 static const char* const s_colorVertexShader = "#version 300 es\n" 76 "in highp vec4 a_position;\n" 77 "in highp vec4 a_color;\n" 78 "out highp vec4 v_color;\n" 79 "void main (void)\n" 80 "{\n" 81 " gl_Position = a_position;\n" 82 " v_color = a_color;\n" 83 "}\n"; 84 85 static const char* const s_colorFragmentShader = "#version 300 es\n" 86 "layout(location = 0) out mediump vec4 dEQP_FragColor;\n" 87 "in mediump vec4 v_color;\n" 88 "void main (void)\n" 89 "{\n" 90 " dEQP_FragColor = v_color;\n" 91 "}\n"; 92 93 template <typename TrueType, int cond> 94 struct EnableIf 95 { 96 typedef TrueType Type; 97 }; 98 99 template <typename TrueType> 100 struct EnableIf<TrueType, 0> 101 { 102 }; 103 104 template <typename TrueType, int cond> 105 struct EnableIfNot 106 { 107 }; 108 109 template <typename TrueType> 110 struct EnableIfNot<TrueType, 0> 111 { 112 typedef TrueType Type; 113 }; 114 115 struct SingleOperationDuration 116 { 117 deUint64 totalDuration; 118 deUint64 fitResponseDuration; // used for fitting 119 }; 120 121 struct MapBufferRangeDuration 122 { 123 deUint64 mapDuration; 124 deUint64 unmapDuration; 125 deUint64 writeDuration; 126 deUint64 allocDuration; 127 deUint64 totalDuration; 128 129 deUint64 fitResponseDuration; 130 }; 131 132 struct MapBufferRangeDurationNoAlloc 133 { 134 deUint64 mapDuration; 135 deUint64 unmapDuration; 136 deUint64 writeDuration; 137 deUint64 totalDuration; 138 139 deUint64 fitResponseDuration; 140 }; 141 142 struct MapBufferRangeFlushDuration 143 { 144 deUint64 mapDuration; 145 deUint64 unmapDuration; 146 deUint64 writeDuration; 147 deUint64 flushDuration; 148 deUint64 allocDuration; 149 deUint64 totalDuration; 150 151 deUint64 fitResponseDuration; 152 }; 153 154 struct MapBufferRangeFlushDurationNoAlloc 155 { 156 deUint64 mapDuration; 157 deUint64 unmapDuration; 158 deUint64 writeDuration; 159 deUint64 flushDuration; 160 deUint64 totalDuration; 161 162 deUint64 fitResponseDuration; 163 }; 164 165 struct RenderReadDuration 166 { 167 deUint64 renderDuration; 168 deUint64 readDuration; 169 deUint64 renderReadDuration; 170 deUint64 totalDuration; 171 172 deUint64 fitResponseDuration; 173 }; 174 175 struct UnrelatedUploadRenderReadDuration 176 { 177 deUint64 renderDuration; 178 deUint64 readDuration; 179 deUint64 renderReadDuration; 180 deUint64 totalDuration; 181 182 deUint64 fitResponseDuration; 183 }; 184 185 struct UploadRenderReadDuration 186 { 187 deUint64 uploadDuration; 188 deUint64 renderDuration; 189 deUint64 readDuration; 190 deUint64 totalDuration; 191 deUint64 renderReadDuration; 192 193 deUint64 fitResponseDuration; 194 }; 195 196 struct UploadRenderReadDurationWithUnrelatedUploadSize 197 { 198 deUint64 uploadDuration; 199 deUint64 renderDuration; 200 deUint64 readDuration; 201 deUint64 totalDuration; 202 deUint64 renderReadDuration; 203 204 deUint64 fitResponseDuration; 205 }; 206 207 struct RenderUploadRenderReadDuration 208 { 209 deUint64 firstRenderDuration; 210 deUint64 uploadDuration; 211 deUint64 secondRenderDuration; 212 deUint64 readDuration; 213 deUint64 totalDuration; 214 deUint64 renderReadDuration; 215 216 deUint64 fitResponseDuration; 217 }; 218 219 template <typename SampleT> 220 struct UploadSampleResult 221 { 222 typedef SampleT SampleType; 223 224 int bufferSize; 225 int allocatedSize; 226 int writtenSize; 227 SampleType duration; 228 }; 229 230 template <typename SampleT> 231 struct RenderSampleResult 232 { 233 typedef SampleT SampleType; 234 235 int uploadedDataSize; 236 int renderDataSize; 237 int unrelatedDataSize; 238 int numVertices; 239 SampleT duration; 240 }; 241 242 struct SingleOperationStatistics 243 { 244 float minTime; 245 float maxTime; 246 float medianTime; 247 float min2DecileTime; // !< minimum value in the 2nd decile 248 float max9DecileTime; // !< maximum value in the 9th decile 249 }; 250 251 struct SingleCallStatistics 252 { 253 SingleOperationStatistics result; 254 255 float medianRate; 256 float maxDiffTime; 257 float maxDiff9DecileTime; 258 float medianDiffTime; 259 260 float maxRelDiffTime; 261 float max9DecileRelDiffTime; 262 float medianRelDiffTime; 263 }; 264 265 struct MapCallStatistics 266 { 267 SingleOperationStatistics map; 268 SingleOperationStatistics unmap; 269 SingleOperationStatistics write; 270 SingleOperationStatistics alloc; 271 SingleOperationStatistics result; 272 273 float medianRate; 274 float maxDiffTime; 275 float maxDiff9DecileTime; 276 float medianDiffTime; 277 278 float maxRelDiffTime; 279 float max9DecileRelDiffTime; 280 float medianRelDiffTime; 281 }; 282 283 struct MapFlushCallStatistics 284 { 285 SingleOperationStatistics map; 286 SingleOperationStatistics unmap; 287 SingleOperationStatistics write; 288 SingleOperationStatistics flush; 289 SingleOperationStatistics alloc; 290 SingleOperationStatistics result; 291 292 float medianRate; 293 float maxDiffTime; 294 float maxDiff9DecileTime; 295 float medianDiffTime; 296 297 float maxRelDiffTime; 298 float max9DecileRelDiffTime; 299 float medianRelDiffTime; 300 }; 301 302 struct RenderReadStatistics 303 { 304 SingleOperationStatistics render; 305 SingleOperationStatistics read; 306 SingleOperationStatistics result; 307 SingleOperationStatistics total; 308 309 float medianRate; 310 float maxDiffTime; 311 float maxDiff9DecileTime; 312 float medianDiffTime; 313 314 float maxRelDiffTime; 315 float max9DecileRelDiffTime; 316 float medianRelDiffTime; 317 }; 318 319 struct UploadRenderReadStatistics 320 { 321 SingleOperationStatistics upload; 322 SingleOperationStatistics render; 323 SingleOperationStatistics read; 324 SingleOperationStatistics result; 325 SingleOperationStatistics total; 326 327 float medianRate; 328 float maxDiffTime; 329 float maxDiff9DecileTime; 330 float medianDiffTime; 331 332 float maxRelDiffTime; 333 float max9DecileRelDiffTime; 334 float medianRelDiffTime; 335 }; 336 337 struct RenderUploadRenderReadStatistics 338 { 339 SingleOperationStatistics firstRender; 340 SingleOperationStatistics upload; 341 SingleOperationStatistics secondRender; 342 SingleOperationStatistics read; 343 SingleOperationStatistics result; 344 SingleOperationStatistics total; 345 346 float medianRate; 347 float maxDiffTime; 348 float maxDiff9DecileTime; 349 float medianDiffTime; 350 351 float maxRelDiffTime; 352 float max9DecileRelDiffTime; 353 float medianRelDiffTime; 354 }; 355 356 template <typename T> 357 struct SampleTypeTraits 358 { 359 }; 360 361 template <> 362 struct SampleTypeTraits<SingleOperationDuration> 363 { 364 typedef SingleCallStatistics StatsType; 365 366 enum { HAS_MAP_STATS = 0 }; 367 enum { HAS_UNMAP_STATS = 0 }; 368 enum { HAS_WRITE_STATS = 0 }; 369 enum { HAS_FLUSH_STATS = 0 }; 370 enum { HAS_ALLOC_STATS = 0 }; 371 enum { LOG_CONTRIBUTIONS = 0 }; 372 }; 373 374 template <> 375 struct SampleTypeTraits<MapBufferRangeDuration> 376 { 377 typedef MapCallStatistics StatsType; 378 379 enum { HAS_MAP_STATS = 1 }; 380 enum { HAS_UNMAP_STATS = 1 }; 381 enum { HAS_WRITE_STATS = 1 }; 382 enum { HAS_FLUSH_STATS = 0 }; 383 enum { HAS_ALLOC_STATS = 1 }; 384 enum { LOG_CONTRIBUTIONS = 1 }; 385 }; 386 387 template <> 388 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc> 389 { 390 typedef MapCallStatistics StatsType; 391 392 enum { HAS_MAP_STATS = 1 }; 393 enum { HAS_UNMAP_STATS = 1 }; 394 enum { HAS_WRITE_STATS = 1 }; 395 enum { HAS_FLUSH_STATS = 0 }; 396 enum { HAS_ALLOC_STATS = 0 }; 397 enum { LOG_CONTRIBUTIONS = 1 }; 398 }; 399 400 template <> 401 struct SampleTypeTraits<MapBufferRangeFlushDuration> 402 { 403 typedef MapFlushCallStatistics StatsType; 404 405 enum { HAS_MAP_STATS = 1 }; 406 enum { HAS_UNMAP_STATS = 1 }; 407 enum { HAS_WRITE_STATS = 1 }; 408 enum { HAS_FLUSH_STATS = 1 }; 409 enum { HAS_ALLOC_STATS = 1 }; 410 enum { LOG_CONTRIBUTIONS = 1 }; 411 }; 412 413 template <> 414 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc> 415 { 416 typedef MapFlushCallStatistics StatsType; 417 418 enum { HAS_MAP_STATS = 1 }; 419 enum { HAS_UNMAP_STATS = 1 }; 420 enum { HAS_WRITE_STATS = 1 }; 421 enum { HAS_FLUSH_STATS = 1 }; 422 enum { HAS_ALLOC_STATS = 0 }; 423 enum { LOG_CONTRIBUTIONS = 1 }; 424 }; 425 426 template <> 427 struct SampleTypeTraits<RenderReadDuration> 428 { 429 typedef RenderReadStatistics StatsType; 430 431 enum { HAS_RENDER_STATS = 1 }; 432 enum { HAS_READ_STATS = 1 }; 433 enum { HAS_UPLOAD_STATS = 0 }; 434 enum { HAS_TOTAL_STATS = 1 }; 435 enum { HAS_FIRST_RENDER_STATS = 0 }; 436 enum { HAS_SECOND_RENDER_STATS = 0 }; 437 438 enum { LOG_CONTRIBUTIONS = 1 }; 439 }; 440 441 template <> 442 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration> 443 { 444 typedef RenderReadStatistics StatsType; 445 446 enum { HAS_RENDER_STATS = 1 }; 447 enum { HAS_READ_STATS = 1 }; 448 enum { HAS_UPLOAD_STATS = 0 }; 449 enum { HAS_TOTAL_STATS = 1 }; 450 enum { HAS_FIRST_RENDER_STATS = 0 }; 451 enum { HAS_SECOND_RENDER_STATS = 0 }; 452 453 enum { LOG_CONTRIBUTIONS = 1 }; 454 }; 455 456 template <> 457 struct SampleTypeTraits<UploadRenderReadDuration> 458 { 459 typedef UploadRenderReadStatistics StatsType; 460 461 enum { HAS_RENDER_STATS = 1 }; 462 enum { HAS_READ_STATS = 1 }; 463 enum { HAS_UPLOAD_STATS = 1 }; 464 enum { HAS_TOTAL_STATS = 1 }; 465 enum { HAS_FIRST_RENDER_STATS = 0 }; 466 enum { HAS_SECOND_RENDER_STATS = 0 }; 467 468 enum { LOG_CONTRIBUTIONS = 1 }; 469 enum { LOG_UNRELATED_UPLOAD_SIZE = 0 }; 470 }; 471 472 template <> 473 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize> 474 { 475 typedef UploadRenderReadStatistics StatsType; 476 477 enum { HAS_RENDER_STATS = 1 }; 478 enum { HAS_READ_STATS = 1 }; 479 enum { HAS_UPLOAD_STATS = 1 }; 480 enum { HAS_TOTAL_STATS = 1 }; 481 enum { HAS_FIRST_RENDER_STATS = 0 }; 482 enum { HAS_SECOND_RENDER_STATS = 0 }; 483 484 enum { LOG_CONTRIBUTIONS = 1 }; 485 enum { LOG_UNRELATED_UPLOAD_SIZE = 1 }; 486 }; 487 488 template <> 489 struct SampleTypeTraits<RenderUploadRenderReadDuration> 490 { 491 typedef RenderUploadRenderReadStatistics StatsType; 492 493 enum { HAS_RENDER_STATS = 0 }; 494 enum { HAS_READ_STATS = 1 }; 495 enum { HAS_UPLOAD_STATS = 1 }; 496 enum { HAS_TOTAL_STATS = 1 }; 497 enum { HAS_FIRST_RENDER_STATS = 1 }; 498 enum { HAS_SECOND_RENDER_STATS = 1 }; 499 500 enum { LOG_CONTRIBUTIONS = 1 }; 501 enum { LOG_UNRELATED_UPLOAD_SIZE = 1 }; 502 }; 503 504 struct UploadSampleAnalyzeResult 505 { 506 float transferRateMedian; 507 float transferRateAtRange; 508 float transferRateAtInfinity; 509 }; 510 511 struct RenderSampleAnalyzeResult 512 { 513 float renderRateMedian; 514 float renderRateAtRange; 515 float renderRateAtInfinity; 516 }; 517 518 class UnmapFailureError : public std::exception 519 { 520 public: 521 UnmapFailureError (void) : std::exception() {} 522 }; 523 524 static std::string getHumanReadableByteSize (int numBytes) 525 { 526 std::ostringstream buf; 527 528 if (numBytes < 1024) 529 buf << numBytes << " byte(s)"; 530 else if (numBytes < 1024 * 1024) 531 buf << de::floatToString(numBytes/1024.0f, 1) << " KiB"; 532 else 533 buf << de::floatToString(numBytes/1024.0f/1024.0f, 1) << " MiB"; 534 535 return buf.str(); 536 } 537 538 static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes) 539 { 540 // Time used by memcpy is assumed to be asymptotically linear 541 542 // With large numBytes, the probability of context switch or other random 543 // event is high. Apply memcpy in parts and report how much time would 544 // memcpy have used with the median transfer rate. 545 546 // Less than 1MiB, no need to do anything special 547 if (numBytes < 1048576) 548 { 549 deUint64 startTime; 550 deUint64 endTime; 551 552 deYield(); 553 554 startTime = deGetMicroseconds(); 555 deMemcpy(dst, src, numBytes); 556 endTime = deGetMicroseconds(); 557 558 return endTime - startTime; 559 } 560 else 561 { 562 // Do memcpy in multiple parts 563 564 const int numSections = 5; 565 const int sectionAlign = 16; 566 567 int sectionStarts[numSections+1]; 568 int sectionLens[numSections]; 569 deUint64 sectionTimes[numSections]; 570 deUint64 medianTime; 571 deUint64 bestTime = 0; 572 573 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) 574 sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign); 575 sectionStarts[numSections] = numBytes; 576 577 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) 578 sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx]; 579 580 // Memcpy is usually called after mapbuffer range which may take 581 // a lot of time. To prevent power management from kicking in during 582 // copy, warm up more. 583 { 584 deYield(); 585 tcu::warmupCPU(); 586 deYield(); 587 } 588 589 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) 590 { 591 deUint64 startTime; 592 deUint64 endTime; 593 594 startTime = deGetMicroseconds(); 595 deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]); 596 endTime = deGetMicroseconds(); 597 598 sectionTimes[sectionNdx] = endTime - startTime; 599 600 if (!bestTime || sectionTimes[sectionNdx] < bestTime) 601 bestTime = sectionTimes[sectionNdx]; 602 603 // Detect if write takes 50% longer than it should, and warm up if that happened 604 if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * bestTime) 605 { 606 deYield(); 607 tcu::warmupCPU(); 608 deYield(); 609 } 610 } 611 612 std::sort(sectionTimes, sectionTimes + numSections); 613 614 if ((numSections % 2) == 0) 615 medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2; 616 else 617 medianTime = sectionTimes[numSections / 2]; 618 619 return medianTime*numSections; 620 } 621 } 622 623 static float dummyCalculation (float initial, int workSize) 624 { 625 float a = initial; 626 int b = 123; 627 628 for (int ndx = 0; ndx < workSize; ++ndx) 629 { 630 a = deFloatCos(a + (float)b); 631 b = (b + 63) % 107 + de::abs((int)(a*10.0f)); 632 } 633 634 return a + (float)b; 635 } 636 637 static void busyWait (int microseconds) 638 { 639 const deUint64 maxSingleWaitTime = 1000; // 1ms 640 const deUint64 endTime = deGetMicroseconds() + microseconds; 641 float dummy = *tcu::warmupCPUInternal::g_dummy.m_v; 642 int workSize = 500; 643 644 // exponentially increase work, cap to 1ms 645 while (deGetMicroseconds() < endTime) 646 { 647 const deUint64 startTime = deGetMicroseconds(); 648 deUint64 totalTime; 649 650 dummy = dummyCalculation(dummy, workSize); 651 652 totalTime = deGetMicroseconds() - startTime; 653 654 if (totalTime >= maxSingleWaitTime) 655 break; 656 else 657 workSize *= 2; 658 } 659 660 // "wait" 661 while (deGetMicroseconds() < endTime) 662 dummy = dummyCalculation(dummy, workSize); 663 664 *tcu::warmupCPUInternal::g_dummy.m_v = dummy; 665 } 666 667 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1] 668 template <typename T> 669 static float linearSample (const std::vector<T>& values, float position) 670 { 671 DE_ASSERT(position >= 0.0f); 672 DE_ASSERT(position <= 1.0f); 673 674 const float floatNdx = ((int)values.size() - 1) * position; 675 const int lowerNdx = (int)deFloatFloor(floatNdx); 676 const int higherNdx = lowerNdx + 1; 677 const float interpolationFactor = floatNdx - (float)lowerNdx; 678 679 DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size()); 680 DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size()); 681 DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f); 682 683 return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor); 684 } 685 686 template <typename T> 687 SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target) 688 { 689 SingleOperationStatistics stats; 690 std::vector<deUint64> values(samples.size()); 691 692 for (int ndx = 0; ndx < (int)samples.size(); ++ndx) 693 values[ndx] = samples[ndx].duration.*target; 694 695 std::sort(values.begin(), values.end()); 696 697 stats.minTime = (float)values.front(); 698 stats.maxTime = (float)values.back(); 699 stats.medianTime = linearSample(values, 0.5f); 700 stats.min2DecileTime = linearSample(values, 0.1f); 701 stats.max9DecileTime = linearSample(values, 0.9f); 702 703 return stats; 704 } 705 706 template <typename StatisticsType, typename SampleType> 707 void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor) 708 { 709 std::vector<deUint64> values(samples.size()); 710 711 for (int ndx = 0; ndx < (int)samples.size(); ++ndx) 712 values[ndx] = samples[ndx].duration.fitResponseDuration; 713 714 // median rate 715 { 716 std::vector<float> processingRates(samples.size()); 717 718 for (int ndx = 0; ndx < (int)samples.size(); ++ndx) 719 { 720 const float timeInSeconds = values[ndx] / 1000.0f / 1000.0f; 721 processingRates[ndx] = samples[ndx].*predictor / timeInSeconds; 722 } 723 724 std::sort(processingRates.begin(), processingRates.end()); 725 726 stats.medianRate = linearSample(processingRates, 0.5f); 727 } 728 729 // results compared to the approximation 730 { 731 std::vector<float> timeDiffs(samples.size()); 732 733 for (int ndx = 0; ndx < (int)samples.size(); ++ndx) 734 { 735 const float prediction = samples[ndx].*predictor * fit.coefficient + fit.offset; 736 const float actual = (float)values[ndx]; 737 timeDiffs[ndx] = actual - prediction; 738 } 739 std::sort(timeDiffs.begin(), timeDiffs.end()); 740 741 stats.maxDiffTime = timeDiffs.back(); 742 stats.maxDiff9DecileTime = linearSample(timeDiffs, 0.9f); 743 stats.medianDiffTime = linearSample(timeDiffs, 0.5f); 744 } 745 746 // relative comparison to the approximation 747 { 748 std::vector<float> relativeDiffs(samples.size()); 749 750 for (int ndx = 0; ndx < (int)samples.size(); ++ndx) 751 { 752 const float prediction = samples[ndx].*predictor * fit.coefficient + fit.offset; 753 const float actual = (float)values[ndx]; 754 755 // Ignore cases where we predict negative times, or if 756 // ratio would be (nearly) infinite: ignore if predicted 757 // time is less than 1 microsecond 758 if (prediction < 1.0f) 759 relativeDiffs[ndx] = 0.0f; 760 else 761 relativeDiffs[ndx] = (actual - prediction) / prediction; 762 } 763 std::sort(relativeDiffs.begin(), relativeDiffs.end()); 764 765 stats.maxRelDiffTime = relativeDiffs.back(); 766 stats.max9DecileRelDiffTime = linearSample(relativeDiffs, 0.9f); 767 stats.medianRelDiffTime = linearSample(relativeDiffs, 0.5f); 768 } 769 770 // values calculated using sorted timings 771 772 std::sort(values.begin(), values.end()); 773 774 stats.result.minTime = (float)values.front(); 775 stats.result.maxTime = (float)values.back(); 776 stats.result.medianTime = linearSample(values, 0.5f); 777 stats.result.min2DecileTime = linearSample(values, 0.1f); 778 stats.result.max9DecileTime = linearSample(values, 0.9f); 779 } 780 781 template <typename StatisticsType, typename SampleType> 782 void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples) 783 { 784 calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize); 785 } 786 787 template <typename StatisticsType, typename SampleType> 788 void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples) 789 { 790 calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize); 791 } 792 793 static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples) 794 { 795 SingleCallStatistics stats; 796 797 calculateBasicTransferStatistics(stats, fit, samples); 798 799 return stats; 800 } 801 802 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples) 803 { 804 MapCallStatistics stats; 805 806 calculateBasicTransferStatistics(stats, fit, samples); 807 808 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration); 809 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration); 810 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration); 811 stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration); 812 813 return stats; 814 } 815 816 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples) 817 { 818 MapFlushCallStatistics stats; 819 820 calculateBasicTransferStatistics(stats, fit, samples); 821 822 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration); 823 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration); 824 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration); 825 stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration); 826 stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration); 827 828 return stats; 829 } 830 831 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples) 832 { 833 MapCallStatistics stats; 834 835 calculateBasicTransferStatistics(stats, fit, samples); 836 837 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration); 838 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration); 839 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration); 840 841 return stats; 842 } 843 844 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples) 845 { 846 MapFlushCallStatistics stats; 847 848 calculateBasicTransferStatistics(stats, fit, samples); 849 850 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration); 851 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration); 852 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration); 853 stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration); 854 855 return stats; 856 } 857 858 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples) 859 { 860 RenderReadStatistics stats; 861 862 calculateBasicRenderStatistics(stats, fit, samples); 863 864 stats.render = calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration); 865 stats.read = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration); 866 stats.total = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration); 867 868 return stats; 869 } 870 871 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples) 872 { 873 RenderReadStatistics stats; 874 875 calculateBasicRenderStatistics(stats, fit, samples); 876 877 stats.render = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration); 878 stats.read = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration); 879 stats.total = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration); 880 881 return stats; 882 } 883 884 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples) 885 { 886 UploadRenderReadStatistics stats; 887 888 calculateBasicRenderStatistics(stats, fit, samples); 889 890 stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration); 891 stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration); 892 stats.read = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration); 893 stats.total = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration); 894 895 return stats; 896 } 897 898 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples) 899 { 900 UploadRenderReadStatistics stats; 901 902 calculateBasicRenderStatistics(stats, fit, samples); 903 904 stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration); 905 stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration); 906 stats.read = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration); 907 stats.total = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration); 908 909 return stats; 910 } 911 912 static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples) 913 { 914 RenderUploadRenderReadStatistics stats; 915 916 calculateBasicRenderStatistics(stats, fit, samples); 917 918 stats.firstRender = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration); 919 stats.upload = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration); 920 stats.secondRender = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration); 921 stats.read = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration); 922 stats.total = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration); 923 924 return stats; 925 } 926 927 template <typename DurationType> 928 static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration) 929 { 930 std::vector<tcu::Vec2> samplePoints; 931 932 for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step) 933 { 934 tcu::Vec2 point; 935 936 point.x() = (float)(samples[sampleNdx].writtenSize); 937 point.y() = (float)(samples[sampleNdx].duration.*target); 938 939 samplePoints.push_back(point); 940 } 941 942 return theilSenSiegelLinearRegression(samplePoints, 0.6f); 943 } 944 945 template <typename DurationType> 946 static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration) 947 { 948 std::vector<tcu::Vec2> samplePoints; 949 950 for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step) 951 { 952 tcu::Vec2 point; 953 954 point.x() = (float)(samples[sampleNdx].renderDataSize); 955 point.y() = (float)(samples[sampleNdx].duration.*target); 956 957 samplePoints.push_back(point); 958 } 959 960 return theilSenSiegelLinearRegression(samplePoints, 0.6f); 961 } 962 963 template <typename T> 964 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration) 965 { 966 return fitLineToSamples(samples, beginNdx, endNdx, 1, target); 967 } 968 969 template <typename T> 970 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration) 971 { 972 return fitLineToSamples(samples, 0, (int)samples.size(), target); 973 } 974 975 static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient) 976 { 977 const float lineAMin = lineAOffset + lineACoefficient * xmin; 978 const float lineAMax = lineAOffset + lineACoefficient * xmax; 979 const float lineBMin = lineBOffset + lineBCoefficient * xmin; 980 const float lineBMax = lineBOffset + lineBCoefficient * xmax; 981 const bool aOverBAtBegin = (lineAMin > lineBMin); 982 const bool aOverBAtEnd = (lineAMax > lineBMax); 983 984 if (aOverBAtBegin == aOverBAtEnd) 985 { 986 // lines do not intersect 987 988 const float midpoint = (xmin + xmax) / 2.0f; 989 const float width = (xmax - xmin); 990 991 const float lineAHeight = lineAOffset + lineACoefficient * midpoint; 992 const float lineBHeight = lineBOffset + lineBCoefficient * midpoint; 993 994 return width * de::abs(lineAHeight - lineBHeight); 995 } 996 else 997 { 998 999 // lines intersect 1000 1001 const float approachCoeffient = de::abs(lineACoefficient - lineBCoefficient); 1002 const float epsilon = 0.0001f; 1003 const float leftHeight = de::abs(lineAMin - lineBMin); 1004 const float rightHeight = de::abs(lineAMax - lineBMax); 1005 1006 if (approachCoeffient < epsilon) 1007 return 0.0f; 1008 1009 return (0.5f * leftHeight * (leftHeight / approachCoeffient)) + (0.5f * rightHeight * (rightHeight / approachCoeffient)); 1010 } 1011 } 1012 1013 template <typename T> 1014 static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor) 1015 { 1016 // Compare the fitted line of first half of the samples to the fitted line of 1017 // the second half of the samples. Calculate a AABB that fully contains every 1018 // sample's x component and both fit lines in this range. Calculate the ratio 1019 // of the area between the lines and the AABB. 1020 1021 const float epsilon = 1.e-6f; 1022 const int midPoint = (int)samples.size() / 2; 1023 const LineParametersWithConfidence startApproximation = fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration); 1024 const LineParametersWithConfidence endApproximation = fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration); 1025 1026 const float aabbMinX = (float)(samples.front().*predictor); 1027 const float aabbMinY = de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX); 1028 const float aabbMaxX = (float)(samples.back().*predictor); 1029 const float aabbMaxY = de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX); 1030 1031 const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY); 1032 const float areaBetweenLines = getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient); 1033 const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea); 1034 1035 return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f); 1036 } 1037 1038 template <typename DurationType> 1039 static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples) 1040 { 1041 return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize); 1042 } 1043 1044 template <typename DurationType> 1045 static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples) 1046 { 1047 return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize); 1048 } 1049 1050 template <typename T> 1051 static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor) 1052 { 1053 // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order) 1054 // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully 1055 // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between 1056 // the lines and the AABB. 1057 1058 const float epsilon = 1.e-6f; 1059 const LineParametersWithConfidence evenApproximation = fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration); 1060 const LineParametersWithConfidence oddApproximation = fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration); 1061 1062 const float aabbMinX = (float)(samples.front().*predictor); 1063 const float aabbMinY = de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX); 1064 const float aabbMaxX = (float)(samples.back().*predictor); 1065 const float aabbMaxY = de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX); 1066 1067 const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY); 1068 const float areaBetweenLines = getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient); 1069 const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea); 1070 1071 return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f); 1072 } 1073 1074 template <typename DurationType> 1075 static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples) 1076 { 1077 return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize); 1078 } 1079 1080 template <typename DurationType> 1081 static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples) 1082 { 1083 return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize); 1084 } 1085 1086 template <typename DurationType> 1087 static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize) 1088 { 1089 minBufferSize = 0; 1090 maxBufferSize = 0; 1091 1092 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1093 { 1094 DE_ASSERT(samples[sampleNdx].allocatedSize != 0); 1095 1096 if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize) 1097 minBufferSize = samples[sampleNdx].allocatedSize; 1098 if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize) 1099 maxBufferSize = samples[sampleNdx].allocatedSize; 1100 } 1101 1102 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1103 { 1104 const float bucketNdxFloat = (samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * numBuckets; 1105 const int bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1); 1106 1107 buckets[bucketNdx].push_back(samples[sampleNdx]); 1108 } 1109 } 1110 1111 template <typename SampleType> 1112 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1113 { 1114 log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime) 1115 << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime) 1116 << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime) 1117 << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime) 1118 << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime); 1119 } 1120 1121 template <typename SampleType> 1122 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1123 { 1124 log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime) 1125 << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime) 1126 << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime) 1127 << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime) 1128 << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime); 1129 } 1130 1131 template <typename SampleType> 1132 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1133 { 1134 log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime) 1135 << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime) 1136 << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime) 1137 << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime) 1138 << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime); 1139 } 1140 1141 template <typename SampleType> 1142 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1143 { 1144 log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime) 1145 << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime) 1146 << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime) 1147 << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime) 1148 << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime); 1149 } 1150 1151 template <typename SampleType> 1152 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1153 { 1154 log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime) 1155 << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime) 1156 << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime) 1157 << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime) 1158 << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime); 1159 } 1160 1161 template <typename SampleType> 1162 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1163 { 1164 DE_UNREF(log); 1165 DE_UNREF(stats); 1166 } 1167 1168 template <typename SampleType> 1169 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1170 { 1171 DE_UNREF(log); 1172 DE_UNREF(stats); 1173 } 1174 1175 template <typename SampleType> 1176 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1177 { 1178 DE_UNREF(log); 1179 DE_UNREF(stats); 1180 } 1181 1182 template <typename SampleType> 1183 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1184 { 1185 DE_UNREF(log); 1186 DE_UNREF(stats); 1187 } 1188 1189 template <typename SampleType> 1190 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1191 { 1192 DE_UNREF(log); 1193 DE_UNREF(stats); 1194 } 1195 1196 template <typename SampleType> 1197 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1198 { 1199 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration); 1200 log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1201 << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1202 << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime); 1203 } 1204 1205 template <typename SampleType> 1206 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1207 { 1208 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration); 1209 log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1210 << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1211 << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime); 1212 } 1213 1214 template <typename SampleType> 1215 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1216 { 1217 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration); 1218 log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1219 << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1220 << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime); 1221 } 1222 1223 template <typename SampleType> 1224 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1225 { 1226 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration); 1227 log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1228 << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1229 << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime); 1230 } 1231 1232 template <typename SampleType> 1233 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1234 { 1235 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration); 1236 log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1237 << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1238 << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime); 1239 } 1240 1241 template <typename SampleType> 1242 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1243 { 1244 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration); 1245 log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1246 << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1247 << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime); 1248 } 1249 1250 template <typename SampleType> 1251 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1252 { 1253 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration); 1254 log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1255 << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1256 << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime); 1257 } 1258 1259 template <typename SampleType> 1260 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1261 { 1262 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration); 1263 log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1264 << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1265 << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime); 1266 } 1267 1268 template <typename SampleType> 1269 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1270 { 1271 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration); 1272 log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1273 << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1274 << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime); 1275 } 1276 1277 template <typename SampleType> 1278 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1279 { 1280 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration); 1281 log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1282 << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1283 << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime); 1284 } 1285 1286 template <typename SampleType> 1287 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1288 { 1289 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration); 1290 log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) 1291 << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) 1292 << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime); 1293 } 1294 1295 template <typename SampleType> 1296 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1297 { 1298 DE_UNREF(log); 1299 DE_UNREF(samples); 1300 DE_UNREF(stats); 1301 } 1302 1303 template <typename SampleType> 1304 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1305 { 1306 DE_UNREF(log); 1307 DE_UNREF(samples); 1308 DE_UNREF(stats); 1309 } 1310 1311 template <typename SampleType> 1312 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1313 { 1314 DE_UNREF(log); 1315 DE_UNREF(samples); 1316 DE_UNREF(stats); 1317 } 1318 1319 template <typename SampleType> 1320 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1321 { 1322 DE_UNREF(log); 1323 DE_UNREF(samples); 1324 DE_UNREF(stats); 1325 } 1326 1327 template <typename SampleType> 1328 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1329 { 1330 DE_UNREF(log); 1331 DE_UNREF(samples); 1332 DE_UNREF(stats); 1333 } 1334 1335 template <typename SampleType> 1336 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1337 { 1338 DE_UNREF(log); 1339 DE_UNREF(samples); 1340 DE_UNREF(stats); 1341 } 1342 1343 template <typename SampleType> 1344 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1345 { 1346 DE_UNREF(log); 1347 DE_UNREF(samples); 1348 DE_UNREF(stats); 1349 } 1350 1351 template <typename SampleType> 1352 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1353 { 1354 DE_UNREF(log); 1355 DE_UNREF(samples); 1356 DE_UNREF(stats); 1357 } 1358 1359 template <typename SampleType> 1360 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1361 { 1362 DE_UNREF(log); 1363 DE_UNREF(samples); 1364 DE_UNREF(stats); 1365 } 1366 1367 template <typename SampleType> 1368 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1369 { 1370 DE_UNREF(log); 1371 DE_UNREF(samples); 1372 DE_UNREF(stats); 1373 } 1374 1375 template <typename SampleType> 1376 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats) 1377 { 1378 DE_UNREF(log); 1379 DE_UNREF(samples); 1380 DE_UNREF(stats); 1381 } 1382 1383 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples) 1384 { 1385 log << tcu::TestLog::SampleList("Samples", "Samples") 1386 << tcu::TestLog::SampleInfo 1387 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1388 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1389 << tcu::TestLog::ValueInfo("UploadTime", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1390 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1391 << tcu::TestLog::EndSampleInfo; 1392 1393 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1394 { 1395 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize); 1396 log << tcu::TestLog::Sample 1397 << samples[sampleNdx].writtenSize 1398 << samples[sampleNdx].bufferSize 1399 << (int)samples[sampleNdx].duration.totalDuration 1400 << fitResidual 1401 << tcu::TestLog::EndSample; 1402 } 1403 1404 log << tcu::TestLog::EndSampleList; 1405 } 1406 1407 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples) 1408 { 1409 log << tcu::TestLog::SampleList("Samples", "Samples") 1410 << tcu::TestLog::SampleInfo 1411 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1412 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1413 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1414 << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1415 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1416 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1417 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1418 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1419 << tcu::TestLog::EndSampleInfo; 1420 1421 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1422 { 1423 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize); 1424 log << tcu::TestLog::Sample 1425 << samples[sampleNdx].writtenSize 1426 << samples[sampleNdx].bufferSize 1427 << (int)samples[sampleNdx].duration.totalDuration 1428 << (int)samples[sampleNdx].duration.allocDuration 1429 << (int)samples[sampleNdx].duration.mapDuration 1430 << (int)samples[sampleNdx].duration.unmapDuration 1431 << (int)samples[sampleNdx].duration.writeDuration 1432 << fitResidual 1433 << tcu::TestLog::EndSample; 1434 } 1435 1436 log << tcu::TestLog::EndSampleList; 1437 } 1438 1439 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples) 1440 { 1441 log << tcu::TestLog::SampleList("Samples", "Samples") 1442 << tcu::TestLog::SampleInfo 1443 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1444 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1445 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1446 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1447 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1448 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1449 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1450 << tcu::TestLog::EndSampleInfo; 1451 1452 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1453 { 1454 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize); 1455 log << tcu::TestLog::Sample 1456 << samples[sampleNdx].writtenSize 1457 << samples[sampleNdx].bufferSize 1458 << (int)samples[sampleNdx].duration.totalDuration 1459 << (int)samples[sampleNdx].duration.mapDuration 1460 << (int)samples[sampleNdx].duration.unmapDuration 1461 << (int)samples[sampleNdx].duration.writeDuration 1462 << fitResidual 1463 << tcu::TestLog::EndSample; 1464 } 1465 1466 log << tcu::TestLog::EndSampleList; 1467 } 1468 1469 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples) 1470 { 1471 log << tcu::TestLog::SampleList("Samples", "Samples") 1472 << tcu::TestLog::SampleInfo 1473 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1474 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1475 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1476 << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1477 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1478 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1479 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1480 << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1481 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1482 << tcu::TestLog::EndSampleInfo; 1483 1484 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1485 { 1486 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize); 1487 log << tcu::TestLog::Sample 1488 << samples[sampleNdx].writtenSize 1489 << samples[sampleNdx].bufferSize 1490 << (int)samples[sampleNdx].duration.totalDuration 1491 << (int)samples[sampleNdx].duration.allocDuration 1492 << (int)samples[sampleNdx].duration.mapDuration 1493 << (int)samples[sampleNdx].duration.unmapDuration 1494 << (int)samples[sampleNdx].duration.writeDuration 1495 << (int)samples[sampleNdx].duration.flushDuration 1496 << fitResidual 1497 << tcu::TestLog::EndSample; 1498 } 1499 1500 log << tcu::TestLog::EndSampleList; 1501 } 1502 1503 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples) 1504 { 1505 log << tcu::TestLog::SampleList("Samples", "Samples") 1506 << tcu::TestLog::SampleInfo 1507 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1508 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1509 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1510 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1511 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1512 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1513 << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1514 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1515 << tcu::TestLog::EndSampleInfo; 1516 1517 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1518 { 1519 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize); 1520 log << tcu::TestLog::Sample 1521 << samples[sampleNdx].writtenSize 1522 << samples[sampleNdx].bufferSize 1523 << (int)samples[sampleNdx].duration.totalDuration 1524 << (int)samples[sampleNdx].duration.mapDuration 1525 << (int)samples[sampleNdx].duration.unmapDuration 1526 << (int)samples[sampleNdx].duration.writeDuration 1527 << (int)samples[sampleNdx].duration.flushDuration 1528 << fitResidual 1529 << tcu::TestLog::EndSample; 1530 } 1531 1532 log << tcu::TestLog::EndSampleList; 1533 } 1534 1535 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples) 1536 { 1537 log << tcu::TestLog::SampleList("Samples", "Samples") 1538 << tcu::TestLog::SampleInfo 1539 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1540 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1541 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1542 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1543 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1544 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1545 << tcu::TestLog::EndSampleInfo; 1546 1547 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1548 { 1549 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize); 1550 log << tcu::TestLog::Sample 1551 << samples[sampleNdx].renderDataSize 1552 << samples[sampleNdx].numVertices 1553 << (int)samples[sampleNdx].duration.renderReadDuration 1554 << (int)samples[sampleNdx].duration.renderDuration 1555 << (int)samples[sampleNdx].duration.readDuration 1556 << fitResidual 1557 << tcu::TestLog::EndSample; 1558 } 1559 1560 log << tcu::TestLog::EndSampleList; 1561 } 1562 1563 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples) 1564 { 1565 log << tcu::TestLog::SampleList("Samples", "Samples") 1566 << tcu::TestLog::SampleInfo 1567 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1568 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1569 << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1570 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1571 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1572 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1573 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1574 << tcu::TestLog::EndSampleInfo; 1575 1576 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1577 { 1578 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize); 1579 log << tcu::TestLog::Sample 1580 << samples[sampleNdx].renderDataSize 1581 << samples[sampleNdx].numVertices 1582 << samples[sampleNdx].unrelatedDataSize 1583 << (int)samples[sampleNdx].duration.renderReadDuration 1584 << (int)samples[sampleNdx].duration.renderDuration 1585 << (int)samples[sampleNdx].duration.readDuration 1586 << fitResidual 1587 << tcu::TestLog::EndSample; 1588 } 1589 1590 log << tcu::TestLog::EndSampleList; 1591 } 1592 1593 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples) 1594 { 1595 log << tcu::TestLog::SampleList("Samples", "Samples") 1596 << tcu::TestLog::SampleInfo 1597 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1598 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1599 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1600 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1601 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1602 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1603 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1604 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1605 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1606 << tcu::TestLog::EndSampleInfo; 1607 1608 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1609 { 1610 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize); 1611 log << tcu::TestLog::Sample 1612 << samples[sampleNdx].renderDataSize 1613 << samples[sampleNdx].uploadedDataSize 1614 << samples[sampleNdx].numVertices 1615 << (int)samples[sampleNdx].duration.renderReadDuration 1616 << (int)samples[sampleNdx].duration.totalDuration 1617 << (int)samples[sampleNdx].duration.uploadDuration 1618 << (int)samples[sampleNdx].duration.renderDuration 1619 << (int)samples[sampleNdx].duration.readDuration 1620 << fitResidual 1621 << tcu::TestLog::EndSample; 1622 } 1623 1624 log << tcu::TestLog::EndSampleList; 1625 } 1626 1627 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples) 1628 { 1629 log << tcu::TestLog::SampleList("Samples", "Samples") 1630 << tcu::TestLog::SampleInfo 1631 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1632 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1633 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1634 << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1635 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1636 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1637 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1638 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1639 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1640 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1641 << tcu::TestLog::EndSampleInfo; 1642 1643 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1644 { 1645 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize); 1646 log << tcu::TestLog::Sample 1647 << samples[sampleNdx].renderDataSize 1648 << samples[sampleNdx].uploadedDataSize 1649 << samples[sampleNdx].numVertices 1650 << samples[sampleNdx].unrelatedDataSize 1651 << (int)samples[sampleNdx].duration.renderReadDuration 1652 << (int)samples[sampleNdx].duration.totalDuration 1653 << (int)samples[sampleNdx].duration.uploadDuration 1654 << (int)samples[sampleNdx].duration.renderDuration 1655 << (int)samples[sampleNdx].duration.readDuration 1656 << fitResidual 1657 << tcu::TestLog::EndSample; 1658 } 1659 1660 log << tcu::TestLog::EndSampleList; 1661 } 1662 1663 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples) 1664 { 1665 log << tcu::TestLog::SampleList("Samples", "Samples") 1666 << tcu::TestLog::SampleInfo 1667 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1668 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1669 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) 1670 << tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1671 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1672 << tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1673 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1674 << tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1675 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1676 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 1677 << tcu::TestLog::EndSampleInfo; 1678 1679 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) 1680 { 1681 const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize); 1682 log << tcu::TestLog::Sample 1683 << samples[sampleNdx].renderDataSize 1684 << samples[sampleNdx].uploadedDataSize 1685 << samples[sampleNdx].numVertices 1686 << (int)samples[sampleNdx].duration.renderReadDuration 1687 << (int)samples[sampleNdx].duration.totalDuration 1688 << (int)samples[sampleNdx].duration.firstRenderDuration 1689 << (int)samples[sampleNdx].duration.uploadDuration 1690 << (int)samples[sampleNdx].duration.secondRenderDuration 1691 << (int)samples[sampleNdx].duration.readDuration 1692 << fitResidual 1693 << tcu::TestLog::EndSample; 1694 } 1695 1696 log << tcu::TestLog::EndSampleList; 1697 } 1698 1699 template <typename SampleType> 1700 static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance) 1701 { 1702 // Assume data is linear with some outliers, fit a line 1703 const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples); 1704 const typename SampleTypeTraits<SampleType>::StatsType resultStats = calculateSampleStatistics(theilSenFitting, samples); 1705 float approximatedTransferRate; 1706 float approximatedTransferRateNoConstant; 1707 1708 // Output raw samples 1709 { 1710 const tcu::ScopedLogSection section(log, "Samples", "Samples"); 1711 logSampleList(log, theilSenFitting, samples); 1712 } 1713 1714 // Calculate results for different ranges 1715 if (logBucketPerformance) 1716 { 1717 const int numBuckets = 4; 1718 int minBufferSize = 0; 1719 int maxBufferSize = 0; 1720 std::vector<UploadSampleResult<SampleType> > buckets[numBuckets]; 1721 1722 bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize); 1723 1724 for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx) 1725 { 1726 if (buckets[bucketNdx].empty()) 1727 continue; 1728 1729 // Print a nice result summary 1730 1731 const int bucketRangeMin = minBufferSize + (int)(( bucketNdx / (float)numBuckets) * (maxBufferSize - minBufferSize)); 1732 const int bucketRangeMax = minBufferSize + (int)(((bucketNdx+1) / (float)numBuckets) * (maxBufferSize - minBufferSize)); 1733 const typename SampleTypeTraits<SampleType>::StatsType stats = calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]); 1734 const tcu::ScopedLogSection section (log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]")))); 1735 1736 logMapRangeStats<SampleType>(log, stats); 1737 logUnmapStats<SampleType>(log, stats); 1738 logWriteStats<SampleType>(log, stats); 1739 logFlushStats<SampleType>(log, stats); 1740 logAllocStats<SampleType>(log, stats); 1741 1742 log << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime) 1743 << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime) 1744 << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime) 1745 << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime) 1746 << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime) 1747 << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f) 1748 << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime) 1749 << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime) 1750 << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime) 1751 << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f) 1752 << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f) 1753 << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f); 1754 } 1755 } 1756 1757 // Contributions 1758 if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS) 1759 { 1760 const tcu::ScopedLogSection section(log, "Contribution", "Contributions"); 1761 1762 logMapContribution(log, samples, resultStats); 1763 logUnmapContribution(log, samples, resultStats); 1764 logWriteContribution(log, samples, resultStats); 1765 logFlushContribution(log, samples, resultStats); 1766 logAllocContribution(log, samples, resultStats); 1767 } 1768 1769 // Print results 1770 { 1771 const tcu::ScopedLogSection section(log, "Results", "Results"); 1772 1773 const int medianBufferSize = (samples.front().bufferSize + samples.back().bufferSize) / 2; 1774 const float approximatedTransferTime = (theilSenFitting.offset + theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f; 1775 const float approximatedTransferTimeNoConstant = (theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f; 1776 const float sampleLinearity = calculateSampleFitLinearity(samples); 1777 const float sampleTemporalStability = calculateSampleTemporalStability(samples); 1778 1779 approximatedTransferRateNoConstant = medianBufferSize / approximatedTransferTimeNoConstant; 1780 approximatedTransferRate = medianBufferSize / approximatedTransferTime; 1781 1782 log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f) 1783 << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f) 1784 << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset) 1785 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower) 1786 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper) 1787 << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f) 1788 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f) 1789 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f) 1790 << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f) 1791 << tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f) 1792 << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime) 1793 << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f); 1794 } 1795 1796 // return approximated transfer rate 1797 { 1798 UploadSampleAnalyzeResult result; 1799 1800 result.transferRateMedian = resultStats.medianRate; 1801 result.transferRateAtRange = approximatedTransferRate; 1802 result.transferRateAtInfinity = approximatedTransferRateNoConstant; 1803 1804 return result; 1805 } 1806 } 1807 1808 template <typename SampleType> 1809 static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples) 1810 { 1811 // Assume data is linear with some outliers, fit a line 1812 const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples); 1813 const typename SampleTypeTraits<SampleType>::StatsType resultStats = calculateSampleStatistics(theilSenFitting, samples); 1814 float approximatedProcessingRate; 1815 float approximatedProcessingRateNoConstant; 1816 1817 // output raw samples 1818 { 1819 const tcu::ScopedLogSection section(log, "Samples", "Samples"); 1820 logSampleList(log, theilSenFitting, samples); 1821 } 1822 1823 // Contributions 1824 if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS) 1825 { 1826 const tcu::ScopedLogSection section(log, "Contribution", "Contributions"); 1827 1828 logFirstRenderContribution(log, samples, resultStats); 1829 logUploadContribution(log, samples, resultStats); 1830 logRenderContribution(log, samples, resultStats); 1831 logSecondRenderContribution(log, samples, resultStats); 1832 logReadContribution(log, samples, resultStats); 1833 logTotalContribution(log, samples, resultStats); 1834 } 1835 1836 // print results 1837 { 1838 const tcu::ScopedLogSection section(log, "Results", "Results"); 1839 1840 const int medianDataSize = (samples.front().renderDataSize + samples.back().renderDataSize) / 2; 1841 const float approximatedRenderTime = (theilSenFitting.offset + theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f; 1842 const float approximatedRenderTimeNoConstant = (theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f; 1843 const float sampleLinearity = calculateSampleFitLinearity(samples); 1844 const float sampleTemporalStability = calculateSampleTemporalStability(samples); 1845 1846 approximatedProcessingRateNoConstant = medianDataSize / approximatedRenderTimeNoConstant; 1847 approximatedProcessingRate = medianDataSize / approximatedRenderTime; 1848 1849 log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f) 1850 << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f) 1851 << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset) 1852 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower) 1853 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper) 1854 << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f) 1855 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f) 1856 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f) 1857 << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f) 1858 << tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f) 1859 << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime) 1860 << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f); 1861 } 1862 1863 // return approximated render rate 1864 { 1865 RenderSampleAnalyzeResult result; 1866 1867 result.renderRateMedian = resultStats.medianRate; 1868 result.renderRateAtRange = approximatedProcessingRate; 1869 result.renderRateAtInfinity = approximatedProcessingRateNoConstant; 1870 1871 return result; 1872 } 1873 return RenderSampleAnalyzeResult(); 1874 } 1875 1876 static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples) 1877 { 1878 de::Random rnd (0xabc); 1879 const int midPoint = (numSamples+1) / 2; // !< ceil(m_numSamples / 2) 1880 1881 DE_ASSERT((int)iterationOrder.size() == numSamples); 1882 1883 // Two "passes" over range, randomize order in both passes 1884 // This allows to us detect if iterations are not independent 1885 // (first run and later run samples differ significantly?) 1886 1887 for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx) 1888 iterationOrder[sampleNdx] = sampleNdx * 2; 1889 for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx) 1890 iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1; 1891 1892 for (int ndx = 0; ndx < midPoint; ++ndx) 1893 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]); 1894 for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx) 1895 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]); 1896 } 1897 1898 template <typename SampleType> 1899 class BasicBufferCase : public TestCase 1900 { 1901 public: 1902 1903 enum Flags 1904 { 1905 FLAG_ALLOCATE_LARGER_BUFFER = 0x01, 1906 }; 1907 BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags); 1908 ~BasicBufferCase (void); 1909 1910 virtual void init (void); 1911 virtual void deinit (void); 1912 1913 protected: 1914 IterateResult iterate (void); 1915 1916 virtual bool runSample (int iteration, UploadSampleResult<SampleType>& sample) = 0; 1917 virtual void logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results) = 0; 1918 1919 void disableGLWarmup (void); 1920 void waitGLResults (void); 1921 1922 enum 1923 { 1924 DUMMY_RENDER_AREA_SIZE = 32 1925 }; 1926 1927 glu::ShaderProgram* m_dummyProgram; 1928 deInt32 m_dummyProgramPosLoc; 1929 deUint32 m_bufferID; 1930 1931 const int m_numSamples; 1932 const int m_bufferSizeMin; 1933 const int m_bufferSizeMax; 1934 const bool m_allocateLargerBuffer; 1935 1936 private: 1937 int m_iteration; 1938 std::vector<int> m_iterationOrder; 1939 std::vector<UploadSampleResult<SampleType> > m_results; 1940 1941 bool m_useGL; 1942 int m_bufferRandomizerTimer; 1943 }; 1944 1945 template <typename SampleType> 1946 BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags) 1947 : TestCase (context, tcu::NODETYPE_PERFORMANCE, name, desc) 1948 , m_dummyProgram (DE_NULL) 1949 , m_dummyProgramPosLoc (-1) 1950 , m_bufferID (0) 1951 , m_numSamples (numSamples) 1952 , m_bufferSizeMin (bufferSizeMin) 1953 , m_bufferSizeMax (bufferSizeMax) 1954 , m_allocateLargerBuffer ((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0) 1955 , m_iteration (0) 1956 , m_iterationOrder (numSamples) 1957 , m_results (numSamples) 1958 , m_useGL (true) 1959 , m_bufferRandomizerTimer (0) 1960 { 1961 // "randomize" iteration order. Deterministic, patternless 1962 generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples); 1963 1964 // choose buffer sizes 1965 for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx) 1966 { 1967 const int rawBufferSize = (int)deFloatFloor(bufferSizeMin + (bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / m_numSamples)); 1968 const int bufferSize = deAlign32(rawBufferSize, 16); 1969 const int allocatedBufferSize = deAlign32((m_allocateLargerBuffer) ? ((int)(bufferSize * 1.5f)) : (bufferSize), 16); 1970 1971 m_results[sampleNdx].bufferSize = bufferSize; 1972 m_results[sampleNdx].allocatedSize = allocatedBufferSize; 1973 m_results[sampleNdx].writtenSize = -1; 1974 } 1975 } 1976 1977 template <typename SampleType> 1978 BasicBufferCase<SampleType>::~BasicBufferCase (void) 1979 { 1980 deinit(); 1981 } 1982 1983 template <typename SampleType> 1984 void BasicBufferCase<SampleType>::init (void) 1985 { 1986 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1987 1988 if (!m_useGL) 1989 return; 1990 1991 // \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE 1992 1993 // dummy shader 1994 1995 m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader)); 1996 if (!m_dummyProgram->isOk()) 1997 { 1998 m_testCtx.getLog() << *m_dummyProgram; 1999 throw tcu::TestError("failed to build shader program"); 2000 } 2001 2002 m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position"); 2003 if (m_dummyProgramPosLoc == -1) 2004 throw tcu::TestError("a_position location was -1"); 2005 } 2006 2007 template <typename SampleType> 2008 void BasicBufferCase<SampleType>::deinit (void) 2009 { 2010 if (m_bufferID) 2011 { 2012 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID); 2013 m_bufferID = 0; 2014 } 2015 2016 delete m_dummyProgram; 2017 m_dummyProgram = DE_NULL; 2018 } 2019 2020 template <typename SampleType> 2021 TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void) 2022 { 2023 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2024 static bool buffersWarmedUp = false; 2025 2026 static const deUint32 usages[] = 2027 { 2028 GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY, 2029 GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY, 2030 GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY, 2031 }; 2032 2033 // Allocate some random sized buffers and remove them to 2034 // make sure the first samples too have some buffers removed 2035 // just before their allocation. This is only needed by the 2036 // the first test. 2037 2038 if (m_useGL && !buffersWarmedUp) 2039 { 2040 const int numRandomBuffers = 6; 2041 const int numRepeats = 10; 2042 const int maxBufferSize = 16777216; 2043 const std::vector<deUint8> zeroData (maxBufferSize, 0x00); 2044 de::Random rnd (0x1234); 2045 deUint32 bufferIDs[numRandomBuffers] = {0}; 2046 2047 gl.useProgram(m_dummyProgram->getProgram()); 2048 gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE); 2049 gl.enableVertexAttribArray(m_dummyProgramPosLoc); 2050 2051 for (int ndx = 0; ndx < numRepeats; ++ndx) 2052 { 2053 // Create buffer and maybe draw from it 2054 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2055 { 2056 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4*4); 2057 const deUint32 usage = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)]; 2058 2059 gl.genBuffers(1, &bufferIDs[randomBufferNdx]); 2060 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]); 2061 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage); 2062 2063 if (rnd.getBool()) 2064 { 2065 gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 2066 gl.drawArrays(GL_POINTS, 0, 1); 2067 gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1); 2068 } 2069 } 2070 2071 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2072 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]); 2073 2074 waitGLResults(); 2075 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen"); 2076 2077 m_testCtx.touchWatchdog(); 2078 } 2079 2080 buffersWarmedUp = true; 2081 return CONTINUE; 2082 } 2083 else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0) 2084 { 2085 // Do some random buffer operations to every now and then 2086 // to make sure the previous test iterations won't affect 2087 // following test runs. 2088 2089 const int numRandomBuffers = 3; 2090 const int maxBufferSize = 16777216; 2091 const std::vector<deUint8> zeroData (maxBufferSize, 0x00); 2092 de::Random rnd (0x1234 + 0xabc * m_bufferRandomizerTimer); 2093 2094 // BufferData 2095 { 2096 deUint32 bufferIDs[numRandomBuffers] = {0}; 2097 2098 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2099 { 2100 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4*4); 2101 const deUint32 usage = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)]; 2102 2103 gl.genBuffers(1, &bufferIDs[randomBufferNdx]); 2104 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]); 2105 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage); 2106 } 2107 2108 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2109 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]); 2110 } 2111 2112 GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops"); 2113 2114 // Do some memory mappings 2115 { 2116 deUint32 bufferIDs[numRandomBuffers] = {0}; 2117 2118 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2119 { 2120 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4*4); 2121 const deUint32 usage = usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)]; 2122 void* ptr; 2123 2124 gl.genBuffers(1, &bufferIDs[randomBufferNdx]); 2125 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]); 2126 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage); 2127 2128 gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 2129 gl.drawArrays(GL_POINTS, 0, 1); 2130 gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1); 2131 2132 if (rnd.getBool()) 2133 waitGLResults(); 2134 2135 ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT); 2136 if (ptr) 2137 { 2138 medianTimeMemcpy(ptr, &zeroData[0], randomSize); 2139 gl.unmapBuffer(GL_ARRAY_BUFFER); 2140 } 2141 } 2142 2143 for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx) 2144 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]); 2145 2146 waitGLResults(); 2147 } 2148 2149 GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps"); 2150 return CONTINUE; 2151 } 2152 else 2153 { 2154 const int currentIteration = m_iteration; 2155 const int sampleNdx = m_iterationOrder[currentIteration]; 2156 const bool sampleRunSuccessful = runSample(currentIteration, m_results[sampleNdx]); 2157 2158 GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()"); 2159 2160 // Retry failed samples 2161 if (!sampleRunSuccessful) 2162 return CONTINUE; 2163 2164 if (++m_iteration >= m_numSamples) 2165 { 2166 logAndSetTestResult(m_results); 2167 return STOP; 2168 } 2169 else 2170 return CONTINUE; 2171 } 2172 } 2173 2174 template <typename SampleType> 2175 void BasicBufferCase<SampleType>::disableGLWarmup (void) 2176 { 2177 m_useGL = false; 2178 } 2179 2180 template <typename SampleType> 2181 void BasicBufferCase<SampleType>::waitGLResults (void) 2182 { 2183 tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE); 2184 glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess()); 2185 } 2186 2187 template <typename SampleType> 2188 class BasicUploadCase : public BasicBufferCase<SampleType> 2189 { 2190 public: 2191 enum CaseType 2192 { 2193 CASE_NO_BUFFERS = 0, 2194 CASE_NEW_BUFFER, 2195 CASE_UNSPECIFIED_BUFFER, 2196 CASE_SPECIFIED_BUFFER, 2197 CASE_USED_BUFFER, 2198 CASE_USED_LARGER_BUFFER, 2199 2200 CASE_LAST 2201 }; 2202 2203 enum CaseFlags 2204 { 2205 FLAG_DONT_LOG_BUFFER_INFO = 0x01, 2206 FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT = 0x02, 2207 }; 2208 2209 enum ResultType 2210 { 2211 RESULT_MEDIAN_TRANSFER_RATE = 0, 2212 RESULT_ASYMPTOTIC_TRANSFER_RATE, 2213 }; 2214 2215 BasicUploadCase (Context& context, 2216 const char* name, 2217 const char* desc, 2218 int bufferSizeMin, 2219 int bufferSizeMax, 2220 int numSamples, 2221 deUint32 bufferUsage, 2222 CaseType caseType, 2223 ResultType resultType, 2224 int flags = 0); 2225 2226 ~BasicUploadCase (void); 2227 2228 virtual void init (void); 2229 virtual void deinit (void); 2230 2231 private: 2232 bool runSample (int iteration, UploadSampleResult<SampleType>& sample); 2233 void createBuffer (int bufferSize, int iteration); 2234 void deleteBuffer (int bufferSize); 2235 void useBuffer (int bufferSize); 2236 2237 virtual void testBufferUpload (UploadSampleResult<SampleType>& result, int writeSize) = 0; 2238 void logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results); 2239 2240 deUint32 m_dummyBufferID; 2241 2242 protected: 2243 const CaseType m_caseType; 2244 const ResultType m_resultType; 2245 const deUint32 m_bufferUsage; 2246 const bool m_logBufferInfo; 2247 const bool m_bufferUnspecifiedContent; 2248 std::vector<deUint8> m_zeroData; 2249 2250 using BasicBufferCase<SampleType>::m_testCtx; 2251 using BasicBufferCase<SampleType>::m_context; 2252 2253 using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE; 2254 using BasicBufferCase<SampleType>::m_dummyProgram; 2255 using BasicBufferCase<SampleType>::m_dummyProgramPosLoc; 2256 using BasicBufferCase<SampleType>::m_bufferID; 2257 using BasicBufferCase<SampleType>::m_numSamples; 2258 using BasicBufferCase<SampleType>::m_bufferSizeMin; 2259 using BasicBufferCase<SampleType>::m_bufferSizeMax; 2260 using BasicBufferCase<SampleType>::m_allocateLargerBuffer; 2261 }; 2262 2263 template <typename SampleType> 2264 BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags) 2265 : BasicBufferCase<SampleType> (context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0)) 2266 , m_dummyBufferID (0) 2267 , m_caseType (caseType) 2268 , m_resultType (resultType) 2269 , m_bufferUsage (bufferUsage) 2270 , m_logBufferInfo ((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0) 2271 , m_bufferUnspecifiedContent ((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0) 2272 , m_zeroData () 2273 { 2274 DE_ASSERT(m_caseType < CASE_LAST); 2275 } 2276 2277 template <typename SampleType> 2278 BasicUploadCase<SampleType>::~BasicUploadCase (void) 2279 { 2280 deinit(); 2281 } 2282 2283 template <typename SampleType> 2284 void BasicUploadCase<SampleType>::init (void) 2285 { 2286 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2287 2288 BasicBufferCase<SampleType>::init(); 2289 2290 // zero buffer as upload source 2291 m_zeroData.resize(m_bufferSizeMax, 0x00); 2292 2293 // dummy buffer 2294 2295 gl.genBuffers(1, &m_dummyBufferID); 2296 GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf"); 2297 2298 // log basic info 2299 2300 m_testCtx.getLog() 2301 << tcu::TestLog::Message 2302 << "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n" 2303 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]." 2304 << tcu::TestLog::EndMessage; 2305 2306 if (m_logBufferInfo) 2307 { 2308 switch (m_caseType) 2309 { 2310 case CASE_NO_BUFFERS: 2311 break; 2312 2313 case CASE_NEW_BUFFER: 2314 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage; 2315 break; 2316 2317 case CASE_UNSPECIFIED_BUFFER: 2318 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage; 2319 break; 2320 2321 case CASE_SPECIFIED_BUFFER: 2322 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage; 2323 break; 2324 2325 case CASE_USED_BUFFER: 2326 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage; 2327 break; 2328 2329 case CASE_USED_LARGER_BUFFER: 2330 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage; 2331 break; 2332 2333 default: 2334 DE_ASSERT(false); 2335 break; 2336 } 2337 } 2338 2339 if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE) 2340 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage; 2341 else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) 2342 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage; 2343 else 2344 DE_ASSERT(false); 2345 } 2346 2347 template <typename SampleType> 2348 void BasicUploadCase<SampleType>::deinit (void) 2349 { 2350 if (m_dummyBufferID) 2351 { 2352 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID); 2353 m_dummyBufferID = 0; 2354 } 2355 2356 m_zeroData.clear(); 2357 2358 BasicBufferCase<SampleType>::deinit(); 2359 } 2360 2361 template <typename SampleType> 2362 bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample) 2363 { 2364 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2365 const int allocatedBufferSize = sample.allocatedSize; 2366 const int bufferSize = sample.bufferSize; 2367 2368 if (m_caseType != CASE_NO_BUFFERS) 2369 createBuffer(iteration, allocatedBufferSize); 2370 2371 // warmup CPU before the test to make sure the power management governor 2372 // keeps us in the "high performance" mode 2373 { 2374 deYield(); 2375 tcu::warmupCPU(); 2376 deYield(); 2377 } 2378 2379 testBufferUpload(sample, bufferSize); 2380 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample"); 2381 2382 if (m_caseType != CASE_NO_BUFFERS) 2383 deleteBuffer(bufferSize); 2384 2385 return true; 2386 } 2387 2388 template <typename SampleType> 2389 void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize) 2390 { 2391 DE_ASSERT(!m_bufferID); 2392 DE_ASSERT(m_caseType != CASE_NO_BUFFERS); 2393 2394 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2395 2396 // create buffer 2397 2398 if (m_caseType == CASE_NO_BUFFERS) 2399 return; 2400 2401 // create empty buffer 2402 2403 gl.genBuffers(1, &m_bufferID); 2404 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 2405 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen"); 2406 2407 if (m_caseType == CASE_NEW_BUFFER) 2408 { 2409 // upload something else first, this should reduce noise in samples 2410 2411 de::Random rng (0xbadc * iteration); 2412 const int sizeDelta = rng.getInt(0, 2097140); 2413 const int dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated 2414 const std::vector<deUint8> dummyData (dummyUploadSize, 0x20); 2415 2416 gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID); 2417 gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage); 2418 2419 // make sure upload won't interfere with the test 2420 useBuffer(dummyUploadSize); 2421 2422 // don't kill the buffer so that the following upload cannot potentially reuse the buffer 2423 2424 return; 2425 } 2426 2427 // specify it 2428 2429 if (m_caseType == CASE_UNSPECIFIED_BUFFER) 2430 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage); 2431 else 2432 { 2433 const std::vector<deUint8> dummyData(bufferSize, 0x20); 2434 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage); 2435 } 2436 2437 if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER) 2438 return; 2439 2440 // use it and make sure it is uploaded 2441 2442 useBuffer(bufferSize); 2443 DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER); 2444 } 2445 2446 template <typename SampleType> 2447 void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize) 2448 { 2449 DE_ASSERT(m_bufferID); 2450 DE_ASSERT(m_caseType != CASE_NO_BUFFERS); 2451 2452 // render from the buffer to make sure it actually made it to the gpu. This is to 2453 // make sure that if the upload actually happens later or is happening right now in 2454 // the background, it will not interfere with further test runs 2455 2456 // if buffer contains unspecified content, sourcing data from it results in undefined 2457 // results, possibly including program termination. Specify all data to prevent such 2458 // case from happening 2459 2460 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2461 2462 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 2463 2464 if (m_bufferUnspecifiedContent) 2465 { 2466 const std::vector<deUint8> dummyData(bufferSize, 0x20); 2467 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage); 2468 2469 GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer"); 2470 } 2471 2472 useBuffer(bufferSize); 2473 2474 gl.deleteBuffers(1, &m_bufferID); 2475 m_bufferID = 0; 2476 } 2477 2478 template <typename SampleType> 2479 void BasicUploadCase<SampleType>::useBuffer (int bufferSize) 2480 { 2481 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2482 2483 gl.useProgram(m_dummyProgram->getProgram()); 2484 2485 gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE); 2486 gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 2487 gl.enableVertexAttribArray(m_dummyProgramPosLoc); 2488 2489 // use whole buffer to make sure buffer is uploaded by drawing first and last 2490 DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0); 2491 gl.drawArrays(GL_POINTS, 0, 1); 2492 gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1); 2493 2494 BasicBufferCase<SampleType>::waitGLResults(); 2495 } 2496 2497 template <typename SampleType> 2498 void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results) 2499 { 2500 const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, true); 2501 2502 // with small buffers, report the median transfer rate of the samples 2503 // with large buffers, report the expected preformance of infinitely large buffers 2504 const float rate = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian); 2505 2506 if (rate == std::numeric_limits<float>::infinity()) 2507 { 2508 // sample times are 1) invalid or 2) timer resolution too low 2509 // report speed 0 bytes / s since real value cannot be determined 2510 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str()); 2511 } 2512 else 2513 { 2514 // report transfer rate in MB / s 2515 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str()); 2516 } 2517 } 2518 2519 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration> 2520 { 2521 public: 2522 ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase); 2523 ~ReferenceMemcpyCase (void); 2524 2525 void init (void); 2526 void deinit (void); 2527 private: 2528 void testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize); 2529 2530 std::vector<deUint8> m_dstBuf; 2531 }; 2532 2533 ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase) 2534 : BasicUploadCase<SingleOperationDuration> (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE)) 2535 , m_dstBuf () 2536 { 2537 disableGLWarmup(); 2538 } 2539 2540 ReferenceMemcpyCase::~ReferenceMemcpyCase (void) 2541 { 2542 } 2543 2544 void ReferenceMemcpyCase::init (void) 2545 { 2546 // Describe what the test tries to do 2547 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage; 2548 2549 m_dstBuf.resize(m_bufferSizeMax, 0x00); 2550 2551 BasicUploadCase<SingleOperationDuration>::init(); 2552 } 2553 2554 void ReferenceMemcpyCase::deinit (void) 2555 { 2556 m_dstBuf.clear(); 2557 BasicUploadCase<SingleOperationDuration>::deinit(); 2558 } 2559 2560 void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize) 2561 { 2562 // write 2563 result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize); 2564 result.duration.fitResponseDuration = result.duration.totalDuration; 2565 2566 result.writtenSize = bufferSize; 2567 } 2568 2569 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration> 2570 { 2571 public: 2572 BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType); 2573 ~BufferDataUploadCase (void); 2574 2575 void init (void); 2576 private: 2577 void testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize); 2578 }; 2579 2580 BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType) 2581 : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE) 2582 { 2583 } 2584 2585 BufferDataUploadCase::~BufferDataUploadCase (void) 2586 { 2587 } 2588 2589 void BufferDataUploadCase::init (void) 2590 { 2591 // Describe what the test tries to do 2592 m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage; 2593 2594 BasicUploadCase<SingleOperationDuration>::init(); 2595 } 2596 2597 void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize) 2598 { 2599 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2600 2601 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 2602 2603 // upload 2604 { 2605 deUint64 startTime; 2606 deUint64 endTime; 2607 2608 startTime = deGetMicroseconds(); 2609 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); 2610 endTime = deGetMicroseconds(); 2611 2612 result.duration.totalDuration = endTime - startTime; 2613 result.duration.fitResponseDuration = result.duration.totalDuration; 2614 result.writtenSize = bufferSize; 2615 } 2616 } 2617 2618 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration> 2619 { 2620 public: 2621 enum Flags 2622 { 2623 FLAG_FULL_UPLOAD = 0x01, 2624 FLAG_PARTIAL_UPLOAD = 0x02, 2625 FLAG_INVALIDATE_BEFORE_USE = 0x04, 2626 }; 2627 2628 BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags); 2629 ~BufferSubDataUploadCase (void); 2630 2631 void init (void); 2632 private: 2633 void testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize); 2634 2635 const bool m_fullUpload; 2636 const bool m_invalidateBeforeUse; 2637 }; 2638 2639 BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags) 2640 : BasicUploadCase<SingleOperationDuration> (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE) 2641 , m_fullUpload ((flags & FLAG_FULL_UPLOAD) != 0) 2642 , m_invalidateBeforeUse ((flags & FLAG_INVALIDATE_BEFORE_USE) != 0) 2643 { 2644 DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0); 2645 DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)); 2646 } 2647 2648 BufferSubDataUploadCase::~BufferSubDataUploadCase (void) 2649 { 2650 } 2651 2652 void BufferSubDataUploadCase::init (void) 2653 { 2654 // Describe what the test tries to do 2655 m_testCtx.getLog() 2656 << tcu::TestLog::Message 2657 << "Testing glBufferSubData() function call performance. " 2658 << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. ")) 2659 << ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n" 2660 << tcu::TestLog::EndMessage; 2661 2662 BasicUploadCase<SingleOperationDuration>::init(); 2663 } 2664 2665 void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize) 2666 { 2667 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2668 2669 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 2670 2671 // "invalidate", upload null 2672 if (m_invalidateBeforeUse) 2673 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage); 2674 2675 // upload 2676 { 2677 deUint64 startTime; 2678 deUint64 endTime; 2679 2680 startTime = deGetMicroseconds(); 2681 2682 if (m_fullUpload) 2683 gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]); 2684 else 2685 { 2686 // upload to buffer center 2687 gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]); 2688 } 2689 2690 endTime = deGetMicroseconds(); 2691 2692 result.duration.totalDuration = endTime - startTime; 2693 result.duration.fitResponseDuration = result.duration.totalDuration; 2694 2695 if (m_fullUpload) 2696 result.writtenSize = bufferSize; 2697 else 2698 result.writtenSize = bufferSize / 2; 2699 } 2700 } 2701 2702 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration> 2703 { 2704 public: 2705 enum Flags 2706 { 2707 FLAG_PARTIAL = 0x01, 2708 FLAG_MANUAL_INVALIDATION = 0x02, 2709 FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, 2710 FLAG_USE_UNUSED_SPECIFIED_BUFFER = 0x08, 2711 }; 2712 2713 MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags); 2714 ~MapBufferRangeCase (void); 2715 2716 void init (void); 2717 private: 2718 static CaseType getBaseCaseType (int caseFlags); 2719 static int getBaseFlags (deUint32 mapFlags, int caseFlags); 2720 2721 void testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize); 2722 void attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize); 2723 2724 const bool m_manualInvalidation; 2725 const bool m_fullUpload; 2726 const bool m_useUnusedUnspecifiedBuffer; 2727 const bool m_useUnusedSpecifiedBuffer; 2728 const deUint32 m_mapFlags; 2729 int m_unmapFailures; 2730 }; 2731 2732 MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags) 2733 : BasicUploadCase<MapBufferRangeDuration> (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags)) 2734 , m_manualInvalidation ((caseFlags&FLAG_MANUAL_INVALIDATION) != 0) 2735 , m_fullUpload ((caseFlags&FLAG_PARTIAL) == 0) 2736 , m_useUnusedUnspecifiedBuffer ((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0) 2737 , m_useUnusedSpecifiedBuffer ((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0) 2738 , m_mapFlags (mapFlags) 2739 , m_unmapFailures (0) 2740 { 2741 DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer)); 2742 DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation)); 2743 } 2744 2745 MapBufferRangeCase::~MapBufferRangeCase (void) 2746 { 2747 } 2748 2749 void MapBufferRangeCase::init (void) 2750 { 2751 // Describe what the test tries to do 2752 m_testCtx.getLog() 2753 << tcu::TestLog::Message 2754 << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n" 2755 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n" 2756 << ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : ("")) 2757 << ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : ("")) 2758 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : ("")) 2759 << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : ("")) 2760 << "Map bits:\n" 2761 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) 2762 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) 2763 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) 2764 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) 2765 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) 2766 << tcu::TestLog::EndMessage; 2767 2768 BasicUploadCase<MapBufferRangeDuration>::init(); 2769 } 2770 2771 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags) 2772 { 2773 if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0) 2774 return CASE_USED_BUFFER; 2775 else 2776 return CASE_NEW_BUFFER; 2777 } 2778 2779 int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags) 2780 { 2781 int flags = FLAG_DONT_LOG_BUFFER_INFO; 2782 2783 // If buffer contains unspecified data when it is sourced (i.e drawn) 2784 // results are undefined, and system errors may occur. Signal parent 2785 // class to take this into account 2786 if (caseFlags & FLAG_PARTIAL) 2787 { 2788 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || 2789 (caseFlags & FLAG_MANUAL_INVALIDATION) != 0 || 2790 (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0) 2791 { 2792 flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT; 2793 } 2794 } 2795 2796 return flags; 2797 } 2798 2799 void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize) 2800 { 2801 const int unmapFailureThreshold = 4; 2802 2803 for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures) 2804 { 2805 try 2806 { 2807 attemptBufferMap(result, bufferSize); 2808 return; 2809 } 2810 catch (UnmapFailureError&) 2811 { 2812 } 2813 } 2814 2815 throw tcu::TestError("Unmapping failures exceeded limit"); 2816 } 2817 2818 void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize) 2819 { 2820 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 2821 2822 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 2823 2824 if (m_fullUpload) 2825 result.writtenSize = bufferSize; 2826 else 2827 result.writtenSize = bufferSize / 2; 2828 2829 // Create unused buffer 2830 2831 if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer) 2832 { 2833 deUint64 startTime; 2834 deUint64 endTime; 2835 2836 // "invalidate" or allocate, upload null 2837 startTime = deGetMicroseconds(); 2838 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage); 2839 endTime = deGetMicroseconds(); 2840 2841 result.duration.allocDuration = endTime - startTime; 2842 } 2843 else if (m_useUnusedSpecifiedBuffer) 2844 { 2845 deUint64 startTime; 2846 deUint64 endTime; 2847 2848 // Specify buffer contents 2849 startTime = deGetMicroseconds(); 2850 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); 2851 endTime = deGetMicroseconds(); 2852 2853 result.duration.allocDuration = endTime - startTime; 2854 } 2855 else 2856 { 2857 // No alloc, no time 2858 result.duration.allocDuration = 0; 2859 } 2860 2861 // upload 2862 { 2863 void* mapPtr; 2864 2865 // Map 2866 { 2867 deUint64 startTime; 2868 deUint64 endTime; 2869 2870 startTime = deGetMicroseconds(); 2871 if (m_fullUpload) 2872 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags); 2873 else 2874 { 2875 // upload to buffer center 2876 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags); 2877 } 2878 endTime = deGetMicroseconds(); 2879 2880 if (!mapPtr) 2881 throw tcu::Exception("MapBufferRange returned NULL"); 2882 2883 result.duration.mapDuration = endTime - startTime; 2884 } 2885 2886 // Write 2887 { 2888 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize); 2889 } 2890 2891 // Unmap 2892 { 2893 deUint64 startTime; 2894 deUint64 endTime; 2895 glw::GLboolean unmapSuccessful; 2896 2897 startTime = deGetMicroseconds(); 2898 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER); 2899 endTime = deGetMicroseconds(); 2900 2901 // if unmapping fails, just try again later 2902 if (!unmapSuccessful) 2903 throw UnmapFailureError(); 2904 2905 result.duration.unmapDuration = endTime - startTime; 2906 } 2907 2908 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration; 2909 result.duration.fitResponseDuration = result.duration.totalDuration; 2910 } 2911 } 2912 2913 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration> 2914 { 2915 public: 2916 enum Flags 2917 { 2918 FLAG_PARTIAL = 0x01, 2919 FLAG_FLUSH_IN_PARTS = 0x02, 2920 FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, 2921 FLAG_USE_UNUSED_SPECIFIED_BUFFER = 0x08, 2922 FLAG_FLUSH_PARTIAL = 0x10, 2923 }; 2924 2925 MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags); 2926 ~MapBufferRangeFlushCase (void); 2927 2928 void init (void); 2929 private: 2930 static CaseType getBaseCaseType (int caseFlags); 2931 static int getBaseFlags (deUint32 mapFlags, int caseFlags); 2932 2933 void testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize); 2934 void attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize); 2935 2936 const bool m_fullUpload; 2937 const bool m_flushInParts; 2938 const bool m_flushPartial; 2939 const bool m_useUnusedUnspecifiedBuffer; 2940 const bool m_useUnusedSpecifiedBuffer; 2941 const deUint32 m_mapFlags; 2942 int m_unmapFailures; 2943 }; 2944 2945 MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags) 2946 : BasicUploadCase<MapBufferRangeFlushDuration> (ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags)) 2947 , m_fullUpload ((caseFlags&FLAG_PARTIAL) == 0) 2948 , m_flushInParts ((caseFlags&FLAG_FLUSH_IN_PARTS) != 0) 2949 , m_flushPartial ((caseFlags&FLAG_FLUSH_PARTIAL) != 0) 2950 , m_useUnusedUnspecifiedBuffer ((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0) 2951 , m_useUnusedSpecifiedBuffer ((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0) 2952 , m_mapFlags (mapFlags) 2953 , m_unmapFailures (0) 2954 { 2955 DE_ASSERT(!(m_flushPartial && m_flushInParts)); 2956 DE_ASSERT(!(m_flushPartial && !m_fullUpload)); 2957 } 2958 2959 MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void) 2960 { 2961 } 2962 2963 void MapBufferRangeFlushCase::init (void) 2964 { 2965 // Describe what the test tries to do 2966 m_testCtx.getLog() 2967 << tcu::TestLog::Message 2968 << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n" 2969 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n" 2970 << ((m_flushInParts) ? 2971 ("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") : 2972 (m_flushPartial) ? 2973 ("Half of the buffer range is flushed.") : 2974 ("The whole mapped range is flushed in one flush call.")) << "\n" 2975 << ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : ("")) 2976 << ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : ("")) 2977 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : ("")) 2978 << "Map bits:\n" 2979 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) 2980 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) 2981 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) 2982 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) 2983 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) 2984 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : ("")) 2985 << tcu::TestLog::EndMessage; 2986 2987 BasicUploadCase<MapBufferRangeFlushDuration>::init(); 2988 } 2989 2990 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags) 2991 { 2992 if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0) 2993 return CASE_USED_BUFFER; 2994 else 2995 return CASE_NEW_BUFFER; 2996 } 2997 2998 int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags) 2999 { 3000 int flags = FLAG_DONT_LOG_BUFFER_INFO; 3001 3002 // If buffer contains unspecified data when it is sourced (i.e drawn) 3003 // results are undefined, and system errors may occur. Signal parent 3004 // class to take this into account 3005 if (caseFlags & FLAG_PARTIAL) 3006 { 3007 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || 3008 (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0 || 3009 (caseFlags & FLAG_FLUSH_PARTIAL) != 0) 3010 { 3011 flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT; 3012 } 3013 } 3014 3015 return flags; 3016 } 3017 3018 void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize) 3019 { 3020 const int unmapFailureThreshold = 4; 3021 3022 for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures) 3023 { 3024 try 3025 { 3026 attemptBufferMap(result, bufferSize); 3027 return; 3028 } 3029 catch (UnmapFailureError&) 3030 { 3031 } 3032 } 3033 3034 throw tcu::TestError("Unmapping failures exceeded limit"); 3035 } 3036 3037 void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize) 3038 { 3039 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3040 const int mappedSize = (m_fullUpload) ? (bufferSize) : (bufferSize / 2); 3041 3042 if (m_fullUpload && !m_flushPartial) 3043 result.writtenSize = bufferSize; 3044 else 3045 result.writtenSize = bufferSize / 2; 3046 3047 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 3048 3049 // Create unused buffer 3050 3051 if (m_useUnusedUnspecifiedBuffer) 3052 { 3053 deUint64 startTime; 3054 deUint64 endTime; 3055 3056 // Don't specify contents 3057 startTime = deGetMicroseconds(); 3058 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage); 3059 endTime = deGetMicroseconds(); 3060 3061 result.duration.allocDuration = endTime - startTime; 3062 } 3063 else if (m_useUnusedSpecifiedBuffer) 3064 { 3065 deUint64 startTime; 3066 deUint64 endTime; 3067 3068 // Specify buffer contents 3069 startTime = deGetMicroseconds(); 3070 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); 3071 endTime = deGetMicroseconds(); 3072 3073 result.duration.allocDuration = endTime - startTime; 3074 } 3075 else 3076 { 3077 // No alloc, no time 3078 result.duration.allocDuration = 0; 3079 } 3080 3081 // upload 3082 { 3083 void* mapPtr; 3084 3085 // Map 3086 { 3087 deUint64 startTime; 3088 deUint64 endTime; 3089 3090 startTime = deGetMicroseconds(); 3091 if (m_fullUpload) 3092 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags); 3093 else 3094 { 3095 // upload to buffer center 3096 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags); 3097 } 3098 endTime = deGetMicroseconds(); 3099 3100 if (!mapPtr) 3101 throw tcu::Exception("MapBufferRange returned NULL"); 3102 3103 result.duration.mapDuration = endTime - startTime; 3104 } 3105 3106 // Write 3107 { 3108 if (!m_flushPartial) 3109 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize); 3110 else 3111 result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize); 3112 } 3113 3114 // Flush 3115 { 3116 deUint64 startTime; 3117 deUint64 endTime; 3118 3119 startTime = deGetMicroseconds(); 3120 3121 if (m_flushPartial) 3122 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2); 3123 else if (!m_flushInParts) 3124 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize); 3125 else 3126 { 3127 const int p1 = 0; 3128 const int p2 = mappedSize / 3; 3129 const int p3 = mappedSize / 2; 3130 const int p4 = mappedSize * 2 / 4; 3131 const int p5 = mappedSize; 3132 3133 // flush in mixed order 3134 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2, p3-p2); 3135 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1, p2-p1); 3136 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4, p5-p4); 3137 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3, p4-p3); 3138 } 3139 3140 endTime = deGetMicroseconds(); 3141 3142 result.duration.flushDuration = endTime - startTime; 3143 } 3144 3145 // Unmap 3146 { 3147 deUint64 startTime; 3148 deUint64 endTime; 3149 glw::GLboolean unmapSuccessful; 3150 3151 startTime = deGetMicroseconds(); 3152 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER); 3153 endTime = deGetMicroseconds(); 3154 3155 // if unmapping fails, just try again later 3156 if (!unmapSuccessful) 3157 throw UnmapFailureError(); 3158 3159 result.duration.unmapDuration = endTime - startTime; 3160 } 3161 3162 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration; 3163 result.duration.fitResponseDuration = result.duration.totalDuration; 3164 } 3165 } 3166 3167 template <typename SampleType> 3168 class ModifyAfterBasicCase : public BasicBufferCase<SampleType> 3169 { 3170 public: 3171 ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest); 3172 ~ModifyAfterBasicCase (void); 3173 3174 void init (void); 3175 void deinit (void); 3176 3177 protected: 3178 void drawBufferRange (int begin, int end); 3179 3180 private: 3181 enum 3182 { 3183 NUM_SAMPLES = 20, 3184 }; 3185 3186 3187 bool runSample (int iteration, UploadSampleResult<SampleType>& sample); 3188 bool prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize); 3189 void logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results); 3190 3191 virtual void testWithBufferSize (UploadSampleResult<SampleType>& result, int bufferSize) = 0; 3192 3193 int m_unmappingErrors; 3194 3195 protected: 3196 const bool m_bufferUnspecifiedAfterTest; 3197 const deUint32 m_bufferUsage; 3198 std::vector<deUint8> m_zeroData; 3199 3200 using BasicBufferCase<SampleType>::m_testCtx; 3201 using BasicBufferCase<SampleType>::m_context; 3202 3203 using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE; 3204 using BasicBufferCase<SampleType>::m_dummyProgram; 3205 using BasicBufferCase<SampleType>::m_dummyProgramPosLoc; 3206 using BasicBufferCase<SampleType>::m_bufferID; 3207 using BasicBufferCase<SampleType>::m_numSamples; 3208 using BasicBufferCase<SampleType>::m_bufferSizeMin; 3209 using BasicBufferCase<SampleType>::m_bufferSizeMax; 3210 using BasicBufferCase<SampleType>::m_allocateLargerBuffer; 3211 }; 3212 3213 template <typename SampleType> 3214 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest) 3215 : BasicBufferCase<SampleType> (context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0) 3216 , m_unmappingErrors (0) 3217 , m_bufferUnspecifiedAfterTest (bufferUnspecifiedAfterTest) 3218 , m_bufferUsage (usage) 3219 , m_zeroData () 3220 { 3221 } 3222 3223 template <typename SampleType> 3224 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void) 3225 { 3226 BasicBufferCase<SampleType>::deinit(); 3227 } 3228 3229 template <typename SampleType> 3230 void ModifyAfterBasicCase<SampleType>::init (void) 3231 { 3232 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3233 3234 // init parent 3235 3236 BasicBufferCase<SampleType>::init(); 3237 3238 // upload source 3239 m_zeroData.resize(m_bufferSizeMax, 0x00); 3240 3241 // log basic info 3242 3243 m_testCtx.getLog() 3244 << tcu::TestLog::Message 3245 << "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n" 3246 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]." 3247 << tcu::TestLog::EndMessage; 3248 3249 // log which transfer rate is the test result and buffer info 3250 3251 m_testCtx.getLog() 3252 << tcu::TestLog::Message 3253 << "Test result is the median transfer rate of the test samples.\n" 3254 << "Buffer usage = " << glu::getUsageName(m_bufferUsage) 3255 << tcu::TestLog::EndMessage; 3256 3257 // Set state for drawing so that we don't have to change these during the iteration 3258 { 3259 gl.useProgram(m_dummyProgram->getProgram()); 3260 gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE); 3261 gl.enableVertexAttribArray(m_dummyProgramPosLoc); 3262 } 3263 } 3264 3265 template <typename SampleType> 3266 void ModifyAfterBasicCase<SampleType>::deinit (void) 3267 { 3268 m_zeroData.clear(); 3269 3270 BasicBufferCase<SampleType>::deinit(); 3271 } 3272 3273 template <typename SampleType> 3274 void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end) 3275 { 3276 DE_ASSERT(begin % (int)sizeof(float[4]) == 0); 3277 DE_ASSERT(end % (int)sizeof(float[4]) == 0); 3278 3279 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3280 3281 // use given range 3282 gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1); 3283 gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1); 3284 } 3285 3286 template <typename SampleType> 3287 bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample) 3288 { 3289 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3290 const int bufferSize = sample.bufferSize; 3291 bool testOk; 3292 3293 testOk = prepareAndRunTest(iteration, sample, bufferSize); 3294 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample"); 3295 3296 if (!testOk) 3297 { 3298 const int unmapFailureThreshold = 4; 3299 3300 // only unmapping error can cause iteration failure 3301 if (++m_unmappingErrors >= unmapFailureThreshold) 3302 throw tcu::TestError("Too many unmapping errors, cannot continue."); 3303 3304 // just try again 3305 return false; 3306 } 3307 3308 return true; 3309 } 3310 3311 template <typename SampleType> 3312 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize) 3313 { 3314 DE_UNREF(iteration); 3315 3316 DE_ASSERT(!m_bufferID); 3317 DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4 3318 3319 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3320 bool testRunOk = true; 3321 bool unmappingFailed = false; 3322 3323 // Upload initial buffer to the GPU... 3324 gl.genBuffers(1, &m_bufferID); 3325 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); 3326 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); 3327 3328 // ...use it... 3329 gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 3330 drawBufferRange(0, bufferSize); 3331 3332 // ..and make sure it is uploaded 3333 BasicBufferCase<SampleType>::waitGLResults(); 3334 3335 // warmup CPU before the test to make sure the power management governor 3336 // keeps us in the "high performance" mode 3337 { 3338 deYield(); 3339 tcu::warmupCPU(); 3340 deYield(); 3341 } 3342 3343 // test 3344 try 3345 { 3346 // buffer is uploaded to the GPU. Draw from it. 3347 drawBufferRange(0, bufferSize); 3348 3349 // and test upload 3350 testWithBufferSize(result, bufferSize); 3351 } 3352 catch (UnmapFailureError&) 3353 { 3354 testRunOk = false; 3355 unmappingFailed = true; 3356 } 3357 3358 // clean up: make sure buffer is not in upload queue and delete it 3359 3360 // sourcing unspecified data causes undefined results, possibly program termination 3361 if (m_bufferUnspecifiedAfterTest || unmappingFailed) 3362 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); 3363 3364 drawBufferRange(0, bufferSize); 3365 BasicBufferCase<SampleType>::waitGLResults(); 3366 3367 gl.deleteBuffers(1, &m_bufferID); 3368 m_bufferID = 0; 3369 3370 return testRunOk; 3371 } 3372 3373 template <typename SampleType> 3374 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results) 3375 { 3376 const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false); 3377 3378 // Return median transfer rate of the samples 3379 3380 if (analysis.transferRateMedian == std::numeric_limits<float>::infinity()) 3381 { 3382 // sample times are 1) invalid or 2) timer resolution too low 3383 // report speed 0 bytes / s since real value cannot be determined 3384 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str()); 3385 } 3386 else 3387 { 3388 // report transfer rate in MB / s 3389 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str()); 3390 } 3391 } 3392 3393 class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration> 3394 { 3395 public: 3396 3397 enum CaseFlags 3398 { 3399 FLAG_RESPECIFY_SIZE = 0x1, 3400 FLAG_UPLOAD_REPEATED = 0x2, 3401 }; 3402 3403 ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags); 3404 ~ModifyAfterWithBufferDataCase (void); 3405 3406 void init (void); 3407 void deinit (void); 3408 private: 3409 void testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize); 3410 3411 enum 3412 { 3413 NUM_REPEATS = 2 3414 }; 3415 3416 const bool m_respecifySize; 3417 const bool m_repeatedUpload; 3418 const float m_sizeDifferenceFactor; 3419 }; 3420 3421 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags) 3422 : ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false) 3423 , m_respecifySize ((flags & FLAG_RESPECIFY_SIZE) != 0) 3424 , m_repeatedUpload ((flags & FLAG_UPLOAD_REPEATED) != 0) 3425 , m_sizeDifferenceFactor (1.3f) 3426 { 3427 DE_ASSERT(!(m_repeatedUpload && m_respecifySize)); 3428 } 3429 3430 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void) 3431 { 3432 deinit(); 3433 } 3434 3435 void ModifyAfterWithBufferDataCase::init (void) 3436 { 3437 // Log the purpose of the test 3438 3439 if (m_repeatedUpload) 3440 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage; 3441 else 3442 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage; 3443 3444 m_testCtx.getLog() 3445 << tcu::TestLog::Message 3446 << ((m_respecifySize) ? 3447 ("Buffer size is increased and contents are modified with BufferData().\n") : 3448 ("Buffer contents are modified with BufferData().\n")) 3449 << tcu::TestLog::EndMessage; 3450 3451 // init parent 3452 ModifyAfterBasicCase<SingleOperationDuration>::init(); 3453 3454 // make sure our zeroBuffer is large enough 3455 if (m_respecifySize) 3456 { 3457 const int largerBufferSize = deAlign32((int)(m_bufferSizeMax * m_sizeDifferenceFactor), 4*4); 3458 m_zeroData.resize(largerBufferSize, 0x00); 3459 } 3460 } 3461 3462 void ModifyAfterWithBufferDataCase::deinit (void) 3463 { 3464 ModifyAfterBasicCase<SingleOperationDuration>::deinit(); 3465 } 3466 3467 void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize) 3468 { 3469 // always draw the same amount to make compares between cases sensible 3470 const int drawStart = deAlign32(bufferSize / 4, 4*4); 3471 const int drawEnd = deAlign32(bufferSize * 3 / 4, 4*4); 3472 3473 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3474 const int largerBufferSize = deAlign32((int)(bufferSize * m_sizeDifferenceFactor), 4*4); 3475 const int newBufferSize = (m_respecifySize) ? (largerBufferSize) : (bufferSize); 3476 deUint64 startTime; 3477 deUint64 endTime; 3478 3479 // repeat upload-draw 3480 if (m_repeatedUpload) 3481 { 3482 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx) 3483 { 3484 gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage); 3485 drawBufferRange(drawStart, drawEnd); 3486 } 3487 } 3488 3489 // test upload 3490 startTime = deGetMicroseconds(); 3491 gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage); 3492 endTime = deGetMicroseconds(); 3493 3494 result.duration.totalDuration = endTime - startTime; 3495 result.duration.fitResponseDuration = result.duration.totalDuration; 3496 result.writtenSize = newBufferSize; 3497 } 3498 3499 class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration> 3500 { 3501 public: 3502 3503 enum CaseFlags 3504 { 3505 FLAG_PARTIAL = 0x1, 3506 FLAG_UPLOAD_REPEATED = 0x2, 3507 }; 3508 3509 ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags); 3510 ~ModifyAfterWithBufferSubDataCase (void); 3511 3512 void init (void); 3513 void deinit (void); 3514 private: 3515 void testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize); 3516 3517 enum 3518 { 3519 NUM_REPEATS = 2 3520 }; 3521 3522 const bool m_partialUpload; 3523 const bool m_repeatedUpload; 3524 }; 3525 3526 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags) 3527 : ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false) 3528 , m_partialUpload ((flags & FLAG_PARTIAL) != 0) 3529 , m_repeatedUpload ((flags & FLAG_UPLOAD_REPEATED) != 0) 3530 { 3531 } 3532 3533 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void) 3534 { 3535 deinit(); 3536 } 3537 3538 void ModifyAfterWithBufferSubDataCase::init (void) 3539 { 3540 // Log the purpose of the test 3541 3542 if (m_repeatedUpload) 3543 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage; 3544 else 3545 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage; 3546 3547 m_testCtx.getLog() 3548 << tcu::TestLog::Message 3549 << ((m_partialUpload) ? 3550 ("Half of the buffer contents are modified.\n") : 3551 ("Buffer contents are fully respecified.\n")) 3552 << tcu::TestLog::EndMessage; 3553 3554 ModifyAfterBasicCase<SingleOperationDuration>::init(); 3555 } 3556 3557 void ModifyAfterWithBufferSubDataCase::deinit (void) 3558 { 3559 ModifyAfterBasicCase<SingleOperationDuration>::deinit(); 3560 } 3561 3562 void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize) 3563 { 3564 // always draw the same amount to make compares between cases sensible 3565 const int drawStart = deAlign32(bufferSize / 4, 4*4); 3566 const int drawEnd = deAlign32(bufferSize * 3 / 4, 4*4); 3567 3568 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3569 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4); 3570 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4); 3571 deUint64 startTime; 3572 deUint64 endTime; 3573 3574 // make upload-draw stream 3575 if (m_repeatedUpload) 3576 { 3577 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx) 3578 { 3579 gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]); 3580 drawBufferRange(drawStart, drawEnd); 3581 } 3582 } 3583 3584 // test upload 3585 startTime = deGetMicroseconds(); 3586 gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]); 3587 endTime = deGetMicroseconds(); 3588 3589 result.duration.totalDuration = endTime - startTime; 3590 result.duration.fitResponseDuration = result.duration.totalDuration; 3591 result.writtenSize = subdataSize; 3592 } 3593 3594 class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc> 3595 { 3596 public: 3597 3598 enum CaseFlags 3599 { 3600 FLAG_PARTIAL = 0x1, 3601 }; 3602 3603 ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags); 3604 ~ModifyAfterWithMapBufferRangeCase (void); 3605 3606 void init (void); 3607 void deinit (void); 3608 private: 3609 static bool isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags); 3610 void testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize); 3611 3612 const bool m_partialUpload; 3613 const deUint32 m_mapFlags; 3614 }; 3615 3616 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags) 3617 : ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags)) 3618 , m_partialUpload ((flags & FLAG_PARTIAL) != 0) 3619 , m_mapFlags (glMapFlags) 3620 { 3621 } 3622 3623 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void) 3624 { 3625 deinit(); 3626 } 3627 3628 void ModifyAfterWithMapBufferRangeCase::init (void) 3629 { 3630 // Log the purpose of the test 3631 3632 m_testCtx.getLog() 3633 << tcu::TestLog::Message 3634 << "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n" 3635 << ((m_partialUpload) ? 3636 ("Half of the buffer is mapped.\n") : 3637 ("Whole buffer is mapped.\n")) 3638 << "Map bits:\n" 3639 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) 3640 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) 3641 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) 3642 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) 3643 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) 3644 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : ("")) 3645 << tcu::TestLog::EndMessage; 3646 3647 ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init(); 3648 } 3649 3650 void ModifyAfterWithMapBufferRangeCase::deinit (void) 3651 { 3652 ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit(); 3653 } 3654 3655 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags) 3656 { 3657 if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0)) 3658 return true; 3659 3660 return false; 3661 } 3662 3663 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize) 3664 { 3665 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3666 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4); 3667 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4); 3668 void* mapPtr; 3669 3670 // map 3671 { 3672 deUint64 startTime; 3673 deUint64 endTime; 3674 3675 startTime = deGetMicroseconds(); 3676 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags); 3677 endTime = deGetMicroseconds(); 3678 3679 if (!mapPtr) 3680 throw tcu::TestError("mapBufferRange returned null"); 3681 3682 result.duration.mapDuration = endTime - startTime; 3683 } 3684 3685 // write 3686 { 3687 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize); 3688 } 3689 3690 // unmap 3691 { 3692 deUint64 startTime; 3693 deUint64 endTime; 3694 glw::GLboolean unmapSucceeded; 3695 3696 startTime = deGetMicroseconds(); 3697 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER); 3698 endTime = deGetMicroseconds(); 3699 3700 if (unmapSucceeded != GL_TRUE) 3701 throw UnmapFailureError(); 3702 3703 result.duration.unmapDuration = endTime - startTime; 3704 } 3705 3706 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration; 3707 result.duration.fitResponseDuration = result.duration.totalDuration; 3708 result.writtenSize = subdataSize; 3709 } 3710 3711 class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc> 3712 { 3713 public: 3714 3715 enum CaseFlags 3716 { 3717 FLAG_PARTIAL = 0x1, 3718 }; 3719 3720 ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags); 3721 ~ModifyAfterWithMapBufferFlushCase (void); 3722 3723 void init (void); 3724 void deinit (void); 3725 private: 3726 static bool isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags); 3727 void testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize); 3728 3729 const bool m_partialUpload; 3730 const deUint32 m_mapFlags; 3731 }; 3732 3733 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags) 3734 : ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags)) 3735 , m_partialUpload ((flags & FLAG_PARTIAL) != 0) 3736 , m_mapFlags (glMapFlags) 3737 { 3738 } 3739 3740 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void) 3741 { 3742 deinit(); 3743 } 3744 3745 void ModifyAfterWithMapBufferFlushCase::init (void) 3746 { 3747 // Log the purpose of the test 3748 3749 m_testCtx.getLog() 3750 << tcu::TestLog::Message 3751 << "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n" 3752 << ((m_partialUpload) ? 3753 ("Half of the buffer is mapped.\n") : 3754 ("Whole buffer is mapped.\n")) 3755 << "Map bits:\n" 3756 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) 3757 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) 3758 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) 3759 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) 3760 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) 3761 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : ("")) 3762 << tcu::TestLog::EndMessage; 3763 3764 ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init(); 3765 } 3766 3767 void ModifyAfterWithMapBufferFlushCase::deinit (void) 3768 { 3769 ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit(); 3770 } 3771 3772 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags) 3773 { 3774 if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0)) 3775 return true; 3776 3777 return false; 3778 } 3779 3780 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize) 3781 { 3782 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3783 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4); 3784 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4); 3785 void* mapPtr; 3786 3787 // map 3788 { 3789 deUint64 startTime; 3790 deUint64 endTime; 3791 3792 startTime = deGetMicroseconds(); 3793 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags); 3794 endTime = deGetMicroseconds(); 3795 3796 if (!mapPtr) 3797 throw tcu::TestError("mapBufferRange returned null"); 3798 3799 result.duration.mapDuration = endTime - startTime; 3800 } 3801 3802 // write 3803 { 3804 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize); 3805 } 3806 3807 // flush 3808 { 3809 deUint64 startTime; 3810 deUint64 endTime; 3811 3812 startTime = deGetMicroseconds(); 3813 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize); 3814 endTime = deGetMicroseconds(); 3815 3816 result.duration.flushDuration = endTime - startTime; 3817 } 3818 3819 // unmap 3820 { 3821 deUint64 startTime; 3822 deUint64 endTime; 3823 glw::GLboolean unmapSucceeded; 3824 3825 startTime = deGetMicroseconds(); 3826 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER); 3827 endTime = deGetMicroseconds(); 3828 3829 if (unmapSucceeded != GL_TRUE) 3830 throw UnmapFailureError(); 3831 3832 result.duration.unmapDuration = endTime - startTime; 3833 } 3834 3835 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration; 3836 result.duration.fitResponseDuration = result.duration.totalDuration; 3837 result.writtenSize = subdataSize; 3838 } 3839 3840 enum DrawMethod 3841 { 3842 DRAWMETHOD_DRAW_ARRAYS = 0, 3843 DRAWMETHOD_DRAW_ELEMENTS, 3844 3845 DRAWMETHOD_LAST 3846 }; 3847 3848 enum TargetBuffer 3849 { 3850 TARGETBUFFER_VERTEX = 0, 3851 TARGETBUFFER_INDEX, 3852 3853 TARGETBUFFER_LAST 3854 }; 3855 3856 enum BufferState 3857 { 3858 BUFFERSTATE_NEW = 0, 3859 BUFFERSTATE_EXISTING, 3860 3861 BUFFERSTATE_LAST 3862 }; 3863 3864 enum UploadMethod 3865 { 3866 UPLOADMETHOD_BUFFER_DATA = 0, 3867 UPLOADMETHOD_BUFFER_SUB_DATA, 3868 UPLOADMETHOD_MAP_BUFFER_RANGE, 3869 3870 UPLOADMETHOD_LAST 3871 }; 3872 3873 enum UnrelatedBufferType 3874 { 3875 UNRELATEDBUFFERTYPE_NONE = 0, 3876 UNRELATEDBUFFERTYPE_VERTEX, 3877 3878 UNRELATEDBUFFERTYPE_LAST 3879 }; 3880 3881 enum UploadRange 3882 { 3883 UPLOADRANGE_FULL = 0, 3884 UPLOADRANGE_PARTIAL, 3885 3886 UPLOADRANGE_LAST 3887 }; 3888 3889 struct LayeredGridSpec 3890 { 3891 int gridWidth; 3892 int gridHeight; 3893 int gridLayers; 3894 }; 3895 3896 static int getLayeredGridNumVertices (const LayeredGridSpec& scene) 3897 { 3898 return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6; 3899 } 3900 3901 static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene) 3902 { 3903 // interleave color & vertex data 3904 const tcu::Vec4 green (0.0f, 1.0f, 0.0f, 0.7f); 3905 const tcu::Vec4 yellow (1.0f, 1.0f, 0.0f, 0.8f); 3906 3907 vertexData.resize(getLayeredGridNumVertices(scene) * 2); 3908 3909 for (int cellY = 0; cellY < scene.gridHeight; ++cellY) 3910 for (int cellX = 0; cellX < scene.gridWidth; ++cellX) 3911 for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ) 3912 { 3913 const tcu::Vec4 color = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow); 3914 const float cellLeft = (float(cellX ) / scene.gridWidth - 0.5f) * 2.0f; 3915 const float cellRight = (float(cellX+1) / scene.gridWidth - 0.5f) * 2.0f; 3916 const float cellTop = (float(cellY+1) / scene.gridHeight - 0.5f) * 2.0f; 3917 const float cellBottom = (float(cellY ) / scene.gridHeight - 0.5f) * 2.0f; 3918 3919 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 0] = color; 3920 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f); 3921 3922 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 2] = color; 3923 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f); 3924 3925 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 4] = color; 3926 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f); 3927 3928 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 6] = color; 3929 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f); 3930 3931 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 8] = color; 3932 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f); 3933 3934 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color; 3935 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f); 3936 } 3937 } 3938 3939 static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene) 3940 { 3941 indexData.resize(getLayeredGridNumVertices(scene) * 2); 3942 3943 for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx) 3944 indexData[ndx] = ndx; 3945 } 3946 3947 class RenderPerformanceTestBase : public TestCase 3948 { 3949 public: 3950 RenderPerformanceTestBase (Context& context, const char* name, const char* description); 3951 ~RenderPerformanceTestBase (void); 3952 3953 protected: 3954 void init (void); 3955 void deinit (void); 3956 3957 void waitGLResults (void) const; 3958 void setupVertexAttribs (void) const; 3959 3960 enum 3961 { 3962 RENDER_AREA_SIZE = 128 3963 }; 3964 3965 private: 3966 glu::ShaderProgram* m_renderProgram; 3967 int m_colorLoc; 3968 int m_positionLoc; 3969 }; 3970 3971 RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description) 3972 : TestCase (context, tcu::NODETYPE_PERFORMANCE, name, description) 3973 , m_renderProgram (DE_NULL) 3974 , m_colorLoc (0) 3975 , m_positionLoc (0) 3976 { 3977 } 3978 3979 RenderPerformanceTestBase::~RenderPerformanceTestBase (void) 3980 { 3981 deinit(); 3982 } 3983 3984 void RenderPerformanceTestBase::init (void) 3985 { 3986 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 3987 3988 m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader)); 3989 if (!m_renderProgram->isOk()) 3990 { 3991 m_testCtx.getLog() << *m_renderProgram; 3992 throw tcu::TestError("could not build program"); 3993 } 3994 3995 m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color"); 3996 m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position"); 3997 3998 if (m_colorLoc == -1) 3999 throw tcu::TestError("Location of attribute a_color was -1"); 4000 if (m_positionLoc == -1) 4001 throw tcu::TestError("Location of attribute a_position was -1"); 4002 } 4003 4004 void RenderPerformanceTestBase::deinit (void) 4005 { 4006 delete m_renderProgram; 4007 m_renderProgram = DE_NULL; 4008 } 4009 4010 void RenderPerformanceTestBase::setupVertexAttribs (void) const 4011 { 4012 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4013 4014 // buffers are bound 4015 4016 gl.enableVertexAttribArray(m_colorLoc); 4017 gl.enableVertexAttribArray(m_positionLoc); 4018 4019 gl.vertexAttribPointer(m_colorLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0); 4020 gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1); 4021 4022 gl.useProgram(m_renderProgram->getProgram()); 4023 4024 GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering"); 4025 } 4026 4027 void RenderPerformanceTestBase::waitGLResults (void) const 4028 { 4029 tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4030 glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess()); 4031 } 4032 4033 template <typename SampleType> 4034 class RenderCase : public RenderPerformanceTestBase 4035 { 4036 public: 4037 RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod); 4038 ~RenderCase (void); 4039 4040 protected: 4041 void init (void); 4042 void deinit (void); 4043 4044 private: 4045 IterateResult iterate (void); 4046 4047 protected: 4048 struct SampleResult 4049 { 4050 LayeredGridSpec scene; 4051 RenderSampleResult<SampleType> result; 4052 }; 4053 4054 int getMinWorkloadSize (void) const; 4055 int getMaxWorkloadSize (void) const; 4056 int getMinWorkloadDataSize (void) const; 4057 int getMaxWorkloadDataSize (void) const; 4058 int getVertexDataSize (void) const; 4059 int getNumSamples (void) const; 4060 void uploadScene (const LayeredGridSpec& scene); 4061 4062 virtual void runSample (SampleResult& sample) = 0; 4063 virtual void logAndSetTestResult (const std::vector<SampleResult>& results); 4064 4065 void mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const; 4066 4067 const DrawMethod m_drawMethod; 4068 4069 private: 4070 glw::GLuint m_attributeBufferID; 4071 glw::GLuint m_indexBufferID; 4072 int m_iterationNdx; 4073 std::vector<int> m_iterationOrder; 4074 std::vector<SampleResult> m_results; 4075 int m_numUnmapFailures; 4076 }; 4077 4078 template <typename SampleType> 4079 RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod) 4080 : RenderPerformanceTestBase (context, name, description) 4081 , m_drawMethod (drawMethod) 4082 , m_attributeBufferID (0) 4083 , m_indexBufferID (0) 4084 , m_iterationNdx (0) 4085 , m_numUnmapFailures (0) 4086 { 4087 DE_ASSERT(drawMethod < DRAWMETHOD_LAST); 4088 } 4089 4090 template <typename SampleType> 4091 RenderCase<SampleType>::~RenderCase (void) 4092 { 4093 deinit(); 4094 } 4095 4096 template <typename SampleType> 4097 void RenderCase<SampleType>::init (void) 4098 { 4099 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4100 4101 RenderPerformanceTestBase::init(); 4102 4103 // requirements 4104 4105 if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE || 4106 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE) 4107 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target"); 4108 4109 // gl state 4110 4111 gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4112 4113 // enable bleding to prevent grid layers from being discarded 4114 gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 4115 gl.blendEquation(GL_FUNC_ADD); 4116 gl.enable(GL_BLEND); 4117 4118 // generate iterations 4119 4120 { 4121 const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80, 86, 92, 98, 104, 110, 116, 122, 128 }; 4122 4123 for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx) 4124 { 4125 m_results.push_back(SampleResult()); 4126 4127 m_results.back().scene.gridHeight = gridSizes[gridNdx]; 4128 m_results.back().scene.gridWidth = gridSizes[gridNdx]; 4129 m_results.back().scene.gridLayers = 5; 4130 4131 m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene); 4132 4133 // test cases set these, initialize to dummy values 4134 m_results.back().result.renderDataSize = -1; 4135 m_results.back().result.uploadedDataSize = -1; 4136 m_results.back().result.unrelatedDataSize = -1; 4137 } 4138 } 4139 4140 // randomize iteration order 4141 { 4142 m_iterationOrder.resize(m_results.size()); 4143 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size()); 4144 } 4145 } 4146 4147 template <typename SampleType> 4148 void RenderCase<SampleType>::deinit (void) 4149 { 4150 RenderPerformanceTestBase::deinit(); 4151 4152 if (m_attributeBufferID) 4153 { 4154 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID); 4155 m_attributeBufferID = 0; 4156 } 4157 4158 if (m_indexBufferID) 4159 { 4160 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID); 4161 m_indexBufferID = 0; 4162 } 4163 } 4164 4165 template <typename SampleType> 4166 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void) 4167 { 4168 const int unmapFailureThreshold = 3; 4169 const int currentIteration = m_iterationNdx; 4170 const int currentConfigNdx = m_iterationOrder[currentIteration]; 4171 SampleResult& currentSample = m_results[currentConfigNdx]; 4172 4173 try 4174 { 4175 runSample(currentSample); 4176 ++m_iterationNdx; 4177 } 4178 catch (const UnmapFailureError& ex) 4179 { 4180 DE_UNREF(ex); 4181 ++m_numUnmapFailures; 4182 } 4183 4184 if (m_numUnmapFailures > unmapFailureThreshold) 4185 throw tcu::TestError("Got too many unmap errors"); 4186 4187 if (m_iterationNdx < (int)m_iterationOrder.size()) 4188 return CONTINUE; 4189 4190 logAndSetTestResult(m_results); 4191 return STOP; 4192 } 4193 4194 template <typename SampleType> 4195 int RenderCase<SampleType>::getMinWorkloadSize (void) const 4196 { 4197 int result = getLayeredGridNumVertices(m_results[0].scene); 4198 4199 for (int ndx = 1; ndx < (int)m_results.size(); ++ndx) 4200 { 4201 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene); 4202 result = de::min(result, workloadSize); 4203 } 4204 4205 return result; 4206 } 4207 4208 template <typename SampleType> 4209 int RenderCase<SampleType>::getMaxWorkloadSize (void) const 4210 { 4211 int result = getLayeredGridNumVertices(m_results[0].scene); 4212 4213 for (int ndx = 1; ndx < (int)m_results.size(); ++ndx) 4214 { 4215 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene); 4216 result = de::max(result, workloadSize); 4217 } 4218 4219 return result; 4220 } 4221 4222 template <typename SampleType> 4223 int RenderCase<SampleType>::getMinWorkloadDataSize (void) const 4224 { 4225 return getMinWorkloadSize() * getVertexDataSize(); 4226 } 4227 4228 template <typename SampleType> 4229 int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const 4230 { 4231 return getMaxWorkloadSize() * getVertexDataSize(); 4232 } 4233 4234 template <typename SampleType> 4235 int RenderCase<SampleType>::getVertexDataSize (void) const 4236 { 4237 const int numVectors = 2; 4238 const int vec4Size = 4 * sizeof(float); 4239 4240 return numVectors * vec4Size; 4241 } 4242 4243 template <typename SampleType> 4244 int RenderCase<SampleType>::getNumSamples (void) const 4245 { 4246 return (int)m_results.size(); 4247 } 4248 4249 template <typename SampleType> 4250 void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene) 4251 { 4252 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4253 4254 // vertex buffer 4255 { 4256 std::vector<tcu::Vec4> vertexData; 4257 4258 generateLayeredGridVertexAttribData4C4V(vertexData, scene); 4259 4260 if (m_attributeBufferID == 0) 4261 gl.genBuffers(1, &m_attributeBufferID); 4262 gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID); 4263 gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); 4264 } 4265 4266 // index buffer 4267 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4268 { 4269 std::vector<deUint32> indexData; 4270 4271 generateLayeredGridIndexData(indexData, scene); 4272 4273 if (m_indexBufferID == 0) 4274 gl.genBuffers(1, &m_indexBufferID); 4275 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID); 4276 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW); 4277 } 4278 4279 GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers"); 4280 } 4281 4282 template <typename SampleType> 4283 void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results) 4284 { 4285 std::vector<RenderSampleResult<SampleType> > mappedResults; 4286 4287 mapResultsToRenderRateFormat(mappedResults, results); 4288 4289 { 4290 const RenderSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), mappedResults); 4291 const float rate = analysis.renderRateAtRange; 4292 4293 if (rate == std::numeric_limits<float>::infinity()) 4294 { 4295 // sample times are 1) invalid or 2) timer resolution too low 4296 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str()); 4297 } 4298 else 4299 { 4300 // report transfer rate in millions of MiB/s 4301 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str()); 4302 } 4303 } 4304 } 4305 4306 template <typename SampleType> 4307 void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const 4308 { 4309 dst.resize(src.size()); 4310 4311 for (int ndx = 0; ndx < (int)src.size(); ++ndx) 4312 dst[ndx] = src[ndx].result; 4313 } 4314 4315 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration> 4316 { 4317 public: 4318 ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod); 4319 4320 private: 4321 void init (void); 4322 void runSample (SampleResult& sample); 4323 }; 4324 4325 ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod) 4326 : RenderCase<RenderReadDuration> (context, name, description, drawMethod) 4327 { 4328 } 4329 4330 void ReferenceRenderTimeCase::init (void) 4331 { 4332 const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); 4333 4334 // init parent 4335 RenderCase<RenderReadDuration>::init(); 4336 4337 // log 4338 m_testCtx.getLog() 4339 << tcu::TestLog::Message 4340 << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n" 4341 << getNumSamples() << " test samples. Sample order is randomized.\n" 4342 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" 4343 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n" 4344 << "Workload sizes are in the range [" 4345 << getMinWorkloadSize() << ", " 4346 << getMaxWorkloadSize() << "] vertices ([" 4347 << getHumanReadableByteSize(getMinWorkloadDataSize()) << "," 4348 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n" 4349 << "Test result is the approximated total processing rate in MiB / s.\n" 4350 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : ("")) 4351 << "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results." 4352 << tcu::TestLog::EndMessage; 4353 } 4354 4355 void ReferenceRenderTimeCase::runSample (SampleResult& sample) 4356 { 4357 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4358 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4359 const int numVertices = getLayeredGridNumVertices(sample.scene); 4360 const glu::Buffer arrayBuffer (m_context.getRenderContext()); 4361 const glu::Buffer indexBuffer (m_context.getRenderContext()); 4362 const glu::Buffer unrelatedBuffer (m_context.getRenderContext()); 4363 std::vector<tcu::Vec4> vertexData; 4364 std::vector<deUint32> indexData; 4365 deUint64 startTime; 4366 deUint64 endTime; 4367 4368 // generate and upload buffers 4369 4370 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); 4371 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 4372 gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); 4373 4374 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4375 { 4376 generateLayeredGridIndexData(indexData, sample.scene); 4377 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); 4378 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW); 4379 } 4380 4381 setupVertexAttribs(); 4382 4383 // make sure data is uploaded 4384 4385 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 4386 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4387 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4388 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4389 else 4390 DE_ASSERT(false); 4391 waitGLResults(); 4392 4393 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); 4394 gl.clear(GL_COLOR_BUFFER_BIT); 4395 waitGLResults(); 4396 4397 tcu::warmupCPU(); 4398 4399 // Measure both draw and associated readpixels 4400 { 4401 startTime = deGetMicroseconds(); 4402 4403 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 4404 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4405 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4406 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4407 else 4408 DE_ASSERT(false); 4409 4410 endTime = deGetMicroseconds(); 4411 4412 sample.result.duration.renderDuration = endTime - startTime; 4413 } 4414 4415 { 4416 startTime = deGetMicroseconds(); 4417 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 4418 endTime = deGetMicroseconds(); 4419 4420 sample.result.duration.readDuration = endTime - startTime; 4421 } 4422 4423 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices; 4424 sample.result.uploadedDataSize = 0; 4425 sample.result.unrelatedDataSize = 0; 4426 sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; 4427 sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; 4428 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; 4429 } 4430 4431 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration> 4432 { 4433 public: 4434 UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod); 4435 4436 private: 4437 void init (void); 4438 void runSample (SampleResult& sample); 4439 4440 const UploadMethod m_unrelatedUploadMethod; 4441 }; 4442 4443 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod) 4444 : RenderCase<UnrelatedUploadRenderReadDuration> (context, name, description, drawMethod) 4445 , m_unrelatedUploadMethod (unrelatedUploadMethod) 4446 { 4447 DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST); 4448 } 4449 4450 void UnrelatedUploadRenderTimeCase::init (void) 4451 { 4452 const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); 4453 tcu::MessageBuilder message (&m_testCtx.getLog()); 4454 4455 // init parent 4456 RenderCase<UnrelatedUploadRenderReadDuration>::init(); 4457 4458 // log 4459 4460 message 4461 << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n" 4462 << "Uploading an unrelated buffer just before issuing the rendering command with " 4463 << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : 4464 (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : 4465 (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange") : 4466 ((const char*)DE_NULL)) 4467 << ".\n" 4468 << getNumSamples() << " test samples. Sample order is randomized.\n" 4469 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" 4470 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n" 4471 << "Workload sizes are in the range [" 4472 << getMinWorkloadSize() << ", " 4473 << getMaxWorkloadSize() << "] vertices ([" 4474 << getHumanReadableByteSize(getMinWorkloadDataSize()) << "," 4475 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n" 4476 << "Unrelated upload sizes are in the range [" 4477 << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", " 4478 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n" 4479 << "Test result is the approximated total processing rate in MiB / s.\n" 4480 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : ("")) 4481 << "Note that the data size and the time used in the unrelated upload is not included in the results.\n" 4482 << "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n" 4483 << tcu::TestLog::EndMessage; 4484 } 4485 4486 void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample) 4487 { 4488 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4489 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4490 const int numVertices = getLayeredGridNumVertices(sample.scene); 4491 const glu::Buffer arrayBuffer (m_context.getRenderContext()); 4492 const glu::Buffer indexBuffer (m_context.getRenderContext()); 4493 const glu::Buffer unrelatedBuffer (m_context.getRenderContext()); 4494 int unrelatedUploadSize = -1; 4495 int renderUploadSize; 4496 std::vector<tcu::Vec4> vertexData; 4497 std::vector<deUint32> indexData; 4498 deUint64 startTime; 4499 deUint64 endTime; 4500 4501 // generate and upload buffers 4502 4503 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); 4504 renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); 4505 4506 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 4507 gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW); 4508 4509 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4510 { 4511 generateLayeredGridIndexData(indexData, sample.scene); 4512 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); 4513 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW); 4514 } 4515 4516 setupVertexAttribs(); 4517 4518 // make sure data is uploaded 4519 4520 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 4521 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4522 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4523 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4524 else 4525 DE_ASSERT(false); 4526 waitGLResults(); 4527 4528 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); 4529 gl.clear(GL_COLOR_BUFFER_BIT); 4530 waitGLResults(); 4531 4532 tcu::warmupCPU(); 4533 4534 // Unrelated upload 4535 if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA) 4536 { 4537 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); 4538 4539 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer); 4540 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW); 4541 } 4542 else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) 4543 { 4544 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); 4545 4546 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer); 4547 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW); 4548 gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]); 4549 } 4550 else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) 4551 { 4552 void* mapPtr; 4553 glw::GLboolean unmapSuccessful; 4554 4555 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); 4556 4557 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer); 4558 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW); 4559 4560 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT); 4561 if (!mapPtr) 4562 throw tcu::Exception("MapBufferRange returned NULL"); 4563 4564 deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize); 4565 4566 // if unmapping fails, just try again later 4567 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER); 4568 if (!unmapSuccessful) 4569 throw UnmapFailureError(); 4570 } 4571 else 4572 DE_ASSERT(false); 4573 4574 DE_ASSERT(unrelatedUploadSize != -1); 4575 4576 // Measure both draw and associated readpixels 4577 { 4578 startTime = deGetMicroseconds(); 4579 4580 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 4581 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4582 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4583 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4584 else 4585 DE_ASSERT(false); 4586 4587 endTime = deGetMicroseconds(); 4588 4589 sample.result.duration.renderDuration = endTime - startTime; 4590 } 4591 4592 { 4593 startTime = deGetMicroseconds(); 4594 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 4595 endTime = deGetMicroseconds(); 4596 4597 sample.result.duration.readDuration = endTime - startTime; 4598 } 4599 4600 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices; 4601 sample.result.uploadedDataSize = renderUploadSize; 4602 sample.result.unrelatedDataSize = unrelatedUploadSize; 4603 sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; 4604 sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; 4605 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; 4606 } 4607 4608 class ReferenceReadPixelsTimeCase : public TestCase 4609 { 4610 public: 4611 ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description); 4612 4613 private: 4614 void init (void); 4615 IterateResult iterate (void); 4616 void logAndSetTestResult (void); 4617 4618 enum 4619 { 4620 RENDER_AREA_SIZE = 128 4621 }; 4622 4623 const int m_numSamples; 4624 int m_sampleNdx; 4625 std::vector<int> m_samples; 4626 }; 4627 4628 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description) 4629 : TestCase (context, tcu::NODETYPE_PERFORMANCE, name, description) 4630 , m_numSamples (20) 4631 , m_sampleNdx (0) 4632 , m_samples (m_numSamples) 4633 { 4634 } 4635 4636 void ReferenceReadPixelsTimeCase::init (void) 4637 { 4638 m_testCtx.getLog() 4639 << tcu::TestLog::Message 4640 << "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n" 4641 << "Test result is the median of the samples in microseconds.\n" 4642 << "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results." 4643 << tcu::TestLog::EndMessage; 4644 } 4645 4646 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void) 4647 { 4648 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4649 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4650 deUint64 startTime; 4651 deUint64 endTime; 4652 4653 deYield(); 4654 tcu::warmupCPU(); 4655 deYield(); 4656 4657 // "Render" something and wait for it 4658 gl.clearColor(0.0f, 1.0f, m_sampleNdx / float(m_numSamples), 1.0f); 4659 gl.clear(GL_COLOR_BUFFER_BIT); 4660 4661 // wait for results 4662 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 4663 4664 // measure time used in readPixels 4665 startTime = deGetMicroseconds(); 4666 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 4667 endTime = deGetMicroseconds(); 4668 4669 m_samples[m_sampleNdx] = (int)(endTime - startTime); 4670 4671 if (++m_sampleNdx < m_numSamples) 4672 return CONTINUE; 4673 4674 logAndSetTestResult(); 4675 return STOP; 4676 } 4677 4678 void ReferenceReadPixelsTimeCase::logAndSetTestResult (void) 4679 { 4680 // Log sample list 4681 { 4682 m_testCtx.getLog() 4683 << tcu::TestLog::SampleList("Samples", "Samples") 4684 << tcu::TestLog::SampleInfo 4685 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 4686 << tcu::TestLog::EndSampleInfo; 4687 4688 for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx) 4689 m_testCtx.getLog() 4690 << tcu::TestLog::Sample 4691 << m_samples[sampleNdx] 4692 << tcu::TestLog::EndSample; 4693 4694 m_testCtx.getLog() << tcu::TestLog::EndSampleList; 4695 } 4696 4697 // Log median 4698 { 4699 float median; 4700 float limit60Low; 4701 float limit60Up; 4702 4703 std::sort(m_samples.begin(), m_samples.end()); 4704 median = linearSample(m_samples, 0.5f); 4705 limit60Low = linearSample(m_samples, 0.2f); 4706 limit60Up = linearSample(m_samples, 0.8f); 4707 4708 m_testCtx.getLog() 4709 << tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median) 4710 << tcu::TestLog::Message 4711 << "60 % of samples within range:\n" 4712 << tcu::TestLog::EndMessage 4713 << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low) 4714 << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up); 4715 4716 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str()); 4717 } 4718 } 4719 4720 template <typename SampleType> 4721 class GenericUploadRenderTimeCase : public RenderCase<SampleType> 4722 { 4723 public: 4724 typedef typename RenderCase<SampleType>::SampleResult SampleResult; 4725 4726 GenericUploadRenderTimeCase (Context& context, 4727 const char* name, 4728 const char* description, 4729 DrawMethod method, 4730 TargetBuffer targetBuffer, 4731 UploadMethod uploadMethod, 4732 BufferState bufferState, 4733 UploadRange uploadRange, 4734 UnrelatedBufferType unrelatedBufferType); 4735 4736 private: 4737 void init (void); 4738 void runSample (SampleResult& sample); 4739 4740 using RenderCase<SampleType>::RENDER_AREA_SIZE; 4741 4742 const TargetBuffer m_targetBuffer; 4743 const BufferState m_bufferState; 4744 const UploadMethod m_uploadMethod; 4745 const UnrelatedBufferType m_unrelatedBufferType; 4746 const UploadRange m_uploadRange; 4747 4748 using RenderCase<SampleType>::m_context; 4749 using RenderCase<SampleType>::m_testCtx; 4750 using RenderCase<SampleType>::m_drawMethod; 4751 }; 4752 4753 template <typename SampleType> 4754 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context& context, 4755 const char* name, 4756 const char* description, 4757 DrawMethod method, 4758 TargetBuffer targetBuffer, 4759 UploadMethod uploadMethod, 4760 BufferState bufferState, 4761 UploadRange uploadRange, 4762 UnrelatedBufferType unrelatedBufferType) 4763 : RenderCase<SampleType> (context, name, description, method) 4764 , m_targetBuffer (targetBuffer) 4765 , m_bufferState (bufferState) 4766 , m_uploadMethod (uploadMethod) 4767 , m_unrelatedBufferType (unrelatedBufferType) 4768 , m_uploadRange (uploadRange) 4769 { 4770 DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST); 4771 DE_ASSERT(m_bufferState < BUFFERSTATE_LAST); 4772 DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST); 4773 DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST); 4774 DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST); 4775 } 4776 4777 template <typename SampleType> 4778 void GenericUploadRenderTimeCase<SampleType>::init (void) 4779 { 4780 // init parent 4781 RenderCase<SampleType>::init(); 4782 4783 // log 4784 { 4785 const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); 4786 const int perVertexSize = (m_targetBuffer == TARGETBUFFER_INDEX) ? (sizeof(deUint32)) : (sizeof(tcu::Vec4[2])); 4787 const int fullMinUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize; 4788 const int fullMaxUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize; 4789 const int minUploadSize = (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4)); 4790 const int maxUploadSize = (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4)); 4791 const int minUnrelatedUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * sizeof(tcu::Vec4[2]); 4792 const int maxUnrelatedUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * sizeof(tcu::Vec4[2]); 4793 4794 m_testCtx.getLog() 4795 << tcu::TestLog::Message 4796 << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n" 4797 << "The " 4798 << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib")) 4799 << " buffer " 4800 << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents ")) 4801 << "sourced by the rendering command " 4802 << ((m_bufferState == BUFFERSTATE_NEW) ? ("is uploaded ") : 4803 (m_uploadRange == UPLOADRANGE_FULL) ? ("are specified ") : 4804 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("are updated (partial upload) ") : 4805 ((const char*)DE_NULL)) 4806 << "just before issuing the rendering command.\n" 4807 << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n")) 4808 << "Buffer " 4809 << ((m_bufferState == BUFFERSTATE_NEW) ? ("is uploaded") : 4810 (m_uploadRange == UPLOADRANGE_FULL) ? ("contents are specified") : 4811 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("contents are partially updated") : 4812 ((const char*)DE_NULL)) 4813 << " with " 4814 << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange")) 4815 << " command. Usage of the target buffer is DYNAMIC_DRAW.\n" 4816 << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : ("")) 4817 << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : ("")) 4818 << RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n" 4819 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" 4820 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n" 4821 << "Workload sizes are in the range [" 4822 << RenderCase<SampleType>::getMinWorkloadSize() << ", " 4823 << RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices " 4824 << "([" 4825 << getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << "," 4826 << getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n" 4827 << "Upload sizes are in the range [" 4828 << getHumanReadableByteSize(minUploadSize) << "," 4829 << getHumanReadableByteSize(maxUploadSize) << "].\n" 4830 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? 4831 ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") : 4832 ("")) 4833 << "Test result is the approximated processing rate in MiB / s.\n" 4834 << "Note that while upload time is measured, the time used is not included in the results.\n" 4835 << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : ("")) 4836 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : ("")) 4837 << "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n" 4838 << tcu::TestLog::EndMessage; 4839 } 4840 } 4841 4842 template <typename SampleType> 4843 void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample) 4844 { 4845 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 4846 const glu::Buffer arrayBuffer (m_context.getRenderContext()); 4847 const glu::Buffer indexBuffer (m_context.getRenderContext()); 4848 const glu::Buffer unrelatedBuffer (m_context.getRenderContext()); 4849 const int numVertices = getLayeredGridNumVertices(sample.scene); 4850 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 4851 deUint64 startTime; 4852 deUint64 endTime; 4853 std::vector<tcu::Vec4> vertexData; 4854 std::vector<deUint32> indexData; 4855 4856 // create data 4857 4858 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); 4859 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 4860 generateLayeredGridIndexData(indexData, sample.scene); 4861 4862 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 4863 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); 4864 RenderCase<SampleType>::setupVertexAttribs(); 4865 4866 // target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu 4867 4868 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING) 4869 { 4870 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW); 4871 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4872 } 4873 else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW) 4874 { 4875 // do not touch the vertex buffer 4876 } 4877 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING) 4878 { 4879 // hint that the target buffer will be modified soon 4880 const glw::GLenum vertexDataUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW); 4881 const glw::GLenum indexDataUsage = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW); 4882 4883 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage); 4884 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage); 4885 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4886 } 4887 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW) 4888 { 4889 if (m_targetBuffer == TARGETBUFFER_VERTEX) 4890 { 4891 // make the index buffer present on the gpu 4892 // use another vertex buffer to keep original buffer in unused state 4893 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext()); 4894 4895 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer); 4896 RenderCase<SampleType>::setupVertexAttribs(); 4897 4898 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); 4899 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW); 4900 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 4901 4902 // restore original state 4903 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 4904 RenderCase<SampleType>::setupVertexAttribs(); 4905 } 4906 else if (m_targetBuffer == TARGETBUFFER_INDEX) 4907 { 4908 // make the vertex buffer present on the gpu 4909 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); 4910 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 4911 } 4912 else 4913 DE_ASSERT(false); 4914 } 4915 else 4916 DE_ASSERT(false); 4917 4918 RenderCase<SampleType>::waitGLResults(); 4919 GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare"); 4920 4921 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); 4922 gl.clear(GL_COLOR_BUFFER_BIT); 4923 RenderCase<SampleType>::waitGLResults(); 4924 4925 tcu::warmupCPU(); 4926 4927 // upload 4928 4929 { 4930 glw::GLenum target; 4931 glw::GLsizeiptr size; 4932 glw::GLintptr offset = 0; 4933 const void* source; 4934 4935 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL) 4936 { 4937 target = GL_ARRAY_BUFFER; 4938 size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)); 4939 source = &vertexData[0]; 4940 } 4941 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL) 4942 { 4943 target = GL_ELEMENT_ARRAY_BUFFER; 4944 size = (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)); 4945 source = &indexData[0]; 4946 } 4947 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL) 4948 { 4949 DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING); 4950 4951 target = GL_ARRAY_BUFFER; 4952 size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4); 4953 offset = (glw::GLintptr)deAlign32((int)size / 2, 4); 4954 source = (const deUint8*)&vertexData[0] + offset; 4955 } 4956 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL) 4957 { 4958 DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING); 4959 4960 // upload to 25% - 75% range 4961 target = GL_ELEMENT_ARRAY_BUFFER; 4962 size = (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4); 4963 offset = (glw::GLintptr)deAlign32((int)size / 2, 4); 4964 source = (const deUint8*)&indexData[0] + offset; 4965 } 4966 else 4967 { 4968 DE_ASSERT(false); 4969 return; 4970 } 4971 4972 startTime = deGetMicroseconds(); 4973 4974 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) 4975 gl.bufferData(target, size, source, GL_DYNAMIC_DRAW); 4976 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) 4977 { 4978 // create buffer storage 4979 if (m_bufferState == BUFFERSTATE_NEW) 4980 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW); 4981 gl.bufferSubData(target, offset, size, source); 4982 } 4983 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) 4984 { 4985 void* mapPtr; 4986 glw::GLboolean unmapSuccessful; 4987 4988 // create buffer storage 4989 if (m_bufferState == BUFFERSTATE_NEW) 4990 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW); 4991 4992 mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT); 4993 if (!mapPtr) 4994 throw tcu::Exception("MapBufferRange returned NULL"); 4995 4996 deMemcpy(mapPtr, source, (int)size); 4997 4998 // if unmapping fails, just try again later 4999 unmapSuccessful = gl.unmapBuffer(target); 5000 if (!unmapSuccessful) 5001 throw UnmapFailureError(); 5002 } 5003 else 5004 DE_ASSERT(false); 5005 5006 endTime = deGetMicroseconds(); 5007 5008 sample.result.uploadedDataSize = (int)size; 5009 sample.result.duration.uploadDuration = endTime - startTime; 5010 } 5011 5012 // unrelated 5013 if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) 5014 { 5015 const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); 5016 5017 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer); 5018 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW); 5019 // Attibute pointers are not modified, no need restore state 5020 5021 sample.result.unrelatedDataSize = unrelatedUploadSize; 5022 } 5023 5024 // draw 5025 { 5026 startTime = deGetMicroseconds(); 5027 5028 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5029 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 5030 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5031 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 5032 else 5033 DE_ASSERT(false); 5034 5035 endTime = deGetMicroseconds(); 5036 5037 sample.result.duration.renderDuration = endTime - startTime; 5038 } 5039 5040 // read 5041 { 5042 startTime = deGetMicroseconds(); 5043 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 5044 endTime = deGetMicroseconds(); 5045 5046 sample.result.duration.readDuration = endTime - startTime; 5047 } 5048 5049 // set results 5050 5051 sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices; 5052 5053 sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; 5054 sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration; 5055 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; 5056 } 5057 5058 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration> 5059 { 5060 public: 5061 enum MapFlags 5062 { 5063 MAPFLAG_NONE = 0, 5064 MAPFLAG_INVALIDATE_BUFFER, 5065 MAPFLAG_INVALIDATE_RANGE, 5066 5067 MAPFLAG_LAST 5068 }; 5069 enum UploadBufferTarget 5070 { 5071 UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0, 5072 UPLOADBUFFERTARGET_SAME_BUFFER, 5073 5074 UPLOADBUFFERTARGET_LAST 5075 }; 5076 BufferInUseRenderTimeCase (Context& context, 5077 const char* name, 5078 const char* description, 5079 DrawMethod method, 5080 MapFlags mapFlags, 5081 TargetBuffer targetBuffer, 5082 UploadMethod uploadMethod, 5083 UploadRange uploadRange, 5084 UploadBufferTarget uploadTarget); 5085 5086 private: 5087 void init (void); 5088 void runSample (SampleResult& sample); 5089 5090 const TargetBuffer m_targetBuffer; 5091 const UploadMethod m_uploadMethod; 5092 const UploadRange m_uploadRange; 5093 const MapFlags m_mapFlags; 5094 const UploadBufferTarget m_uploadBufferTarget; 5095 }; 5096 5097 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context& context, 5098 const char* name, 5099 const char* description, 5100 DrawMethod method, 5101 MapFlags mapFlags, 5102 TargetBuffer targetBuffer, 5103 UploadMethod uploadMethod, 5104 UploadRange uploadRange, 5105 UploadBufferTarget uploadTarget) 5106 : RenderCase<RenderUploadRenderReadDuration> (context, name, description, method) 5107 , m_targetBuffer (targetBuffer) 5108 , m_uploadMethod (uploadMethod) 5109 , m_uploadRange (uploadRange) 5110 , m_mapFlags (mapFlags) 5111 , m_uploadBufferTarget (uploadTarget) 5112 { 5113 DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST); 5114 DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST); 5115 DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST); 5116 DE_ASSERT(m_mapFlags < MAPFLAG_LAST); 5117 DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST); 5118 } 5119 5120 void BufferInUseRenderTimeCase::init (void) 5121 { 5122 RenderCase<RenderUploadRenderReadDuration>::init(); 5123 5124 // log 5125 { 5126 const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); 5127 const char* const uploadFunctionName = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"); 5128 const bool isReferenceCase = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER); 5129 tcu::MessageBuilder message (&m_testCtx.getLog()); 5130 5131 message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, " 5132 << targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n"; 5133 5134 if (isReferenceCase) 5135 message << "Rendering:\n" 5136 << " before test: create and use buffers B and C\n" 5137 << " first draw: render using buffer B\n" 5138 << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer C contents\n") : 5139 (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer C contents\n") : 5140 ((const char*)DE_NULL)) 5141 << " second draw: render using buffer C\n" 5142 << " read: readPixels\n"; 5143 else 5144 message << "Rendering:\n" 5145 << " before test: create and use buffer B\n" 5146 << " first draw: render using buffer B\n" 5147 << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer B contents\n") : 5148 (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer B contents\n") : 5149 ((const char*)DE_NULL)) 5150 << " second draw: render using buffer B\n" 5151 << " read: readPixels\n"; 5152 5153 message << "Uploading using " << uploadFunctionName 5154 << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT") : 5155 (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT") : 5156 (m_mapFlags == MAPFLAG_NONE) ? ("") : 5157 ((const char*)DE_NULL)) 5158 << "\n" 5159 << getNumSamples() << " test samples. Sample order is randomized.\n" 5160 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" 5161 << "Workload sizes are in the range [" 5162 << getMinWorkloadSize() << ", " 5163 << getMaxWorkloadSize() << "] vertices " 5164 << "([" 5165 << getHumanReadableByteSize(getMinWorkloadDataSize()) << "," 5166 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n" 5167 << "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n"; 5168 5169 if (isReferenceCase) 5170 message << "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results."; 5171 else 5172 message << "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n"; 5173 5174 message << tcu::TestLog::EndMessage; 5175 } 5176 } 5177 5178 void BufferInUseRenderTimeCase::runSample (SampleResult& sample) 5179 { 5180 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5181 const glu::Buffer arrayBuffer (m_context.getRenderContext()); 5182 const glu::Buffer indexBuffer (m_context.getRenderContext()); 5183 const glu::Buffer alternativeUploadBuffer (m_context.getRenderContext()); 5184 const int numVertices = getLayeredGridNumVertices(sample.scene); 5185 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 5186 deUint64 startTime; 5187 deUint64 endTime; 5188 std::vector<tcu::Vec4> vertexData; 5189 std::vector<deUint32> indexData; 5190 5191 // create data 5192 5193 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); 5194 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5195 generateLayeredGridIndexData(indexData, sample.scene); 5196 5197 // make buffers used 5198 5199 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 5200 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); 5201 setupVertexAttribs(); 5202 5203 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5204 { 5205 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); 5206 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 5207 } 5208 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5209 { 5210 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); 5211 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW); 5212 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 5213 } 5214 else 5215 DE_ASSERT(false); 5216 5217 // another pair of buffers for reference case 5218 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER) 5219 { 5220 if (m_targetBuffer == TARGETBUFFER_VERTEX) 5221 { 5222 gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer); 5223 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); 5224 5225 setupVertexAttribs(); 5226 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 5227 } 5228 else if (m_targetBuffer == TARGETBUFFER_INDEX) 5229 { 5230 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer); 5231 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW); 5232 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 5233 } 5234 else 5235 DE_ASSERT(false); 5236 5237 // restore state 5238 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 5239 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); 5240 setupVertexAttribs(); 5241 } 5242 5243 waitGLResults(); 5244 GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare"); 5245 5246 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); 5247 gl.clear(GL_COLOR_BUFFER_BIT); 5248 waitGLResults(); 5249 5250 tcu::warmupCPU(); 5251 5252 // first draw 5253 { 5254 startTime = deGetMicroseconds(); 5255 5256 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5257 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 5258 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5259 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 5260 else 5261 DE_ASSERT(false); 5262 5263 endTime = deGetMicroseconds(); 5264 5265 sample.result.duration.firstRenderDuration = endTime - startTime; 5266 } 5267 5268 // upload 5269 { 5270 glw::GLenum target; 5271 glw::GLsizeiptr size; 5272 glw::GLintptr offset = 0; 5273 const void* source; 5274 5275 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL) 5276 { 5277 target = GL_ARRAY_BUFFER; 5278 size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)); 5279 source = &vertexData[0]; 5280 } 5281 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL) 5282 { 5283 target = GL_ELEMENT_ARRAY_BUFFER; 5284 size = (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)); 5285 source = &indexData[0]; 5286 } 5287 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL) 5288 { 5289 target = GL_ARRAY_BUFFER; 5290 size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4); 5291 offset = (glw::GLintptr)deAlign32((int)size / 2, 4); 5292 source = (const deUint8*)&vertexData[0] + offset; 5293 } 5294 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL) 5295 { 5296 // upload to 25% - 75% range 5297 target = GL_ELEMENT_ARRAY_BUFFER; 5298 size = (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4); 5299 offset = (glw::GLintptr)deAlign32((int)size / 2, 4); 5300 source = (const deUint8*)&indexData[0] + offset; 5301 } 5302 else 5303 { 5304 DE_ASSERT(false); 5305 return; 5306 } 5307 5308 // reference case? don't modify the buffer in use 5309 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER) 5310 gl.bindBuffer(target, *alternativeUploadBuffer); 5311 5312 startTime = deGetMicroseconds(); 5313 5314 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) 5315 gl.bufferData(target, size, source, GL_STREAM_DRAW); 5316 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) 5317 gl.bufferSubData(target, offset, size, source); 5318 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) 5319 { 5320 const int mapFlags = (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT) : 5321 (m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT) : 5322 (-1); 5323 void* mapPtr; 5324 glw::GLboolean unmapSuccessful; 5325 5326 mapPtr = gl.mapBufferRange(target, offset, size, mapFlags); 5327 if (!mapPtr) 5328 throw tcu::Exception("MapBufferRange returned NULL"); 5329 5330 deMemcpy(mapPtr, source, (int)size); 5331 5332 // if unmapping fails, just try again later 5333 unmapSuccessful = gl.unmapBuffer(target); 5334 if (!unmapSuccessful) 5335 throw UnmapFailureError(); 5336 } 5337 else 5338 DE_ASSERT(false); 5339 5340 endTime = deGetMicroseconds(); 5341 5342 sample.result.uploadedDataSize = (int)size; 5343 sample.result.duration.uploadDuration = endTime - startTime; 5344 } 5345 5346 // second draw 5347 { 5348 // Source vertex data from alternative buffer in refernce case 5349 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX) 5350 setupVertexAttribs(); 5351 5352 startTime = deGetMicroseconds(); 5353 5354 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5355 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 5356 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5357 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL); 5358 else 5359 DE_ASSERT(false); 5360 5361 endTime = deGetMicroseconds(); 5362 5363 sample.result.duration.secondRenderDuration = endTime - startTime; 5364 } 5365 5366 // read 5367 { 5368 startTime = deGetMicroseconds(); 5369 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 5370 endTime = deGetMicroseconds(); 5371 5372 sample.result.duration.readDuration = endTime - startTime; 5373 } 5374 5375 // set results 5376 5377 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices; 5378 5379 sample.result.duration.renderReadDuration = sample.result.duration.secondRenderDuration + sample.result.duration.readDuration; 5380 sample.result.duration.totalDuration = sample.result.duration.firstRenderDuration + 5381 sample.result.duration.uploadDuration + 5382 sample.result.duration.secondRenderDuration + 5383 sample.result.duration.readDuration; 5384 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; 5385 } 5386 5387 class UploadWaitDrawCase : public RenderPerformanceTestBase 5388 { 5389 public: 5390 struct Sample 5391 { 5392 int numFrames; 5393 deUint64 uploadCallEndTime; 5394 }; 5395 struct Result 5396 { 5397 deUint64 uploadDuration; 5398 deUint64 renderDuration; 5399 deUint64 readDuration; 5400 deUint64 renderReadDuration; 5401 5402 deUint64 timeBeforeUse; 5403 }; 5404 5405 UploadWaitDrawCase (Context& context, 5406 const char* name, 5407 const char* description, 5408 DrawMethod drawMethod, 5409 TargetBuffer targetBuffer, 5410 UploadMethod uploadMethod, 5411 BufferState bufferState); 5412 ~UploadWaitDrawCase (void); 5413 5414 private: 5415 void init (void); 5416 void deinit (void); 5417 IterateResult iterate (void); 5418 5419 void uploadBuffer (Sample& sample, Result& result); 5420 void drawFromBuffer (Sample& sample, Result& result); 5421 void reuseAndDeleteBuffer (void); 5422 void logAndSetTestResult (void); 5423 void logSamples (void); 5424 void drawMisc (void); 5425 int findStabilizationSample (deUint64 (Result::*target), const char* description); 5426 bool checkSampleTemporalStability (deUint64 (Result::*target), const char* description); 5427 5428 const DrawMethod m_drawMethod; 5429 const TargetBuffer m_targetBuffer; 5430 const UploadMethod m_uploadMethod; 5431 const BufferState m_bufferState; 5432 5433 const int m_numSamplesPerSwap; 5434 const int m_numMaxSwaps; 5435 5436 int m_frameNdx; 5437 int m_sampleNdx; 5438 int m_numVertices; 5439 5440 std::vector<tcu::Vec4> m_vertexData; 5441 std::vector<deUint32> m_indexData; 5442 std::vector<Sample> m_samples; 5443 std::vector<Result> m_results; 5444 std::vector<int> m_iterationOrder; 5445 5446 deUint32 m_vertexBuffer; 5447 deUint32 m_indexBuffer; 5448 deUint32 m_miscBuffer; 5449 int m_numMiscVertices; 5450 }; 5451 5452 UploadWaitDrawCase::UploadWaitDrawCase (Context& context, 5453 const char* name, 5454 const char* description, 5455 DrawMethod drawMethod, 5456 TargetBuffer targetBuffer, 5457 UploadMethod uploadMethod, 5458 BufferState bufferState) 5459 : RenderPerformanceTestBase (context, name, description) 5460 , m_drawMethod (drawMethod) 5461 , m_targetBuffer (targetBuffer) 5462 , m_uploadMethod (uploadMethod) 5463 , m_bufferState (bufferState) 5464 , m_numSamplesPerSwap (10) 5465 , m_numMaxSwaps (4) 5466 , m_frameNdx (0) 5467 , m_sampleNdx (0) 5468 , m_numVertices (-1) 5469 , m_vertexBuffer (0) 5470 , m_indexBuffer (0) 5471 , m_miscBuffer (0) 5472 , m_numMiscVertices (-1) 5473 { 5474 } 5475 5476 UploadWaitDrawCase::~UploadWaitDrawCase (void) 5477 { 5478 deinit(); 5479 } 5480 5481 void UploadWaitDrawCase::init (void) 5482 { 5483 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5484 const int vertexAttribSize = (int)sizeof(tcu::Vec4) * 2; // color4, position4 5485 const int vertexIndexSize = (int)sizeof(deUint32); 5486 const int vertexUploadDataSize = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize); 5487 5488 RenderPerformanceTestBase::init(); 5489 5490 // requirements 5491 5492 if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE || 5493 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE) 5494 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target"); 5495 5496 // gl state 5497 5498 gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE); 5499 5500 // enable bleding to prevent grid layers from being discarded 5501 5502 gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 5503 gl.blendEquation(GL_FUNC_ADD); 5504 gl.enable(GL_BLEND); 5505 5506 // scene 5507 5508 { 5509 LayeredGridSpec scene; 5510 5511 // create ~8MB workload with similar characteristics as in the other test 5512 // => makes comparison to other results more straightforward 5513 scene.gridWidth = 93; 5514 scene.gridHeight = 93; 5515 scene.gridLayers = 5; 5516 5517 generateLayeredGridVertexAttribData4C4V(m_vertexData, scene); 5518 generateLayeredGridIndexData(m_indexData, scene); 5519 m_numVertices = getLayeredGridNumVertices(scene); 5520 } 5521 5522 // buffers 5523 5524 if (m_bufferState == BUFFERSTATE_NEW) 5525 { 5526 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5527 { 5528 // reads from two buffers, prepare the static buffer 5529 5530 if (m_targetBuffer == TARGETBUFFER_VERTEX) 5531 { 5532 // index buffer is static, use another vertex buffer to keep original buffer in unused state 5533 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext()); 5534 5535 gl.genBuffers(1, &m_indexBuffer); 5536 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer); 5537 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); 5538 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW); 5539 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW); 5540 5541 setupVertexAttribs(); 5542 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL); 5543 } 5544 else if (m_targetBuffer == TARGETBUFFER_INDEX) 5545 { 5546 // vertex buffer is static 5547 gl.genBuffers(1, &m_vertexBuffer); 5548 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); 5549 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW); 5550 5551 setupVertexAttribs(); 5552 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); 5553 } 5554 else 5555 DE_ASSERT(false); 5556 } 5557 } 5558 else if (m_bufferState == BUFFERSTATE_EXISTING) 5559 { 5560 const glw::GLenum vertexUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW); 5561 const glw::GLenum indexUsage = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW); 5562 5563 gl.genBuffers(1, &m_vertexBuffer); 5564 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); 5565 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage); 5566 5567 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5568 { 5569 gl.genBuffers(1, &m_indexBuffer); 5570 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); 5571 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage); 5572 } 5573 5574 setupVertexAttribs(); 5575 5576 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5577 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); 5578 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5579 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL); 5580 else 5581 DE_ASSERT(false); 5582 } 5583 else 5584 DE_ASSERT(false); 5585 5586 // misc draw buffer 5587 { 5588 std::vector<tcu::Vec4> vertexData; 5589 LayeredGridSpec scene; 5590 5591 // create ~1.5MB workload with similar characteristics 5592 scene.gridWidth = 40; 5593 scene.gridHeight = 40; 5594 scene.gridLayers = 5; 5595 5596 generateLayeredGridVertexAttribData4C4V(vertexData, scene); 5597 5598 gl.genBuffers(1, &m_miscBuffer); 5599 gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer); 5600 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW); 5601 5602 m_numMiscVertices = getLayeredGridNumVertices(scene); 5603 } 5604 5605 // iterations 5606 { 5607 m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap); 5608 m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap); 5609 5610 for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps) 5611 for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx) 5612 { 5613 const int index = numSwaps*m_numSamplesPerSwap + sampleNdx; 5614 5615 m_samples[index].numFrames = numSwaps; 5616 } 5617 5618 m_iterationOrder.resize(m_samples.size()); 5619 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size()); 5620 } 5621 5622 // log 5623 m_testCtx.getLog() 5624 << tcu::TestLog::Message 5625 << "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n" 5626 << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n" 5627 << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n" 5628 << "Uploading using " 5629 << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : 5630 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : 5631 (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT") : 5632 ((const char*)DE_NULL)) 5633 << "\n" 5634 << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n" 5635 << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : ("")) 5636 << "Test result is the number of frames (swaps) required for the render time to stabilize.\n" 5637 << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n" 5638 << tcu::TestLog::EndMessage; 5639 } 5640 5641 void UploadWaitDrawCase::deinit (void) 5642 { 5643 RenderPerformanceTestBase::deinit(); 5644 5645 if (m_vertexBuffer) 5646 { 5647 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer); 5648 m_vertexBuffer = 0; 5649 } 5650 if (m_indexBuffer) 5651 { 5652 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer); 5653 m_indexBuffer = 0; 5654 } 5655 if (m_miscBuffer) 5656 { 5657 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer); 5658 m_miscBuffer = 0; 5659 } 5660 } 5661 5662 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void) 5663 { 5664 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5665 const int betweenIterationDummyFrameCount = 5; // draw misc between test samples 5666 const int frameNdx = m_frameNdx++; 5667 const int currentSampleNdx = m_iterationOrder[m_sampleNdx]; 5668 5669 // Simulate work for about 8ms 5670 busyWait(8000); 5671 5672 // Dummy rendering during dummy frames 5673 if (frameNdx != m_samples[currentSampleNdx].numFrames) 5674 { 5675 // draw similar from another buffer 5676 drawMisc(); 5677 } 5678 5679 if (frameNdx == 0) 5680 { 5681 // upload and start the clock 5682 uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]); 5683 } 5684 5685 if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0 5686 { 5687 // draw using the uploaded buffer 5688 drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]); 5689 5690 // re-use buffer for something else to make sure test iteration do not affect each other 5691 if (m_bufferState == BUFFERSTATE_NEW) 5692 reuseAndDeleteBuffer(); 5693 } 5694 else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount) 5695 { 5696 // next sample 5697 ++m_sampleNdx; 5698 m_frameNdx = 0; 5699 } 5700 5701 GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate"); 5702 5703 if (m_sampleNdx < (int)m_samples.size()) 5704 return CONTINUE; 5705 5706 logAndSetTestResult(); 5707 return STOP; 5708 } 5709 5710 void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result) 5711 { 5712 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5713 deUint64 startTime; 5714 deUint64 endTime; 5715 glw::GLenum target; 5716 glw::GLsizeiptr size; 5717 const void* source; 5718 5719 // data source 5720 5721 if (m_targetBuffer == TARGETBUFFER_VERTEX) 5722 { 5723 DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW)); 5724 5725 target = GL_ARRAY_BUFFER; 5726 size = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)); 5727 source = &m_vertexData[0]; 5728 } 5729 else if (m_targetBuffer == TARGETBUFFER_INDEX) 5730 { 5731 DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW)); 5732 5733 target = GL_ELEMENT_ARRAY_BUFFER; 5734 size = (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)); 5735 source = &m_indexData[0]; 5736 } 5737 else 5738 { 5739 DE_ASSERT(false); 5740 return; 5741 } 5742 5743 // gen buffer 5744 5745 if (m_bufferState == BUFFERSTATE_NEW) 5746 { 5747 if (m_targetBuffer == TARGETBUFFER_VERTEX) 5748 { 5749 gl.genBuffers(1, &m_vertexBuffer); 5750 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); 5751 } 5752 else if (m_targetBuffer == TARGETBUFFER_INDEX) 5753 { 5754 gl.genBuffers(1, &m_indexBuffer); 5755 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); 5756 } 5757 else 5758 DE_ASSERT(false); 5759 5760 if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA || 5761 m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) 5762 { 5763 gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW); 5764 } 5765 } 5766 else if (m_bufferState == BUFFERSTATE_EXISTING) 5767 { 5768 if (m_targetBuffer == TARGETBUFFER_VERTEX) 5769 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); 5770 else if (m_targetBuffer == TARGETBUFFER_INDEX) 5771 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); 5772 else 5773 DE_ASSERT(false); 5774 } 5775 else 5776 DE_ASSERT(false); 5777 5778 // upload 5779 5780 startTime = deGetMicroseconds(); 5781 5782 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) 5783 gl.bufferData(target, size, source, GL_STATIC_DRAW); 5784 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) 5785 gl.bufferSubData(target, 0, size, source); 5786 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) 5787 { 5788 void* mapPtr; 5789 glw::GLboolean unmapSuccessful; 5790 5791 mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT); 5792 if (!mapPtr) 5793 throw tcu::Exception("MapBufferRange returned NULL"); 5794 5795 deMemcpy(mapPtr, source, (int)size); 5796 5797 // if unmapping fails, just try again later 5798 unmapSuccessful = gl.unmapBuffer(target); 5799 if (!unmapSuccessful) 5800 throw UnmapFailureError(); 5801 } 5802 else 5803 DE_ASSERT(false); 5804 5805 endTime = deGetMicroseconds(); 5806 5807 sample.uploadCallEndTime = endTime; 5808 result.uploadDuration = endTime - startTime; 5809 } 5810 5811 void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result) 5812 { 5813 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5814 tcu::Surface resultSurface (RENDER_AREA_SIZE, RENDER_AREA_SIZE); 5815 deUint64 startTime; 5816 deUint64 endTime; 5817 5818 DE_ASSERT(m_vertexBuffer != 0); 5819 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5820 DE_ASSERT(m_indexBuffer == 0); 5821 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5822 DE_ASSERT(m_indexBuffer != 0); 5823 else 5824 DE_ASSERT(false); 5825 5826 // draw 5827 { 5828 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); 5829 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5830 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); 5831 5832 setupVertexAttribs(); 5833 5834 // microseconds passed since return from upload call 5835 result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime; 5836 5837 startTime = deGetMicroseconds(); 5838 5839 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) 5840 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); 5841 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) 5842 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL); 5843 else 5844 DE_ASSERT(false); 5845 5846 endTime = deGetMicroseconds(); 5847 5848 result.renderDuration = endTime - startTime; 5849 } 5850 5851 // read 5852 { 5853 startTime = deGetMicroseconds(); 5854 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); 5855 endTime = deGetMicroseconds(); 5856 5857 result.readDuration = endTime - startTime; 5858 } 5859 5860 result.renderReadDuration = result.renderDuration + result.readDuration; 5861 } 5862 5863 void UploadWaitDrawCase::reuseAndDeleteBuffer (void) 5864 { 5865 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 5866 5867 if (m_targetBuffer == TARGETBUFFER_INDEX) 5868 { 5869 // respecify and delete index buffer 5870 static const deUint32 indices[3] = {1, 3, 8}; 5871 5872 DE_ASSERT(m_indexBuffer != 0); 5873 5874 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); 5875 gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL); 5876 gl.deleteBuffers(1, &m_indexBuffer); 5877 m_indexBuffer = 0; 5878 } 5879 else if (m_targetBuffer == TARGETBUFFER_VERTEX) 5880 { 5881 // respecify and delete vertex buffer 5882 static const tcu::Vec4 coloredTriangle[6] = 5883 { 5884 tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f), 5885 tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f, 0.4f, 0.0f, 1.0f), 5886 tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f), 5887 }; 5888 5889 DE_ASSERT(m_vertexBuffer != 0); 5890 5891 gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW); 5892 gl.drawArrays(GL_TRIANGLES, 0, 3); 5893 gl.deleteBuffers(1, &m_vertexBuffer); 5894 m_vertexBuffer = 0; 5895 } 5896 5897 waitGLResults(); 5898 } 5899 5900 void UploadWaitDrawCase::logAndSetTestResult (void) 5901 { 5902 int uploadStabilization; 5903 int renderReadStabilization; 5904 int renderStabilization; 5905 int readStabilization; 5906 bool temporallyStable; 5907 5908 { 5909 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples"); 5910 logSamples(); 5911 } 5912 5913 { 5914 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability"); 5915 5916 // log stabilization points 5917 renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read"); 5918 uploadStabilization = findStabilizationSample(&Result::uploadDuration, "Upload time"); 5919 renderStabilization = findStabilizationSample(&Result::renderDuration, "Draw call time"); 5920 readStabilization = findStabilizationSample(&Result::readDuration, "ReadPixels time"); 5921 5922 temporallyStable = true; 5923 temporallyStable &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read"); 5924 temporallyStable &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time"); 5925 temporallyStable &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time"); 5926 temporallyStable &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time"); 5927 } 5928 5929 { 5930 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results"); 5931 5932 // Check result sanily 5933 if (uploadStabilization != 0) 5934 m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage; 5935 if (!temporallyStable) 5936 m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage; 5937 5938 // render & read 5939 if (renderReadStabilization == -1) 5940 m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage; 5941 else 5942 m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization); 5943 5944 // draw call 5945 if (renderStabilization == -1) 5946 m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage; 5947 else 5948 m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization); 5949 5950 // readpixels 5951 if (readStabilization == -1) 5952 m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage; 5953 else 5954 m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization); 5955 5956 // Report renderReadStabilization 5957 if (renderReadStabilization != -1) 5958 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str()); 5959 else 5960 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1 5961 } 5962 } 5963 5964 void UploadWaitDrawCase::logSamples (void) 5965 { 5966 // Inverse m_iterationOrder 5967 5968 std::vector<int> runOrder(m_iterationOrder.size()); 5969 for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx) 5970 runOrder[m_iterationOrder[ndx]] = ndx; 5971 5972 // Log samples 5973 5974 m_testCtx.getLog() 5975 << tcu::TestLog::SampleList("Samples", "Samples") 5976 << tcu::TestLog::SampleInfo 5977 << tcu::TestLog::ValueInfo("NumSwaps", "SwapBuffers before use", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 5978 << tcu::TestLog::ValueInfo("Delay", "Time before use", "us", QP_SAMPLE_VALUE_TAG_PREDICTOR) 5979 << tcu::TestLog::ValueInfo("RunOrder", "Sample run order", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 5980 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 5981 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 5982 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 5983 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 5984 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 5985 << tcu::TestLog::EndSampleInfo; 5986 5987 for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx) 5988 m_testCtx.getLog() 5989 << tcu::TestLog::Sample 5990 << m_samples[sampleNdx].numFrames 5991 << (int)m_results[sampleNdx].timeBeforeUse 5992 << runOrder[sampleNdx] 5993 << (int)m_results[sampleNdx].renderReadDuration 5994 << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration) 5995 << (int)m_results[sampleNdx].uploadDuration 5996 << (int)m_results[sampleNdx].renderDuration 5997 << (int)m_results[sampleNdx].readDuration 5998 << tcu::TestLog::EndSample; 5999 6000 m_testCtx.getLog() << tcu::TestLog::EndSampleList; 6001 } 6002 6003 void UploadWaitDrawCase::drawMisc (void) 6004 { 6005 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 6006 6007 gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer); 6008 setupVertexAttribs(); 6009 gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices); 6010 } 6011 6012 struct DistributionCompareResult 6013 { 6014 bool equal; 6015 float standardDeviations; 6016 }; 6017 6018 template <typename Comparer> 6019 static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer) 6020 { 6021 float sum = 0; 6022 6023 for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx) 6024 { 6025 const deUint64 testSample = testSamples[sampleNdx]; 6026 const int lowerIndex = (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin()); 6027 const int upperIndex = (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin()); 6028 const int lowerRank = lowerIndex + 1; // convert zero-indexed to rank 6029 const int upperRank = upperIndex; // convert zero-indexed to rank, upperIndex is last equal + 1 6030 const float rankMidpoint = (lowerRank + upperRank) / 2.0f; 6031 6032 sum += rankMidpoint; 6033 } 6034 6035 return sum; 6036 } 6037 6038 template <typename Comparer> 6039 static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer) 6040 { 6041 // MannWhitney U test 6042 6043 const int n1 = (int)orderedObservationsA.size(); 6044 const int n2 = (int)orderedObservationsB.size(); 6045 std::vector<deUint64> allSamples (n1 + n2); 6046 6047 std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin()); 6048 std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1); 6049 std::sort(allSamples.begin(), allSamples.end()); 6050 6051 { 6052 const float R1 = sumOfRanks(orderedObservationsA, allSamples, comparer); 6053 6054 const float U1 = n1*n2 + n1*(n1 + 1)/2 - R1; 6055 const float U2 = (n1 * n2) - U1; 6056 const float U = de::min(U1, U2); 6057 6058 // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway 6059 6060 const float mU = n1*n2 / 2.0f; 6061 const float sigmaU = deFloatSqrt((n1*n2*(n1+n2+1)) / 12.0f); 6062 const float z = (U - mU) / sigmaU; 6063 6064 DistributionCompareResult result; 6065 6066 result.equal = (de::abs(z) <= 1.96f); // accept within 95% confidence interval 6067 result.standardDeviations = z; 6068 6069 return result; 6070 } 6071 } 6072 6073 template <typename T> 6074 struct ThresholdComparer 6075 { 6076 float relativeThreshold; 6077 T absoluteThreshold; 6078 6079 bool operator() (const T& a, const T& b) const 6080 { 6081 const float diff = de::abs((float)a - (float)b); 6082 6083 // thresholds 6084 if (diff <= (float)absoluteThreshold) 6085 return false; 6086 if (diff <= a*relativeThreshold || 6087 diff <= b*relativeThreshold) 6088 return false; 6089 6090 // cmp 6091 return a < b; 6092 } 6093 }; 6094 6095 int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description) 6096 { 6097 std::vector<std::vector<deUint64> > sampleObservations(m_numMaxSwaps+1); 6098 ThresholdComparer<deUint64> comparer; 6099 6100 comparer.relativeThreshold = 0.15f; // 15% 6101 comparer.absoluteThreshold = 100; // (us), assumed sampling precision 6102 6103 // get observations and order them 6104 6105 for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx) 6106 { 6107 int insertNdx = 0; 6108 6109 sampleObservations[swapNdx].resize(m_numSamplesPerSwap); 6110 6111 for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx) 6112 if (m_samples[ndx].numFrames == swapNdx) 6113 sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target; 6114 6115 DE_ASSERT(insertNdx == m_numSamplesPerSwap); 6116 6117 std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end()); 6118 } 6119 6120 // find stabilization point 6121 6122 for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx ) 6123 { 6124 // Distribution is equal to all following distributions 6125 for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution) 6126 { 6127 // Stable section ends here? 6128 const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer); 6129 if (!result.equal) 6130 { 6131 // Last two samples are not equal? Samples never stabilized 6132 if (sampleNdx == m_numMaxSwaps-1) 6133 { 6134 m_testCtx.getLog() 6135 << tcu::TestLog::Message 6136 << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n" 6137 << "\tDifference in standard deviations: " << result.standardDeviations << "\n" 6138 << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n" 6139 << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n" 6140 << tcu::TestLog::EndMessage; 6141 return -1; 6142 } 6143 else 6144 { 6145 m_testCtx.getLog() 6146 << tcu::TestLog::Message 6147 << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n" 6148 << "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n" 6149 << "\tDifference in standard deviations: " << result.standardDeviations << "\n" 6150 << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n" 6151 << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n" 6152 << tcu::TestLog::EndMessage; 6153 6154 return sampleNdx+1; 6155 } 6156 } 6157 } 6158 } 6159 6160 m_testCtx.getLog() 6161 << tcu::TestLog::Message 6162 << description << ": All samples seem to have the same distribution" 6163 << tcu::TestLog::EndMessage; 6164 6165 // all distributions equal 6166 return 0; 6167 } 6168 6169 bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description) 6170 { 6171 // Try to find correlation with sample order and sample times 6172 6173 const int numDataPoints = (int)m_iterationOrder.size(); 6174 std::vector<tcu::Vec2> dataPoints (m_iterationOrder.size()); 6175 LineParametersWithConfidence lineFit; 6176 6177 for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx) 6178 { 6179 dataPoints[m_iterationOrder[ndx]].x() = (float)ndx; 6180 dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target); 6181 } 6182 6183 lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f); 6184 6185 // Difference of more than 25% of the offset along the whole sample range 6186 if (de::abs(lineFit.coefficient) * numDataPoints > de::abs(lineFit.offset) * 0.25f) 6187 { 6188 m_testCtx.getLog() 6189 << tcu::TestLog::Message 6190 << description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n" 6191 << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n" 6192 << tcu::TestLog::EndMessage; 6193 6194 return false; 6195 } 6196 else 6197 return true; 6198 } 6199 6200 } // anonymous 6201 6202 BufferDataUploadTests::BufferDataUploadTests (Context& context) 6203 : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests") 6204 { 6205 } 6206 6207 BufferDataUploadTests::~BufferDataUploadTests (void) 6208 { 6209 } 6210 6211 void BufferDataUploadTests::init (void) 6212 { 6213 static const struct BufferUsage 6214 { 6215 const char* name; 6216 deUint32 usage; 6217 bool primaryUsage; 6218 } bufferUsages[] = 6219 { 6220 { "stream_draw", GL_STREAM_DRAW, true }, 6221 { "stream_read", GL_STREAM_READ, false }, 6222 { "stream_copy", GL_STREAM_COPY, false }, 6223 { "static_draw", GL_STATIC_DRAW, true }, 6224 { "static_read", GL_STATIC_READ, false }, 6225 { "static_copy", GL_STATIC_COPY, false }, 6226 { "dynamic_draw", GL_DYNAMIC_DRAW, true }, 6227 { "dynamic_read", GL_DYNAMIC_READ, false }, 6228 { "dynamic_copy", GL_DYNAMIC_COPY, false }, 6229 }; 6230 6231 tcu::TestCaseGroup* const referenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Reference functions"); 6232 tcu::TestCaseGroup* const functionCallGroup = new tcu::TestCaseGroup(m_testCtx, "function_call", "Function call timing"); 6233 tcu::TestCaseGroup* const modifyAfterUseGroup = new tcu::TestCaseGroup(m_testCtx, "modify_after_use", "Function call time after buffer has been used"); 6234 tcu::TestCaseGroup* const renderAfterUploadGroup = new tcu::TestCaseGroup(m_testCtx, "render_after_upload", "Function call time of draw commands after buffer has been modified"); 6235 6236 addChild(referenceGroup); 6237 addChild(functionCallGroup); 6238 addChild(modifyAfterUseGroup); 6239 addChild(renderAfterUploadGroup); 6240 6241 // .reference 6242 { 6243 static const struct BufferSizeRange 6244 { 6245 const char* name; 6246 int minBufferSize; 6247 int maxBufferSize; 6248 int numSamples; 6249 bool largeBuffersCase; 6250 } sizeRanges[] = 6251 { 6252 { "small_buffers", 0, 1 << 18, 64, false }, // !< 0kB - 256kB 6253 { "large_buffers", 1 << 18, 1 << 24, 32, true }, // !< 256kB - 16MB 6254 }; 6255 6256 for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx) 6257 { 6258 referenceGroup->addChild(new ReferenceMemcpyCase(m_context, 6259 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(), 6260 "Test memcpy performance", 6261 sizeRanges[bufferSizeRangeNdx].minBufferSize, 6262 sizeRanges[bufferSizeRangeNdx].maxBufferSize, 6263 sizeRanges[bufferSizeRangeNdx].numSamples, 6264 sizeRanges[bufferSizeRangeNdx].largeBuffersCase)); 6265 } 6266 } 6267 6268 // .function_call 6269 { 6270 const int minBufferSize = 0; // !< 0kiB 6271 const int maxBufferSize = 1 << 24; // !< 16MiB 6272 const int numDataSamples = 25; 6273 const int numMapSamples = 25; 6274 6275 tcu::TestCaseGroup* const bufferDataMethodGroup = new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData"); 6276 tcu::TestCaseGroup* const bufferSubDataMethodGroup = new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData"); 6277 tcu::TestCaseGroup* const mapBufferRangeMethodGroup = new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange"); 6278 6279 functionCallGroup->addChild(bufferDataMethodGroup); 6280 functionCallGroup->addChild(bufferSubDataMethodGroup); 6281 functionCallGroup->addChild(mapBufferRangeMethodGroup); 6282 6283 // .buffer_data 6284 { 6285 static const struct TargetCase 6286 { 6287 tcu::TestCaseGroup* group; 6288 BufferDataUploadCase::CaseType caseType; 6289 bool allUsages; 6290 } targetCases[] = 6291 { 6292 { new tcu::TestCaseGroup(m_testCtx, "new_buffer", "Target new buffer"), BufferDataUploadCase::CASE_NEW_BUFFER, true }, 6293 { new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer", "Target new unspecified buffer"), BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER, true }, 6294 { new tcu::TestCaseGroup(m_testCtx, "specified_buffer", "Target new specified buffer"), BufferDataUploadCase::CASE_SPECIFIED_BUFFER, true }, 6295 { new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Target buffer that was used in draw"), BufferDataUploadCase::CASE_USED_BUFFER, true }, 6296 { new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer", "Target larger buffer that was used in draw"), BufferDataUploadCase::CASE_USED_LARGER_BUFFER, false }, 6297 }; 6298 6299 for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx) 6300 { 6301 bufferDataMethodGroup->addChild(targetCases[targetNdx].group); 6302 6303 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) 6304 if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages) 6305 targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context, 6306 std::string("usage_").append(bufferUsages[usageNdx].name).c_str(), 6307 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), 6308 minBufferSize, 6309 maxBufferSize, 6310 numDataSamples, 6311 bufferUsages[usageNdx].usage, 6312 targetCases[targetNdx].caseType)); 6313 } 6314 } 6315 6316 // .buffer_sub_data 6317 { 6318 static const struct FlagCase 6319 { 6320 tcu::TestCaseGroup* group; 6321 BufferSubDataUploadCase::CaseType parentCase; 6322 bool allUsages; 6323 int flags; 6324 } flagCases[] = 6325 { 6326 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload", ""), BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_FULL_UPLOAD }, 6327 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload", "Clear buffer with bufferData(...,NULL) before sub data call"), BufferSubDataUploadCase::CASE_USED_BUFFER, false, BufferSubDataUploadCase::FLAG_FULL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE }, 6328 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload", ""), BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD }, 6329 { new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"), BufferSubDataUploadCase::CASE_USED_BUFFER, false, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE }, 6330 }; 6331 6332 for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx) 6333 { 6334 bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group); 6335 6336 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) 6337 if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages) 6338 flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context, 6339 std::string("usage_").append(bufferUsages[usageNdx].name).c_str(), 6340 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), 6341 minBufferSize, 6342 maxBufferSize, 6343 numDataSamples, 6344 bufferUsages[usageNdx].usage, 6345 flagCases[flagNdx].parentCase, 6346 flagCases[flagNdx].flags)); 6347 } 6348 } 6349 6350 // .map_buffer_range 6351 { 6352 static const struct FlagCase 6353 { 6354 const char* name; 6355 bool usefulForUnusedBuffers; 6356 bool allUsages; 6357 int glFlags; 6358 int caseFlags; 6359 } flagCases[] = 6360 { 6361 { "flag_write_full", true, true, GL_MAP_WRITE_BIT, 0 }, 6362 { "flag_write_partial", true, true, GL_MAP_WRITE_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6363 { "flag_read_write_full", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, 0 }, 6364 { "flag_read_write_partial", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6365 { "flag_invalidate_range_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, 0 }, 6366 { "flag_invalidate_range_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6367 { "flag_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0 }, 6368 { "flag_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6369 { "flag_write_full_manual_invalidate_buffer", false, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_MANUAL_INVALIDATION }, 6370 { "flag_write_partial_manual_invalidate_buffer", false, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION }, 6371 { "flag_unsynchronized_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, 0 }, 6372 { "flag_unsynchronized_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6373 { "flag_unsynchronized_and_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0 }, 6374 { "flag_unsynchronized_and_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, MapBufferRangeCase::FLAG_PARTIAL }, 6375 }; 6376 static const struct FlushCases 6377 { 6378 const char* name; 6379 int glFlags; 6380 int caseFlags; 6381 } flushCases[] = 6382 { 6383 { "flag_flush_explicit_map_full", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, 0 }, 6384 { "flag_flush_explicit_map_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_PARTIAL }, 6385 { "flag_flush_explicit_map_full_flush_in_parts", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS }, 6386 { "flag_flush_explicit_map_full_flush_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL }, 6387 }; 6388 static const struct MapTestGroup 6389 { 6390 int flags; 6391 bool unusedBufferCase; 6392 tcu::TestCaseGroup* group; 6393 } groups[] = 6394 { 6395 { MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER, true, new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"), }, 6396 { MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER, true, new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"), }, 6397 { 0, false, new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers") }, 6398 }; 6399 6400 // we OR same flags to both range and flushRange cases, make sure it is legal 6401 DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER); 6402 DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER); 6403 6404 for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx) 6405 { 6406 tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group; 6407 6408 mapBufferRangeMethodGroup->addChild(bufferTypeGroup); 6409 6410 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx) 6411 { 6412 if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers) 6413 continue; 6414 6415 tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, ""); 6416 bufferTypeGroup->addChild(bufferUsageGroup); 6417 6418 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) 6419 if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages) 6420 bufferUsageGroup->addChild(new MapBufferRangeCase(m_context, 6421 bufferUsages[usageNdx].name, 6422 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), 6423 minBufferSize, 6424 maxBufferSize, 6425 numMapSamples, 6426 bufferUsages[usageNdx].usage, 6427 flagCases[caseNdx].glFlags, 6428 flagCases[caseNdx].caseFlags | groups[groupNdx].flags)); 6429 } 6430 6431 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx) 6432 { 6433 tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, ""); 6434 bufferTypeGroup->addChild(bufferUsageGroup); 6435 6436 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) 6437 if (bufferUsages[usageNdx].primaryUsage) 6438 bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context, 6439 bufferUsages[usageNdx].name, 6440 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), 6441 minBufferSize, 6442 maxBufferSize, 6443 numMapSamples, 6444 bufferUsages[usageNdx].usage, 6445 flushCases[caseNdx].glFlags, 6446 flushCases[caseNdx].caseFlags | groups[groupNdx].flags)); 6447 } 6448 } 6449 } 6450 } 6451 6452 // .modify_after_use 6453 { 6454 const int minBufferSize = 0; // !< 0kiB 6455 const int maxBufferSize = 1 << 24; // !< 16MiB 6456 6457 static const struct Usage 6458 { 6459 const char* name; 6460 const char* description; 6461 deUint32 usage; 6462 } usages[] = 6463 { 6464 { "static_draw", "Test with GL_STATIC_DRAW", GL_STATIC_DRAW }, 6465 { "dynamic_draw", "Test with GL_DYNAMIC_DRAW", GL_DYNAMIC_DRAW }, 6466 { "stream_draw", "Test with GL_STREAM_DRAW", GL_STREAM_DRAW }, 6467 6468 }; 6469 6470 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx) 6471 { 6472 tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description); 6473 modifyAfterUseGroup->addChild(usageGroup); 6474 6475 usageGroup->addChild(new ModifyAfterWithBufferDataCase (m_context, "buffer_data", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0)); 6476 usageGroup->addChild(new ModifyAfterWithBufferDataCase (m_context, "buffer_data_different_size", "Respecify buffer contents and size after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE)); 6477 usageGroup->addChild(new ModifyAfterWithBufferDataCase (m_context, "buffer_data_repeated", "Respecify buffer contents after upload and use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED)); 6478 6479 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase (m_context, "buffer_sub_data_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0)); 6480 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase (m_context, "buffer_sub_data_partial", "Respecify buffer contents partially use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL)); 6481 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase (m_context, "buffer_sub_data_full_repeated", "Respecify buffer contents after upload and use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED)); 6482 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase (m_context, "buffer_sub_data_partial_repeated", "Respecify buffer contents partially upload and use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL)); 6483 6484 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT)); 6485 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_write_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT)); 6486 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_read_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); 6487 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_read_write_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); 6488 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_invalidate_range_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)); 6489 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_invalidate_range_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)); 6490 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_invalidate_buffer_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); 6491 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_invalidate_buffer_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); 6492 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_unsynchronized_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); 6493 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase (m_context, "map_flag_unsynchronized_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); 6494 6495 usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase (m_context, "map_flag_flush_explicit_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); 6496 usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase (m_context, "map_flag_flush_explicit_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); 6497 } 6498 } 6499 6500 // .render_after_upload 6501 { 6502 // .reference 6503 { 6504 tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results"); 6505 renderAfterUploadGroup->addChild(renderReferenceGroup); 6506 6507 // .draw 6508 { 6509 tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers"); 6510 renderReferenceGroup->addChild(drawGroup); 6511 6512 // Time consumed by readPixels 6513 drawGroup->addChild(new ReferenceReadPixelsTimeCase (m_context, "read_pixels", "Measure time consumed by readPixels() function call")); 6514 6515 // Time consumed by rendering 6516 drawGroup->addChild(new ReferenceRenderTimeCase (m_context, "draw_arrays", "Measure time consumed by drawArrays() function call", DRAWMETHOD_DRAW_ARRAYS)); 6517 drawGroup->addChild(new ReferenceRenderTimeCase (m_context, "draw_elements", "Measure time consumed by drawElements() function call", DRAWMETHOD_DRAW_ELEMENTS)); 6518 } 6519 6520 // .draw_upload_draw 6521 { 6522 static const struct 6523 { 6524 const char* name; 6525 const char* description; 6526 DrawMethod drawMethod; 6527 TargetBuffer targetBuffer; 6528 bool partial; 6529 } uploadTargets[] = 6530 { 6531 { 6532 "draw_arrays_upload_vertices", 6533 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.", 6534 DRAWMETHOD_DRAW_ARRAYS, 6535 TARGETBUFFER_VERTEX, 6536 false 6537 }, 6538 { 6539 "draw_arrays_upload_vertices_partial", 6540 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.", 6541 DRAWMETHOD_DRAW_ARRAYS, 6542 TARGETBUFFER_VERTEX, 6543 true 6544 }, 6545 { 6546 "draw_elements_upload_vertices", 6547 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.", 6548 DRAWMETHOD_DRAW_ELEMENTS, 6549 TARGETBUFFER_VERTEX, 6550 false 6551 }, 6552 { 6553 "draw_elements_upload_indices", 6554 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.", 6555 DRAWMETHOD_DRAW_ELEMENTS, 6556 TARGETBUFFER_INDEX, 6557 false 6558 }, 6559 { 6560 "draw_elements_upload_indices_partial", 6561 "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.", 6562 DRAWMETHOD_DRAW_ELEMENTS, 6563 TARGETBUFFER_INDEX, 6564 true 6565 }, 6566 }; 6567 static const struct 6568 { 6569 const char* name; 6570 const char* description; 6571 UploadMethod uploadMethod; 6572 BufferInUseRenderTimeCase::MapFlags mapFlags; 6573 bool supportsPartialUpload; 6574 } uploadMethods[] = 6575 { 6576 { "buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false }, 6577 { "buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, true }, 6578 { "map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true }, 6579 { "map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false }, 6580 }; 6581 6582 tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw"); 6583 renderReferenceGroup->addChild(drawUploadDrawGroup); 6584 6585 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) 6586 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) 6587 { 6588 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; 6589 6590 if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) 6591 continue; 6592 6593 drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context, 6594 name.c_str(), 6595 uploadTargets[uploadTargetNdx].description, 6596 uploadTargets[uploadTargetNdx].drawMethod, 6597 uploadMethods[uploadMethodNdx].mapFlags, 6598 uploadTargets[uploadTargetNdx].targetBuffer, 6599 uploadMethods[uploadMethodNdx].uploadMethod, 6600 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), 6601 BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER)); 6602 } 6603 } 6604 } 6605 6606 // .upload_unrelated_and_draw 6607 { 6608 static const struct 6609 { 6610 const char* name; 6611 const char* description; 6612 DrawMethod drawMethod; 6613 } drawMethods[] = 6614 { 6615 { "draw_arrays", "drawArrays", DRAWMETHOD_DRAW_ARRAYS }, 6616 { "draw_elements", "drawElements", DRAWMETHOD_DRAW_ELEMENTS }, 6617 }; 6618 6619 static const struct 6620 { 6621 const char* name; 6622 UploadMethod uploadMethod; 6623 } uploadMethods[] = 6624 { 6625 { "buffer_data", UPLOADMETHOD_BUFFER_DATA }, 6626 { "buffer_sub_data", UPLOADMETHOD_BUFFER_SUB_DATA }, 6627 { "map_buffer_range", UPLOADMETHOD_MAP_BUFFER_RANGE }, 6628 }; 6629 6630 tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload"); 6631 renderAfterUploadGroup->addChild(uploadUnrelatedGroup); 6632 6633 for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx) 6634 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) 6635 { 6636 const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name; 6637 const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload"; 6638 6639 // Time consumed by rendering command after an unrelated upload 6640 6641 uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod)); 6642 } 6643 } 6644 6645 // .upload_and_draw 6646 { 6647 static const struct 6648 { 6649 const char* name; 6650 const char* description; 6651 BufferState bufferState; 6652 UnrelatedBufferType unrelatedBuffer; 6653 bool supportsPartialUpload; 6654 } bufferConfigs[] = 6655 { 6656 { "used_buffer", "Upload to an used buffer", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_NONE, true }, 6657 { "new_buffer", "Upload to a new buffer", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_NONE, false }, 6658 { "used_buffer_and_unrelated_upload", "Upload to an used buffer and an unrelated buffer and then draw", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_VERTEX, true }, 6659 { "new_buffer_and_unrelated_upload", "Upload to a new buffer and an unrelated buffer and then draw", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_VERTEX, false }, 6660 }; 6661 6662 tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers"); 6663 renderAfterUploadGroup->addChild(uploadAndDrawGroup); 6664 6665 // .used_buffer 6666 // .new_buffer 6667 // .used_buffer_and_unrelated_upload 6668 // .new_buffer_and_unrelated_upload 6669 for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx) 6670 { 6671 static const struct 6672 { 6673 const char* name; 6674 const char* description; 6675 DrawMethod drawMethod; 6676 TargetBuffer targetBuffer; 6677 bool partial; 6678 } uploadTargets[] = 6679 { 6680 { 6681 "draw_arrays_upload_vertices", 6682 "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls", 6683 DRAWMETHOD_DRAW_ARRAYS, 6684 TARGETBUFFER_VERTEX, 6685 false 6686 }, 6687 { 6688 "draw_arrays_upload_vertices_partial", 6689 "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls", 6690 DRAWMETHOD_DRAW_ARRAYS, 6691 TARGETBUFFER_VERTEX, 6692 true 6693 }, 6694 { 6695 "draw_elements_upload_vertices", 6696 "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls", 6697 DRAWMETHOD_DRAW_ELEMENTS, 6698 TARGETBUFFER_VERTEX, 6699 false 6700 }, 6701 { 6702 "draw_elements_upload_indices", 6703 "Measure time consumed by index upload, drawElements, and readPixels function calls", 6704 DRAWMETHOD_DRAW_ELEMENTS, 6705 TARGETBUFFER_INDEX, 6706 false 6707 }, 6708 { 6709 "draw_elements_upload_indices_partial", 6710 "Measure time consumed by partial index upload, drawElements, and readPixels function calls", 6711 DRAWMETHOD_DRAW_ELEMENTS, 6712 TARGETBUFFER_INDEX, 6713 true 6714 }, 6715 }; 6716 static const struct 6717 { 6718 const char* name; 6719 const char* description; 6720 UploadMethod uploadMethod; 6721 bool supportsPartialUpload; 6722 } uploadMethods[] = 6723 { 6724 { "buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, false }, 6725 { "buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, true }, 6726 { "map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, true }, 6727 }; 6728 6729 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description); 6730 uploadAndDrawGroup->addChild(group); 6731 6732 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) 6733 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) 6734 { 6735 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; 6736 6737 if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) 6738 continue; 6739 if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload) 6740 continue; 6741 6742 // Don't log unrelated buffer information to samples if there is no such buffer 6743 6744 if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE) 6745 { 6746 typedef UploadRenderReadDuration SampleType; 6747 typedef GenericUploadRenderTimeCase<SampleType> TestType; 6748 6749 group->addChild(new TestType(m_context, 6750 name.c_str(), 6751 uploadTargets[uploadTargetNdx].description, 6752 uploadTargets[uploadTargetNdx].drawMethod, 6753 uploadTargets[uploadTargetNdx].targetBuffer, 6754 uploadMethods[uploadMethodNdx].uploadMethod, 6755 bufferConfigs[stateNdx].bufferState, 6756 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), 6757 bufferConfigs[stateNdx].unrelatedBuffer)); 6758 } 6759 else 6760 { 6761 typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType; 6762 typedef GenericUploadRenderTimeCase<SampleType> TestType; 6763 6764 group->addChild(new TestType(m_context, 6765 name.c_str(), 6766 uploadTargets[uploadTargetNdx].description, 6767 uploadTargets[uploadTargetNdx].drawMethod, 6768 uploadTargets[uploadTargetNdx].targetBuffer, 6769 uploadMethods[uploadMethodNdx].uploadMethod, 6770 bufferConfigs[stateNdx].bufferState, 6771 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), 6772 bufferConfigs[stateNdx].unrelatedBuffer)); 6773 } 6774 } 6775 } 6776 } 6777 6778 // .draw_modify_draw 6779 { 6780 static const struct 6781 { 6782 const char* name; 6783 const char* description; 6784 DrawMethod drawMethod; 6785 TargetBuffer targetBuffer; 6786 bool partial; 6787 } uploadTargets[] = 6788 { 6789 { 6790 "draw_arrays_upload_vertices", 6791 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.", 6792 DRAWMETHOD_DRAW_ARRAYS, 6793 TARGETBUFFER_VERTEX, 6794 false 6795 }, 6796 { 6797 "draw_arrays_upload_vertices_partial", 6798 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.", 6799 DRAWMETHOD_DRAW_ARRAYS, 6800 TARGETBUFFER_VERTEX, 6801 true 6802 }, 6803 { 6804 "draw_elements_upload_vertices", 6805 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.", 6806 DRAWMETHOD_DRAW_ELEMENTS, 6807 TARGETBUFFER_VERTEX, 6808 false 6809 }, 6810 { 6811 "draw_elements_upload_indices", 6812 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.", 6813 DRAWMETHOD_DRAW_ELEMENTS, 6814 TARGETBUFFER_INDEX, 6815 false 6816 }, 6817 { 6818 "draw_elements_upload_indices_partial", 6819 "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.", 6820 DRAWMETHOD_DRAW_ELEMENTS, 6821 TARGETBUFFER_INDEX, 6822 true 6823 }, 6824 }; 6825 static const struct 6826 { 6827 const char* name; 6828 const char* description; 6829 UploadMethod uploadMethod; 6830 BufferInUseRenderTimeCase::MapFlags mapFlags; 6831 bool supportsPartialUpload; 6832 } uploadMethods[] = 6833 { 6834 { "buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false }, 6835 { "buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, true }, 6836 { "map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true }, 6837 { "map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false }, 6838 }; 6839 6840 tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use"); 6841 renderAfterUploadGroup->addChild(drawModifyDrawGroup); 6842 6843 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) 6844 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) 6845 { 6846 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; 6847 6848 if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) 6849 continue; 6850 6851 drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context, 6852 name.c_str(), 6853 uploadTargets[uploadTargetNdx].description, 6854 uploadTargets[uploadTargetNdx].drawMethod, 6855 uploadMethods[uploadMethodNdx].mapFlags, 6856 uploadTargets[uploadTargetNdx].targetBuffer, 6857 uploadMethods[uploadMethodNdx].uploadMethod, 6858 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), 6859 BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER)); 6860 } 6861 } 6862 6863 // .upload_wait_draw 6864 { 6865 static const struct 6866 { 6867 const char* name; 6868 const char* description; 6869 BufferState bufferState; 6870 } bufferStates[] = 6871 { 6872 { "new_buffer", "Uploading to just generated name", BUFFERSTATE_NEW }, 6873 { "used_buffer", "Uploading to a used buffer", BUFFERSTATE_EXISTING }, 6874 }; 6875 static const struct 6876 { 6877 const char* name; 6878 const char* description; 6879 DrawMethod drawMethod; 6880 TargetBuffer targetBuffer; 6881 } uploadTargets[] = 6882 { 6883 { "draw_arrays_vertices", "Upload vertex data, draw with drawArrays", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX }, 6884 { "draw_elements_vertices", "Upload vertex data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX }, 6885 { "draw_elements_indices", "Upload index data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX }, 6886 }; 6887 static const struct 6888 { 6889 const char* name; 6890 const char* description; 6891 UploadMethod uploadMethod; 6892 } uploadMethods[] = 6893 { 6894 { "buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA }, 6895 { "buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA }, 6896 { "map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE }, 6897 }; 6898 6899 tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago"); 6900 renderAfterUploadGroup->addChild(uploadSwapDrawGroup); 6901 6902 for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx) 6903 { 6904 tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description); 6905 uploadSwapDrawGroup->addChild(bufferGroup); 6906 6907 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) 6908 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) 6909 { 6910 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; 6911 6912 bufferGroup->addChild(new UploadWaitDrawCase(m_context, 6913 name.c_str(), 6914 uploadTargets[uploadTargetNdx].description, 6915 uploadTargets[uploadTargetNdx].drawMethod, 6916 uploadTargets[uploadTargetNdx].targetBuffer, 6917 uploadMethods[uploadMethodNdx].uploadMethod, 6918 bufferStates[bufferStateNdx].bufferState)); 6919 } 6920 } 6921 } 6922 } 6923 } 6924 6925 } // Performance 6926 } // gles3 6927 } // deqp 6928