/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.0 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Shader operator performance tests.
 *//*--------------------------------------------------------------------*/

#include "es3pShaderOperatorTests.hpp"
#include "glsCalibration.hpp"
#include "gluShaderUtil.hpp"
#include "gluShaderProgram.hpp"
#include "gluPixelTransfer.hpp"
#include "tcuTestLog.hpp"
#include "tcuRenderTarget.hpp"
#include "tcuCommandLine.hpp"
#include "tcuSurface.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deClock.h"
#include "deMath.h"

#include "glwEnums.hpp"
#include "glwFunctions.hpp"

#include <map>
#include <algorithm>
#include <limits>
#include <set>

namespace deqp
{
namespace gles3
{
namespace Performance
{

using namespace gls;
using namespace glu;
using tcu::Vec2;
using tcu::Vec4;
using tcu::TestLog;
using de::SharedPtr;

using std::string;
using std::vector;

// Thrown when the measured data is too noisy or degenerate to estimate operator cost from.
#define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)

// Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3;
// How many different workload sizes are used by OperatorPerformanceCase.
static const int NUM_WORKLOADS = 8;
// Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
static const int MAX_WORKLOAD_SIZE = 1<<29;

// BinaryOpCase-specific constants for shader generation.
static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT = 2;
static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT = 4;

// FunctionCase-specific constants for shader generation.
static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;

// Swizzle strings of each length 1..4, for each starting component x..w; used when generating shader expressions.
static const char* const s_swizzles[][4] =
{
	{ "x", "yx", "yzx", "wzyx" },
	{ "y", "zy", "wyz", "xwzy" },
	{ "z", "wy", "zxy", "yzwx" },
	{ "w", "xw", "yxw", "zyxw" }
};

//! Component-wise arithmetic mean of a list of vectors.
template <int N>
static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data)
{
	tcu::Vector<float, N> sum(0.0f);
	for (int i = 0; i < (int)data.size(); i++)
		sum += data[i];
	return sum / tcu::Vector<float, N>((float)data.size());
}

//! Dispatch to the glUniform{1..4}fv variant matching component count n.
static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
{
	switch (n)
	{
		case 1: gl.uniform1fv(location, count, data); break;
		case 2: gl.uniform2fv(location, count, data); break;
		case 3: gl.uniform3fv(location, count, data); break;
		case 4: gl.uniform4fv(location, count, data); break;
		default: DE_ASSERT(false);
	}
}

//! Dispatch to the glUniform{1..4}iv variant matching component count n.
static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data)
{
	switch (n)
	{
		case 1: gl.uniform1iv(location, count, data); break;
		case 2: gl.uniform2iv(location, count, data); break;
		case 3: gl.uniform3iv(location, count, data); break;
		case 4: gl.uniform4iv(location, count, data); break;
		default: DE_ASSERT(false);
	}
}

//! Dispatch to the glUniformMatrix{2..4}fv variant matching matrix size n (column-major, no transpose).
static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
{
	switch (n)
	{
		case 2: gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]); break;
		case 3: gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]); break;
		case 4: gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]); break;
		default: DE_ASSERT(false);
	}
}

//! TYPE_FLOAT for size 1, otherwise the float vector type of that size.
static glu::DataType getDataTypeFloatOrVec (int size)
{
	return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
}

//! Test iteration count from the command line if given (> 0), otherwise def.
static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def)
{
	const int cmdLineVal = cmdLine.getTestIterationCount();
	return cmdLineVal > 0 ? cmdLineVal : def;
}

//! Human-readable "y = offset + coefficient*x" string for logging.
static string lineParamsString (const LineParameters& params)
{
	return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
}

namespace
{

/*--------------------------------------------------------------------*//*!
 * \brief Abstract class for measuring shader operator performance.
 *
 * This class draws multiple times with different workload sizes (set
 * via a uniform, by subclass). Time for each frame is measured, and the
 * slope of the workload size vs frame time data is estimated. This slope
 * tells us the estimated increase in frame time caused by a workload
 * increase of 1 unit (what 1 workload unit means is up to subclass).
 *
 * Generally, the shaders contain not just the operation we're interested
 * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
 * eliminate this cost, we actually do the stuff described in the above
 * paragraph with multiple programs (usually two), which contain different
 * kinds of workload (e.g. different loop contents). Then we can (in
 * theory) compute the cost of just one operation in a subclass-dependent
 * manner.
 *
 * At this point, the result tells us the increase in frame time caused
 * by the addition of one operation. Dividing this by the amount of
 * draw calls in a frame, and further by the amount of vertices or
 * fragments in a draw call, we get the time cost of one operation.
 *
 * In reality, there sometimes isn't just a trivial linear dependence
 * between workload size and frame time. Instead, there tends to be some
 * amount of initial "free" operations. That is, it may be that all
 * workload sizes below some positive integer C yield the same frame time,
 * and only workload sizes beyond C increase the frame time in a supposedly
 * linear manner. Graphically, this means that the graph consists of two
 * parts: a horizontal left part, and a linearly increasing right part; the
 * right part starts where the left part ends. The principal task of these
 * tests is to look at the slope of the increasing right part. Additionally
 * an estimate for the amount of initial free operations is calculated.
 * Note that it is also normal to get graphs where the horizontal left part
 * is of zero width, i.e. there are no free operations.
 *//*--------------------------------------------------------------------*/
class OperatorPerformanceCase : public tcu::TestCase
{
public:
	//! Which shader stage the measured operation is placed in.
	enum CaseType
	{
		CASETYPE_VERTEX = 0,
		CASETYPE_FRAGMENT,

		CASETYPE_LAST
	};

	//! Draw-call-count calibration seed, shared between cases so later cases start near a good value.
	struct InitialCalibration
	{
		int initialNumCalls;
		InitialCalibration (void) : initialNumCalls(1) {}
	};

	typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;

						OperatorPerformanceCase		(tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
													 CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage);
						~OperatorPerformanceCase	(void);

	void				init						(void);
	void				deinit						(void);

	IterateResult		iterate						(void);

	//! A vertex attribute, defined by its values at the four corners of the quad; interior values are interpolated.
	struct AttribSpec
	{
		AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_)
			: name	(name_)
			, p00	(p00_)
			, p01	(p01_)
			, p10	(p10_)
			, p11	(p11_)
		{
		}

		AttribSpec (void) {}

		std::string		name;
		tcu::Vec4		p00;	//!< Bottom left.
		tcu::Vec4		p01;	//!< Bottom right.
		tcu::Vec4		p10;	//!< Top left.
		tcu::Vec4		p11;	//!< Top right.
	};

protected:
	//! Shader sources plus attributes for one program variant used in the measurement.
	struct ProgramContext
	{
		string				vertShaderSource;
		string				fragShaderSource;
		vector<AttribSpec>	attributes;

		string				description;

		ProgramContext (void) {}
		ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc)
			: vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {}
	};

	virtual vector<ProgramContext>	generateProgramData			(void) const = 0;
	//! Sets program-specific uniforms that don't depend on the workload size.
	virtual void					setGeneralUniforms			(deUint32 program) const = 0;
	//! Sets the uniform(s) that specifies the workload size in the shader.
	virtual void					setWorkloadSizeUniform		(deUint32 program, int workload) const = 0;
	//! Computes the cost of a single operation, given the workload costs per program.
	virtual float					computeSingleOperationTime	(const vector<float>& perProgramWorkloadCosts) const = 0;
	//! Logs a human-readable description of what computeSingleOperationTime does.
	virtual void					logSingleOperationCalculationInfo (void) const = 0;

	glu::RenderContext&		m_renderCtx;

	CaseType				m_caseType;

private:
	enum State
	{
		STATE_CALIBRATING = 0,		//!< Calibrate draw call count, using first program in m_programs, with workload size 1.
		STATE_FIND_HIGH_WORKLOAD,	//!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
		STATE_MEASURING,			//!< Do actual measurements, for each program in m_programs.
		STATE_REPORTING,			//!< Measurements are done; calculate results and log.
		STATE_FINISHED,				//!< All done.

		STATE_LAST
	};

	//! All frame-time measurements taken for one workload size.
	struct WorkloadRecord
	{
		int				workloadSize;
		vector<float>	frameTimes; //!< In microseconds.

		WorkloadRecord (int workloadSize_) : workloadSize(workloadSize_) {}
		bool operator< (const WorkloadRecord& other) const { return this->workloadSize < other.workloadSize; }
		void addFrameTime (float time) { frameTimes.push_back(time); }
		float getMedianTime (void) const
		{
			// Median over a sorted copy; average of the two middle elements for even counts.
			vector<float> times = frameTimes;
			std::sort(times.begin(), times.end());
			return times.size() % 2 == 0 ?
					(times[times.size()/2-1] + times[times.size()/2])*0.5f :
					times[times.size()/2];
		}
	};

	void				prepareProgram				(int progNdx);					//!< Sets attributes and uniforms for m_programs[progNdx].
	void				prepareWorkload				(int progNdx, int workload);	//!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
	void				prepareNextRound			(void);							//!< Increases workload and/or updates m_state.
	void				render						(int numDrawCalls);
	deUint64			renderAndMeasure			(int numDrawCalls);
	void				adjustAndLogGridAndViewport	(void);							//!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.

	vector<Vec2>		getWorkloadMedianDataPoints	(int progNdx) const;			//!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]

	const int			m_numMeasurementsPerWorkload;
	const int			m_numWorkloads;				//!< How many different workload sizes are used for measurement for each program.

	int					m_workloadNdx;				//!< Runs from 0 to m_numWorkloads-1.

	int					m_workloadMeasurementNdx;
	vector<vector<WorkloadRecord> >	m_workloadRecordsFindHigh;	//!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
	vector<vector<WorkloadRecord> >	m_workloadRecords;			//!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.

	State				m_state;
	int					m_measureProgramNdx;		//!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.

	vector<int>			m_highWorkloadSizes;		//!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.

	TheilSenCalibrator			m_calibrator;
	InitialCalibrationStorage	m_initialCalibrationStorage;

	int					m_viewportWidth;
	int					m_viewportHeight;
	int					m_gridSizeX;
	int					m_gridSizeY;

	vector<ProgramContext>				m_programData;
	vector<SharedPtr<ShaderProgram> >	m_programs;

	std::vector<deUint32>				m_attribBuffers;
};

//! Barycentric-style interpolation of the triangle (v0, v1, v2) at coordinates (x, y).
static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
{
	return v0 + (v2-v0)*x + (v1-v0)*y;
}

//! Interpolate inside a quad split into two triangles; quad components are the four corner values.
static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
{
	// \note Top left fill rule.
	if (x + y < 1.0f)
		return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
	else
		return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
}

//! Number of vertices in a grid of quads: 2 triangles x 3 vertices per cell.
static inline int getNumVertices (int gridSizeX, int gridSizeY)
{
	return gridSizeX * gridSizeY * 2 * 3;
}

//! Generate non-indexed triangle-list vertex data for a full grid, interpolating spec's corner values per component.
static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec)
{
	const int numComponents = 4;

	DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
	dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);

	{
		int dstNdx = 0;

		for (int baseY = 0; baseY < gridSizeY; baseY++)
		for (int baseX = 0; baseX < gridSizeX; baseX++)
		{
			const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
			const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
			const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
			const float yf1 = (float)(baseY + 1) / (float)gridSizeY;

#define ADD_VERTEX(XF, YF) \
	for (int compNdx = 0; compNdx < numComponents; compNdx++) \
		dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))

			ADD_VERTEX(xf0, yf0);
			ADD_VERTEX(xf1, yf0);
			ADD_VERTEX(xf0, yf1);

			ADD_VERTEX(xf1, yf0);
			ADD_VERTEX(xf1, yf1);
			ADD_VERTEX(xf0, yf1);

#undef ADD_VERTEX
		}
	}
}

//! X coordinate where lines a and b intersect (solves a.offset + a.coefficient*x == b.offset + b.coefficient*x).
static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b)
{
	return (a.offset - b.offset) / (b.coefficient - a.coefficient);
}

//! Number of distinct x coordinates in the data set.
static int numDistinctX (const vector<Vec2>& data)
{
	std::set<float> xs;
	for (int i = 0; i < (int)data.size(); i++)
		xs.insert(data[i].x());
	return (int)xs.size();
}

//! Ordinary least-squares line fit through the data.
static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data)
{
	const Vec2 mid = mean(data);

	float slopeNumerator	= 0.0f;
	float slopeDenominator	= 0.0f;

	for (int i = 0; i < (int)data.size(); i++)
	{
		const Vec2 diff = data[i] - mid;

		slopeNumerator		+= diff.x()*diff.y();
		slopeDenominator	+= diff.x()*diff.x();
	}

	const float slope	= slopeNumerator / slopeDenominator;
	const float offset	= mid.y() - slope*mid.x();

	return gls::LineParameters(offset, slope);
}

//! Mean squared error of the least-squares fit; 0 when there are too few distinct x values to fit meaningfully.
static float simpleLinearRegressionError (const vector<Vec2>& data)
{
	if (numDistinctX(data) <= 2)
		return 0.0f;
	else
	{
		const gls::LineParameters	estimator	= simpleLinearRegression(data);
		float						error		= 0.0f;

		for (int i = 0; i < (int)data.size(); i++)
		{
			const float estY = estimator.offset + estimator.coefficient*data[i].x();
			const float diff = estY - data[i].y();
			error += diff*diff;
		}

		return error / (float)data.size();
	}
}

//! Variance of the y values, i.e. the MSE of the best horizontal-line fit; 0 for degenerate data (matches simpleLinearRegressionError's guard).
static float verticalVariance (const vector<Vec2>& data)
{
	if (numDistinctX(data) <= 2)
		return 0.0f;
	else
	{
		const float meanY = mean(data).y();
		float		error = 0.0f;

		for (int i = 0; i < (int)data.size(); i++)
		{
			const float diff = meanY - data[i].y();
			error += diff*diff;
		}

		return error / (float)data.size();
	}
}

/*--------------------------------------------------------------------*//*!
 * \brief Find the x coord that divides the input data into two slopes.
 *
 * The operator performance measurements tend to produce results where
 * we get small operation counts "for free" (e.g. because the operations
 * are performed during some memory transfer overhead or something),
 * resulting in a curve with two parts: an initial horizontal line segment,
 * and a rising line.
 *
 * This function finds the x coordinate that divides the input data into
 * two parts such that the sum of the mean square errors for the
 * least-squares estimated lines for the two parts is minimized, under the
 * additional condition that the left line is horizontal.
 *
 * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
 * is the right line, and the rest of data is the left line.
 *//*--------------------------------------------------------------------*/
static float findSlopePivotX (const vector<Vec2>& data)
{
	std::set<float> xCoords;
	for (int i = 0; i < (int)data.size(); i++)
		xCoords.insert(data[i].x());

	float lowestError	= std::numeric_limits<float>::infinity();
	float bestPivotX	= -std::numeric_limits<float>::infinity();

	// Try every distinct x coordinate as the pivot candidate, in increasing order.
	for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
	{
		vector<Vec2> leftData;
		vector<Vec2> rightData;
		for (int i = 0; i < (int)data.size(); i++)
		{
			if (data[i].x() < *pivotX)
				leftData.push_back(data[i]);
			else
				rightData.push_back(data[i]);
		}

		if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
			break;

		{
			// Left part is constrained to be horizontal (variance), right part is a free line fit (regression error).
			const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);

			if (totalError < lowestError)
			{
				lowestError	= totalError;
				bestPivotX	= *pivotX;
			}
		}
	}

	// At least the first candidate (smallest x) should have been accepted.
	DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());

	return bestPivotX;
}

struct SegmentedEstimator
{
	float				pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
	gls::LineParameters	left;
	gls::LineParameters	right;
	SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {}
};

/*--------------------------------------------------------------------*//*!
 * \brief Compute line estimators for (potentially) two-segment data.
 *
 * Splits the given data into left and right parts (using findSlopePivotX)
 * and returns the line estimates for them.
 *
 * Sometimes, however (especially in fragment shader cases) the data is
 * in fact not segmented, but a straight line. This function attempts to
 * detect if this the case, and if so, sets left.offset = right.offset and
 * left.slope = 0, meaning essentially that the initial "flat" part of the
 * data has zero width.
 *//*--------------------------------------------------------------------*/
static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data)
{
	const float		pivotX = findSlopePivotX(data);
	vector<Vec2>	leftData;
	vector<Vec2>	rightData;

	for (int i = 0; i < (int)data.size(); i++)
	{
		if (data[i].x() < pivotX)
			leftData.push_back(data[i]);
		else
			rightData.push_back(data[i]);
	}

	{
		// Robust (Theil-Sen) fits for both segments.
		const gls::LineParameters leftLine	= gls::theilSenLinearRegression(leftData);
		const gls::LineParameters rightLine	= gls::theilSenLinearRegression(rightData);

		if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f)
		{
			// Left data doesn't seem credible; assume the data is just a single line.
			const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
			return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity());
		}
		else
			return SegmentedEstimator(leftLine, rightLine, pivotX);
	}
}

OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
												  CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage)
	: tcu::TestCase					(testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
	, m_renderCtx					(renderCtx)
	, m_caseType					(caseType)
	, m_numMeasurementsPerWorkload	(getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
	, m_numWorkloads				(numWorkloads)
	, m_workloadNdx					(-1)
	, m_workloadMeasurementNdx		(-1)
	, m_state						(STATE_LAST)
	, m_measureProgramNdx			(-1)
	, m_initialCalibrationStorage	(initialCalibrationStorage)
	// Vertex cases use a small viewport and a dense grid (cost dominated by vertices);
	// fragment cases use a full-size viewport and a single quad (cost dominated by fragments).
	, m_viewportWidth				(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
	, m_viewportHeight				(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
	, m_gridSizeX					(caseType == CASETYPE_FRAGMENT ? 1 : 100)
	, m_gridSizeY					(caseType == CASETYPE_FRAGMENT ? 1 : 100)
{
	DE_ASSERT(m_numWorkloads > 0);
}

OperatorPerformanceCase::~OperatorPerformanceCase (void)
{
	// Defensive cleanup in case deinit() was not reached.
	if (!m_attribBuffers.empty())
	{
		m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
		m_attribBuffers.clear();
	}
}

//! Log render target dimensions, channel/depth/stencil bit counts and MSAA status.
static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget)
{
	log << TestLog::Section("RenderTarget", "Render target")
		<< TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage
		<< TestLog::Message << "bits:"
							<< " R" << renderTarget.getPixelFormat().redBits
							<< " G" << renderTarget.getPixelFormat().greenBits
							<< " B" << renderTarget.getPixelFormat().blueBits
							<< " A" << renderTarget.getPixelFormat().alphaBits
							<< " D" << renderTarget.getDepthBits()
							<< " S" << renderTarget.getStencilBits()
							<< TestLog::EndMessage;

	if (renderTarget.getNumSamples() != 0)
		log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
	else
		log << TestLog::Message << "No MSAA" << TestLog::EndMessage;

	log << TestLog::EndSection;
}

vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const
{
	const vector<WorkloadRecord>&	records = m_workloadRecords[progNdx];
	vector<Vec2>					result;

	for (int i = 0; i < (int)records.size(); i++)
		result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));

	return result;
}

void OperatorPerformanceCase::prepareProgram (int progNdx)
{
	DE_ASSERT(progNdx < (int)m_programs.size());
	DE_ASSERT(m_programData.size() == m_programs.size());

	const glw::Functions&	gl		= m_renderCtx.getFunctions();
	const ShaderProgram&	program	= *m_programs[progNdx];

	vector<AttribSpec> attributes = m_programData[progNdx].attributes;

	// Every program additionally gets a full-screen position attribute.
	attributes.push_back(AttribSpec("a_position",
									Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
									Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
									Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
									Vec4( 1.0f,  1.0f, 0.0f, 1.0f)));

	DE_ASSERT(program.isOk());

	// Generate vertices.
	if (!m_attribBuffers.empty())
		gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
	m_attribBuffers.resize(attributes.size(), 0);
	gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");

	for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
	{
		std::vector<float> vertices;
		generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);

		gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
	}

	// Setup attribute bindings.
659 for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++) 660 { 661 int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str()); 662 663 if (location >= 0) 664 { 665 gl.enableVertexAttribArray(location); 666 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]); 667 gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 668 } 669 } 670 GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state"); 671 672 gl.useProgram(program.getProgram()); 673 setGeneralUniforms(program.getProgram()); 674 gl.viewport(0, 0, m_viewportWidth, m_viewportHeight); 675 } 676 677 void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload) 678 { 679 setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload); 680 render(m_calibrator.getCallCount()); 681 } 682 683 void OperatorPerformanceCase::prepareNextRound (void) 684 { 685 DE_ASSERT(m_state == STATE_CALIBRATING || 686 m_state == STATE_FIND_HIGH_WORKLOAD || 687 m_state == STATE_MEASURING); 688 689 TestLog& log = m_testCtx.getLog(); 690 691 if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED) 692 { 693 m_measureProgramNdx = 0; 694 m_state = STATE_FIND_HIGH_WORKLOAD; 695 } 696 697 if (m_state == STATE_CALIBRATING) 698 prepareWorkload(0, 1); 699 else if (m_state == STATE_FIND_HIGH_WORKLOAD) 700 { 701 vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx]; 702 703 if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime()) 704 { 705 int workloadSize; 706 707 if (records.empty()) 708 workloadSize = 1; 709 else 710 { 711 workloadSize = records.back().workloadSize*2; 712 713 if (workloadSize > MAX_WORKLOAD_SIZE) 714 { 715 log << TestLog::Message << "Even workload size " << records.back().workloadSize 716 << " doesn't give high enough frame time for program " << m_measureProgramNdx 717 << ". Can't get sensible result." 
<< TestLog::EndMessage; 718 MEASUREMENT_FAIL(); 719 } 720 } 721 722 records.push_back(WorkloadRecord(workloadSize)); 723 prepareWorkload(0, workloadSize); 724 m_workloadMeasurementNdx = 0; 725 } 726 else 727 { 728 m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize; 729 m_measureProgramNdx++; 730 731 if (m_measureProgramNdx >= (int)m_programs.size()) 732 { 733 m_state = STATE_MEASURING; 734 m_workloadNdx = -1; 735 m_measureProgramNdx = 0; 736 } 737 738 prepareProgram(m_measureProgramNdx); 739 prepareNextRound(); 740 } 741 } 742 else 743 { 744 m_workloadNdx++; 745 746 if (m_workloadNdx < m_numWorkloads) 747 { 748 DE_ASSERT(m_numWorkloads > 1); 749 const int highWorkload = m_highWorkloadSizes[m_measureProgramNdx]; 750 const int workload = highWorkload > m_numWorkloads ? 751 1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) : 752 1 + m_workloadNdx; 753 754 prepareWorkload(m_measureProgramNdx, workload); 755 756 m_workloadMeasurementNdx = 0; 757 758 m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload)); 759 } 760 else 761 { 762 m_measureProgramNdx++; 763 764 if (m_measureProgramNdx < (int)m_programs.size()) 765 { 766 m_workloadNdx = -1; 767 m_workloadMeasurementNdx = 0; 768 prepareProgram(m_measureProgramNdx); 769 prepareNextRound(); 770 } 771 else 772 m_state = STATE_REPORTING; 773 } 774 } 775 } 776 777 void OperatorPerformanceCase::init (void) 778 { 779 TestLog& log = m_testCtx.getLog(); 780 const glw::Functions& gl = m_renderCtx.getFunctions(); 781 782 // Validate that we have sane grid and viewport setup. 783 DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256)); 784 TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) && 785 de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight())); 786 787 logRenderTargetInfo(log, m_renderCtx.getRenderTarget()); 788 789 log << TestLog::Message << "Using additive blending." 
<< TestLog::EndMessage; 790 gl.enable(GL_BLEND); 791 gl.blendEquation(GL_FUNC_ADD); 792 gl.blendFunc(GL_ONE, GL_ONE); 793 794 // Generate programs. 795 DE_ASSERT(m_programs.empty()); 796 m_programData = generateProgramData(); 797 DE_ASSERT(!m_programData.empty()); 798 799 for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++) 800 { 801 const string& vert = m_programData[progNdx].vertShaderSource; 802 const string& frag = m_programData[progNdx].fragShaderSource; 803 804 m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag)))); 805 806 if (!m_programs.back()->isOk()) 807 { 808 log << *m_programs.back(); 809 TCU_FAIL("Compile failed"); 810 } 811 } 812 813 // Log all programs. 814 for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++) 815 log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx)) 816 << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage 817 << *m_programs[progNdx] 818 << TestLog::EndSection; 819 820 m_highWorkloadSizes.resize(m_programData.size()); 821 m_workloadRecordsFindHigh.resize(m_programData.size()); 822 m_workloadRecords.resize(m_programData.size()); 823 824 m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */, 825 1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */)); 826 m_state = STATE_CALIBRATING; 827 828 prepareProgram(0); 829 prepareNextRound(); 830 } 831 832 void OperatorPerformanceCase::deinit (void) 833 { 834 if (!m_attribBuffers.empty()) 835 { 836 m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]); 837 m_attribBuffers.clear(); 838 } 839 840 m_programs.clear(); 841 } 842 843 void OperatorPerformanceCase::render 
(int numDrawCalls) 844 { 845 const glw::Functions& gl = m_renderCtx.getFunctions(); 846 const int numVertices = getNumVertices(m_gridSizeX, m_gridSizeY); 847 848 for (int callNdx = 0; callNdx < numDrawCalls; callNdx++) 849 gl.drawArrays(GL_TRIANGLES, 0, numVertices); 850 851 glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish(). 852 } 853 854 deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls) 855 { 856 const deUint64 startTime = deGetMicroseconds(); 857 render(numDrawCalls); 858 return deGetMicroseconds() - startTime; 859 } 860 861 void OperatorPerformanceCase::adjustAndLogGridAndViewport (void) 862 { 863 TestLog& log = m_testCtx.getLog(); 864 865 // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size. 866 if (m_calibrator.getCallCount() == 1) 867 { 868 const gls::MeasureState& calibratorMeasure = m_calibrator.getMeasureState(); 869 const float drawCallTime = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size(); 870 const float targetDrawCallTime = m_calibrator.getParameters().targetFrameTimeUs; 871 const float targetRatio = targetDrawCallTime / drawCallTime; 872 873 if (targetRatio < 0.95f) 874 { 875 // Reduce grid or viewport size assuming draw call time scales proportionally. 876 if (m_caseType == CASETYPE_VERTEX) 877 { 878 const float targetRatioSqrt = deFloatSqrt(targetRatio); 879 m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX); 880 m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY); 881 TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times"); 882 log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." 
					<< TestLog::EndMessage;
			}
			else
			{
				const float targetRatioSqrt = deFloatSqrt(targetRatio);
				m_viewportWidth  = (int)(targetRatioSqrt * (float)m_viewportWidth);
				m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
				TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times");
				log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
			}
		}
	}

	prepareProgram(0);

	// Log grid and viewport sizes.
	log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
	log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
}

// Per-frame state machine: first drive the Theil-Sen calibrator until it settles on a draw call
// count, then repeatedly measure frame times for increasing workload sizes (both while searching
// for a suitable high workload and while doing the actual measurements), and finally compute and
// log the per-operation cost estimate in the reporting state.
OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void)
{
	const TheilSenCalibrator::State calibratorState = m_calibrator.getState();

	if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
	{
		// Still calibrating: either update parameters or record one measured frame.
		if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
			m_calibrator.recomputeParameters();
		else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
			m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
		else
			DE_ASSERT(false);

		// Transition out of calibration the moment the calibrator finishes.
		if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
		{
			logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
			adjustAndLogGridAndViewport();
			prepareNextRound();
			// Remember the calibrated call count so sibling cases can start from it.
			m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
		}
	}
	else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
	{
		// Collect m_numMeasurementsPerWorkload frame times for the current workload size,
		// then move on to the next round.
		if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
		{
			vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx];
			records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
			m_workloadMeasurementNdx++;
		}
		else
			prepareNextRound();
	}
	else
	{
		DE_ASSERT(m_state == STATE_REPORTING);

		TestLog&	log				= m_testCtx.getLog();
		const int	drawCallCount	= m_calibrator.getCallCount();

		{
			// Compute per-program estimators for measurements.
			vector<SegmentedEstimator> estimators;
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
				estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));

			// Log measurements and their estimators for all programs.
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
			{
				const SegmentedEstimator&	estimator	= estimators[progNdx];
				const string				progNdxStr	= de::toString(progNdx);
				vector<WorkloadRecord>		records		= m_workloadRecords[progNdx]; // \note Copy, so the sort below doesn't disturb the member data.
				std::sort(records.begin(), records.end());

				{
					const tcu::ScopedLogSection section(log,
														"Program" + progNdxStr + "Measurements",
														"Measurements for program " + progNdxStr);

					// Sample list of individual frame times.

					log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times")
						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
						<< TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
						<< TestLog::EndSampleInfo;

					for (int i = 0; i < (int)records.size(); i++)
						for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
							log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample;

					log << TestLog::EndSampleList;

					// Sample list of median frame times.

					log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
						<< TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
						<< TestLog::EndSampleInfo;

					for (int i = 0; i < (int)records.size(); i++)
						log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample;

					log << TestLog::EndSampleList;

					// The slope of the estimator's right segment is the per-workload-unit cost estimate.
					log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);

					// pivotX > -inf means the estimator detected two segments (flat left, rising right).
					if (estimator.pivotX > -std::numeric_limits<float>::infinity())
						log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX
							<< " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage
							<< TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left)
							<< " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage;
					else
						log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage;
				}
			}

			// A non-positive slope means frame time didn't grow with workload; no sensible estimate possible.
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
			{
				if (estimators[progNdx].right.coefficient <= 0.0f)
				{
					log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
					MEASUREMENT_FAIL();
				}
			}

			// \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
			//       incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
			//       of R.
			//
			//       The measurements of any single program can't tell us the final result (time of single operation),
			//       so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
			//       subclass-defined manner.
			//
			//       After that, microseconds per operation can be calculated as singleOperationTime / (D * R).

			{
				vector<float> perProgramSlopes;
				for (int i = 0; i < (int)m_programs.size(); i++)
					perProgramSlopes.push_back(estimators[i].right.coefficient);

				logSingleOperationCalculationInfo();

				const float		maxSlope				= *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
				const float		usecsPerFramePerOp		= computeSingleOperationTime(perProgramSlopes);
				const int		vertexOrFragmentCount	= m_caseType == CASETYPE_VERTEX ?
															getNumVertices(m_gridSizeX, m_gridSizeY) :
															m_viewportWidth*m_viewportHeight;
				const double	usecsPerDrawCallPerOp	= usecsPerFramePerOp / (double)drawCallCount;
				const double	usecsPerSingleOp		= usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
				// ops per microsecond == millions of ops per second.
				const double	megaOpsPerSecond		= (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp;
				// "Free" ops: where the flat left segment would intersect a line with the measured per-op slope.
				const int		numFreeOps				= de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left,
																								 LineParameters(estimators[0].right.offset,
																												usecsPerFramePerOp))));

				log << TestLog::Integer("VertexOrFragmentCount",
										"R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count",
										"", QP_KEY_TAG_NONE, vertexOrFragmentCount)

					<< TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount)

					<< TestLog::Integer("VerticesOrFragmentsPerFrame",
										"R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame",
										"", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount)

					<< TestLog::Float("TimePerFramePerOp",
									  "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments")
									  + " (i.e. one frame) with one shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)

					<< TestLog::Float("TimePerDrawcallPerOp",
									  "Estimated cost of one draw call with one shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)

					<< TestLog::Float("TimePerSingleOp",
									  "Estimated cost of a single shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp);

				// \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
				//       for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
				//       following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
				if (usecsPerFramePerOp <= -0.1f*maxSlope)
				{
					log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
					MEASUREMENT_FAIL();
				}
				else if (usecsPerFramePerOp <= 0.001*maxSlope)
				{
					log << TestLog::Message << "Cost of operation seems to be approximately zero."
<< TestLog::EndMessage; 1069 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1070 } 1071 else 1072 { 1073 log << TestLog::Float("OpsPerSecond", 1074 "Operations per second", 1075 "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond) 1076 1077 << TestLog::Integer("NumFreeOps", 1078 "Estimated number of \"free\" operations", 1079 "", QP_KEY_TAG_PERFORMANCE, numFreeOps); 1080 1081 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str()); 1082 } 1083 1084 m_state = STATE_FINISHED; 1085 } 1086 } 1087 1088 return STOP; 1089 } 1090 1091 return CONTINUE; 1092 } 1093 1094 // Binary operator case. 1095 class BinaryOpCase : public OperatorPerformanceCase 1096 { 1097 public: 1098 BinaryOpCase (Context& context, const char* name, const char* description, const char* op, 1099 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration); 1100 1101 protected: 1102 vector<ProgramContext> generateProgramData (void) const; 1103 void setGeneralUniforms (deUint32 program) const; 1104 void setWorkloadSizeUniform (deUint32 program, int numOperations) const; 1105 float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const; 1106 void logSingleOperationCalculationInfo (void) const; 1107 1108 private: 1109 enum ProgramID 1110 { 1111 // \note 0-based sequential numbering is relevant, because these are also used as vector indices. 1112 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow. 
1113 PROGRAM_WITH_BIGGER_LOOP = 0, 1114 PROGRAM_WITH_SMALLER_LOOP, 1115 1116 PROGRAM_LAST 1117 }; 1118 1119 ProgramContext generateSingleProgramData (ProgramID) const; 1120 1121 const string m_op; 1122 const glu::DataType m_type; 1123 const glu::Precision m_precision; 1124 const bool m_useSwizzle; 1125 }; 1126 1127 BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op, 1128 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration) 1129 : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description, 1130 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration) 1131 , m_op (op) 1132 , m_type (type) 1133 , m_precision (precision) 1134 , m_useSwizzle (useSwizzle) 1135 { 1136 } 1137 1138 BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const 1139 { 1140 DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type)); 1141 1142 const bool isVertexCase = m_caseType == CASETYPE_VERTEX; 1143 const char* const precision = glu::getPrecisionName(m_precision); 1144 const char* const inputPrecision = glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision; 1145 const char* const typeName = getDataTypeName(m_type); 1146 1147 std::ostringstream vtx; 1148 std::ostringstream frag; 1149 std::ostringstream& op = isVertexCase ? vtx : frag; 1150 1151 vtx << "#version 300 es\n"; 1152 frag << "#version 300 es\n" 1153 << "layout (location = 0) out mediump vec4 o_color;\n"; 1154 1155 // Attributes. 
1156 vtx << "in highp vec4 a_position;\n"; 1157 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) 1158 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n"; 1159 1160 if (isVertexCase) 1161 { 1162 vtx << "out mediump vec4 v_color;\n"; 1163 frag << "in mediump vec4 v_color;\n"; 1164 } 1165 else 1166 { 1167 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) 1168 { 1169 vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n"; 1170 frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n"; 1171 } 1172 } 1173 1174 op << "uniform mediump int u_numLoopIterations;\n"; 1175 if (isVertexCase) 1176 op << "uniform mediump float u_zero;\n"; 1177 1178 vtx << "\n"; 1179 vtx << "void main()\n"; 1180 vtx << "{\n"; 1181 1182 if (!isVertexCase) 1183 vtx << "\tgl_Position = a_position;\n"; 1184 1185 frag << "\n"; 1186 frag << "void main()\n"; 1187 frag << "{\n"; 1188 1189 // Expression inputs. 1190 const char* const prefix = isVertexCase ? "a_" : "v_"; 1191 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) 1192 { 1193 const int inSize = getDataTypeScalarSize(m_type); 1194 const bool isInt = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4); 1195 const bool cast = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4); 1196 1197 op << "\t" << precision << " " << typeName << " in" << i << " = "; 1198 1199 if (cast) 1200 op << typeName << "("; 1201 1202 op << prefix << "in" << i; 1203 1204 if (m_useSwizzle) 1205 op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1]; 1206 1207 if (cast) 1208 op << ")"; 1209 1210 op << ";\n"; 1211 } 1212 1213 // Operation accumulation variables. 
	// Each independent calculation gets an 'a'/'b' accumulator pair seeded from adjacent inputs.
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
	{
		op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n";
		op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n";
	}

	// Loop, with expressions in it.
	// Workload size == loop iteration count (u_numLoopIterations); the two program variants
	// differ only in how many times the accumulation expressions are unrolled per iteration.
	op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
	op << "\t{\n";
	{
		const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
		for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
		{
			for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
			{
				if (i > 0 || unrollNdx > 0)
					op << "\n";
				// Cross-feed the accumulators so the two operations per pair stay dependent
				// and can't be trivially folded away.
				op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n";
				op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n";
			}
		}
	}
	op << "\t}\n";
	op << "\n";

	// Result variable (sum of accumulation variables).
	op << "\t" << precision << " " << typeName << " res =";
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
		op << (i > 0 ? " "+m_op : "") << " acc" << i << "b";
	op << ";\n";

	// Convert to color.
	// Pad non-vec4 results with 0.0 components and 1.0 alpha.
	op << "\tmediump vec4 color = ";
	if (m_type == TYPE_FLOAT_VEC4)
		op << "res";
	else
	{
		int size = getDataTypeScalarSize(m_type);
		op << "vec4(res";

		for (int i = size; i < 4; i++)
			op << ", " << (i == 3 ? "1.0" : "0.0");

		op << ")";
	}
	op << ";\n";
	op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";

	if (isVertexCase)
	{
		// Multiply by u_zero (a zero-valued uniform) so the computation feeds gl_Position
		// and thus can't be eliminated, without actually changing the position.
		vtx << " gl_Position = a_position + u_zero*color;\n";
		frag << " o_color = v_color;\n";
	}
	else
	{
		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
			vtx << " v_in" << i << " = a_in" << i << ";\n";
	}

	vtx << "}\n";
	frag << "}\n";

	{
		// Attribute values rotate per attribute index so inputs aren't all identical.
		vector<AttribSpec> attributes;
		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
			attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
											Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4)));

		{
			string description = "This is the program with the ";

			description += programID == PROGRAM_WITH_SMALLER_LOOP	? "smaller"
						 : programID == PROGRAM_WITH_BIGGER_LOOP	?
"bigger" 1290 : DE_NULL; 1291 1292 description += " loop.\n" 1293 "Note: workload size for this program means the number of loop iterations."; 1294 1295 return ProgramContext(vtx.str(), frag.str(), attributes, description); 1296 } 1297 } 1298 } 1299 1300 vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const 1301 { 1302 vector<ProgramContext> progData; 1303 for (int i = 0; i < PROGRAM_LAST; i++) 1304 progData.push_back(generateSingleProgramData((ProgramID)i)); 1305 return progData; 1306 } 1307 1308 void BinaryOpCase::setGeneralUniforms (deUint32 program) const 1309 { 1310 const glw::Functions& gl = m_renderCtx.getFunctions(); 1311 gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f); 1312 } 1313 1314 void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const 1315 { 1316 const glw::Functions& gl = m_renderCtx.getFunctions(); 1317 gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations); 1318 } 1319 1320 float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const 1321 { 1322 DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST); 1323 1324 const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; 1325 const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT; 1326 const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT; 1327 DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram); 1328 const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram; 1329 const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP]; 1330 1331 return programOperationCostDiff / (float)opDiff; 1332 } 1333 1334 void BinaryOpCase::logSingleOperationCalculationInfo (void) const 1335 { 1336 
const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; 1337 const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT; 1338 const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT; 1339 const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram; 1340 const char* const opName = m_op == "+" ? "addition" 1341 : m_op == "-" ? "subtraction" 1342 : m_op == "*" ? "multiplication" 1343 : m_op == "/" ? "division" 1344 : DE_NULL; 1345 DE_ASSERT(opName != DE_NULL); 1346 1347 m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more " 1348 << opName << " operations in one loop iteration than the small program; " 1349 << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff 1350 << TestLog::EndMessage; 1351 } 1352 1353 // Built-in function case. 1354 class FunctionCase : public OperatorPerformanceCase 1355 { 1356 public: 1357 enum 1358 { 1359 MAX_PARAMS = 3 1360 }; 1361 1362 FunctionCase (Context& context, 1363 const char* name, 1364 const char* description, 1365 const char* func, 1366 glu::DataType returnType, 1367 const glu::DataType paramTypes[MAX_PARAMS], 1368 const Vec4& attribute, 1369 int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative. 1370 bool useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'. 
1371 glu::Precision precision, 1372 bool isVertex, 1373 const InitialCalibrationStorage& initialCalibration); 1374 1375 protected: 1376 vector<ProgramContext> generateProgramData (void) const; 1377 void setGeneralUniforms (deUint32 program) const; 1378 void setWorkloadSizeUniform (deUint32 program, int numOperations) const; 1379 float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const; 1380 void logSingleOperationCalculationInfo (void) const; 1381 1382 private: 1383 enum ProgramID 1384 { 1385 // \note 0-based sequential numbering is relevant, because these are also used as vector indices. 1386 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow. 1387 PROGRAM_WITH_FUNCTION_CALLS = 0, 1388 PROGRAM_WITHOUT_FUNCTION_CALLS, 1389 1390 PROGRAM_LAST 1391 }; 1392 1393 //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum. 1394 static string sumExpr (const string& aExpr, const string& bExpr, glu::DataType type); 1395 //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just 1396 //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index, 1397 //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation. 1398 static string incrementExpr (const string& baseExpr, glu::DataType type, bool divide); 1399 1400 ProgramContext generateSingleProgramData (ProgramID) const; 1401 1402 const string m_func; 1403 const glu::DataType m_returnType; 1404 glu::DataType m_paramTypes[MAX_PARAMS]; 1405 // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a 1406 // compile-time constant (2.0) is added. This is a quick and dirty way to deal with 1407 // functions like clamp or smoothstep that require that a certain parameter is 1408 // greater than a certain other parameter. 
1409 const int m_modifyParamNdx; 1410 // \note m_useNearlyConstantInputs determines whether the inputs given to the function 1411 // should increase (w.r.t m_attribute) only by very small amounts. This is relevant 1412 // for functions like asin, which requires its inputs to be in a specific range. 1413 // In practice, this affects whether expressions used to increment the input 1414 // variables use division instead of multiplication; normally, multiplication is used, 1415 // but it's hard to keep the increments very small that way, and division shouldn't 1416 // be the default, since for many functions (probably not asin, luckily), division 1417 // is too heavy and dominates time-wise. 1418 const bool m_useNearlyConstantInputs; 1419 const Vec4 m_attribute; 1420 const glu::Precision m_precision; 1421 }; 1422 1423 FunctionCase::FunctionCase (Context& context, 1424 const char* name, 1425 const char* description, 1426 const char* func, 1427 glu::DataType returnType, 1428 const glu::DataType paramTypes[MAX_PARAMS], 1429 const Vec4& attribute, 1430 int modifyParamNdx, 1431 bool useNearlyConstantInputs, 1432 glu::Precision precision, 1433 bool isVertex, 1434 const InitialCalibrationStorage& initialCalibration) 1435 : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description, 1436 isVertex ? 
CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration) 1437 , m_func (func) 1438 , m_returnType (returnType) 1439 , m_modifyParamNdx (modifyParamNdx) 1440 , m_useNearlyConstantInputs (useNearlyConstantInputs) 1441 , m_attribute (attribute) 1442 , m_precision (precision) 1443 { 1444 for (int i = 0; i < MAX_PARAMS; i++) 1445 m_paramTypes[i] = paramTypes[i]; 1446 } 1447 1448 string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type) 1449 { 1450 if (glu::isDataTypeBoolOrBVec(type)) 1451 { 1452 if (type == glu::TYPE_BOOL) 1453 return "(" + aExpr + " == " + bExpr + ")"; 1454 else 1455 return "equal(" + aExpr + ", " + bExpr + ")"; 1456 } 1457 else 1458 return "(" + aExpr + " + " + bExpr + ")"; 1459 } 1460 1461 string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide) 1462 { 1463 const string mulOrDiv = divide ? "/" : "*"; 1464 1465 return glu::isDataTypeBoolOrBVec(type) ? baseExpr 1466 : glu::isDataTypeIntOrIVec(type) ? "(" + baseExpr + mulOrDiv + "(i+1))" 1467 : "(" + baseExpr + mulOrDiv + "float(i+1))"; 1468 } 1469 1470 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const 1471 { 1472 const bool isVertexCase = m_caseType == CASETYPE_VERTEX; 1473 const char* const precision = glu::getPrecisionName(m_precision); 1474 const char* const returnTypeName = getDataTypeName(m_returnType); 1475 const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " "; 1476 const char* inputPrecision = DE_NULL; 1477 const bool isMatrixReturn = isDataTypeMatrix(m_returnType); 1478 int numParams = 0; 1479 const char* paramTypeNames[MAX_PARAMS]; 1480 string paramPrecisionsMaybe[MAX_PARAMS]; 1481 1482 for (int i = 0; i < MAX_PARAMS; i++) 1483 { 1484 paramTypeNames[i] = getDataTypeName(m_paramTypes[i]); 1485 paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? 
																			   "" : string() + precision + " ";

		// As in BinaryOpCase: lowp int inputs are fed through mediump attributes/varyings.
		if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
			inputPrecision = "mediump";

		// numParams ends up as the index just past the last valid (non-TYPE_INVALID) parameter.
		if (m_paramTypes[i] != TYPE_INVALID)
			numParams = i+1;
	}

	DE_ASSERT(numParams > 0);

	if (inputPrecision == DE_NULL)
		inputPrecision = precision;

	// Calculation c uses inputs c..c+numParams-1, hence the overlap-adjusted attribute count.
	int					numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
	std::ostringstream	vtx;
	std::ostringstream	frag;
	std::ostringstream&	op = isVertexCase ? vtx : frag; // Stream that receives the measured code.

	vtx << "#version 300 es\n";
	frag << "#version 300 es\n"
		 << "layout (location = 0) out mediump vec4 o_color;\n";

	// Attributes.
	vtx << "in highp vec4 a_position;\n";
	for (int i = 0; i < numAttributes; i++)
		vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";

	if (isVertexCase)
	{
		vtx << "out mediump vec4 v_color;\n";
		frag << "in mediump vec4 v_color;\n";
	}
	else
	{
		// Fragment case: pass all inputs through varyings.
		for (int i = 0; i < numAttributes; i++)
		{
			vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
			frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
		}
	}

	op << "uniform mediump int u_numLoopIterations;\n";
	if (isVertexCase)
		op << "uniform mediump float u_zero;\n";

	// One increment uniform per function parameter: u_incA, u_incB, ...
	for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
		op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n";

	vtx << "\n";
	vtx << "void main()\n";
	vtx << "{\n";

	if (!isVertexCase)
		vtx << "\tgl_Position = a_position;\n";

	frag << "\n";
	frag << "void main()\n";
	frag << "{\n";

	// Function call input and return value accumulation variables.
	{
		const char* const inPrefix = isVertexCase ? "a_" : "v_";

		// Declare per-calculation inputs in<calc><param> and result accumulators res<calc>.
		for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
		{
			for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
			{
				const glu::DataType	paramType	= m_paramTypes[paramNdx];
				const bool			mustCast	= paramType != glu::TYPE_FLOAT_VEC4;

				op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = ";

				if (mustCast)
					op << paramTypeNames[paramNdx] << "(";

				if (glu::isDataTypeMatrix(paramType))
				{
					// Matrices are built column-by-column from swizzled vec4 inputs.
					static const char* const	swizzles[3]	= { "x", "xy", "xyz" };
					const int					numRows		= glu::getDataTypeMatrixNumRows(paramType);
					const int					numCols		= glu::getDataTypeMatrixNumColumns(paramType);
					const string				swizzle		= numRows < 4 ? string() + "." + swizzles[numRows-1] : "";

					for (int i = 0; i < numCols; i++)
						op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle;
				}
				else
				{
					op << inPrefix << "in" << calcNdx+paramNdx;

					if (paramNdx == m_modifyParamNdx)
					{
						DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
						op << " + 2.0";
					}
				}

				if (mustCast)
					op << ")";

				op << ";\n";
			}

			op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n";
		}
	}

	// Loop with expressions in it.
	// Workload size == loop iteration count. Each independent calculation perturbs its inputs,
	// evaluates either the measured function or a constant (depending on the program variant),
	// and folds the result into its accumulator.
	op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
	op << "\t{\n";
	for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
	{
		if (calcNdx > 0)
			op << "\n";

		op << "\t\t{\n";

		// Advance each input by its increment uniform so inputs vary per iteration.
		for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
		{
			const string inputName	= "in" + de::toString(calcNdx) + (char)('a'+inputNdx);
			const string incName	= string() + "u_inc" + (char)('A'+inputNdx);
			const string incExpr	= incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);

			op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
		}

		op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";

		if (programID == PROGRAM_WITH_FUNCTION_CALLS)
		{
			// Measured variant: actually call the built-in function.
			op << m_func << "(";

			for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
			{
				if (paramNdx > 0)
					op << ", ";

				op << "in" << calcNdx << (char)('a'+paramNdx);
			}

			op << ")";
		}
		else
		{
			// Baseline variant: same structure but a constant instead of the call.
			DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
			op << returnTypeName << "(1)";
		}

		op << ";\n";

		{
			const string resName	= "res" + de::toString(calcNdx);
			const string evalName	= "eval" + de::toString(calcNdx);
			const string incExpr	= incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);

			op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
		}

		op << "\t\t}\n";
	}
	op << "\t}\n";
	op << "\n";

	// Result variables.
	// Sum each parameter's inputs across calculations so all inputs contribute to the output.
	for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
	{
		op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = ";
		{
			string expr = string() + "in0" + (char)('a'+inputNdx);
			for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
				expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]);
			op << expr;
		}
		op << ";\n";
	}

	// Sum the per-calculation result accumulators.
	op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
	{
		string expr = "res0";
		for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
			expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
		op << expr;
	}
	op << ";\n";

	{
		glu::DataType finalResultDataType = glu::TYPE_LAST;

		if (glu::isDataTypeMatrix(m_returnType))
		{
			finalResultDataType = m_returnType;

			op << "\t" << precision << " " << returnTypeName << " finalRes = ";

			for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
			{
				DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
				op << "sumIn" << (char)('A'+inputNdx) << " + ";
			}
			op << "sumRes;\n";
		}
		else
		{
			// Non-matrix: widen everything to the largest float vector among params and return.
			int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
			for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
				numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));

			finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);

			{
				const string finalResType = glu::getDataTypeName(finalResultDataType);
				op << "\t" << precision << " " << finalResType << " finalRes = ";
				for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
					op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + ";
				op << finalResType << "(sumRes);\n";
			}
		}

		// Convert to color.
1704 op << "\tmediump vec4 color = "; 1705 if (finalResultDataType == TYPE_FLOAT_VEC4) 1706 op << "finalRes"; 1707 else 1708 { 1709 int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType); 1710 1711 op << "vec4("; 1712 1713 if (isMatrixReturn) 1714 { 1715 for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++) 1716 { 1717 if (i > 0) 1718 op << " + "; 1719 op << "finalRes[" << i << "]"; 1720 } 1721 } 1722 else 1723 op << "finalRes"; 1724 1725 for (int i = size; i < 4; i++) 1726 op << ", " << (i == 3 ? "1.0" : "0.0"); 1727 1728 op << ")"; 1729 } 1730 op << ";\n"; 1731 op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n"; 1732 1733 if (isVertexCase) 1734 { 1735 vtx << " gl_Position = a_position + u_zero*color;\n"; 1736 frag << " o_color = v_color;\n"; 1737 } 1738 else 1739 { 1740 for (int i = 0; i < numAttributes; i++) 1741 vtx << " v_in" << i << " = a_in" << i << ";\n"; 1742 } 1743 1744 vtx << "}\n"; 1745 frag << "}\n"; 1746 } 1747 1748 { 1749 vector<AttribSpec> attributes; 1750 for (int i = 0; i < numAttributes; i++) 1751 attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(), 1752 m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4), 1753 m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4), 1754 m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4), 1755 m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4))); 1756 1757 { 1758 string description = "This is the program "; 1759 1760 description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without" 1761 : programID == PROGRAM_WITH_FUNCTION_CALLS ? 
                                                                                                              "with"
						   // NOTE(review): if programID were neither enum value, appending DE_NULL (a null
						   // const char*) to std::string would be UB; the DE_ASSERT above this ternary
						   // guarantees programID == PROGRAM_WITHOUT_FUNCTION_CALLS here — confirm upstream.
						   : DE_NULL;

			description += " '" + m_func + "' function calls.\n"
						   "Note: workload size for this program means the number of loop iterations.";

			return ProgramContext(vtx.str(), frag.str(), attributes, description);
		}
	}
}

// Generates one ProgramContext per ProgramID (the variant with the measured function
// calls and the baseline variant without them); indices match the ProgramID enum.
vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const
{
	vector<ProgramContext> progData;
	for (int i = 0; i < PROGRAM_LAST; i++)
		progData.push_back(generateSingleProgramData((ProgramID)i));
	return progData;
}

// Sets the uniforms that are constant across workload sizes: u_zero and one
// u_incA/u_incB/... increment uniform per used parameter, with small arbitrary
// values matched to the parameter's GLSL type (float/vec, int/ivec, bool/bvec, mat).
void FunctionCase::setGeneralUniforms (deUint32 program) const
{
	const glw::Functions& gl = m_renderCtx.getFunctions();

	gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);

	for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
	{
		// TYPE_INVALID marks an unused parameter slot; skip it.
		if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
		{
			const glu::DataType	paramType	= m_paramTypes[paramNdx];
			const int			scalarSize	= glu::getDataTypeScalarSize(paramType);
			const int			location	= gl.getUniformLocation(program, (string() + "u_inc" + (char)('A'+paramNdx)).c_str());

			if (glu::isDataTypeFloatOrVec(paramType))
			{
				float values[4];
				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
					values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary small values.
				uniformNfv(gl, scalarSize, location, 1, &values[0]);
			}
			else if (glu::isDataTypeIntOrIVec(paramType))
			{
				int values[4];
				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
					values[i] = paramNdx*100 + i; // Arbitrary values.
				uniformNiv(gl, scalarSize, location, 1, &values[0]);
			}
			else if (glu::isDataTypeBoolOrBVec(paramType))
			{
				// Booleans are uploaded as ints (0/1), per GL uniform conventions.
				int values[4];
				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
					values[i] = (paramNdx >> i) & 1; // Arbitrary values.
				uniformNiv(gl, scalarSize, location, 1, &values[0]);
			}
			else if (glu::isDataTypeMatrix(paramType))
			{
				const int size = glu::getDataTypeMatrixNumRows(paramType);
				DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType)); // Only square matrices are used here.
				float values[4*4];
				// The full 4x4 buffer is filled, but only size*size elements are consumed below.
				for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
					values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary values.
				uniformMatrixNfv(gl, size, location, 1, &values[0]);
			}
			else
				DE_ASSERT(false);
		}
	}
}

// Sets the per-measurement workload size; for FunctionCase the workload size is the
// loop iteration count of the shader's main loop.
void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
{
	const glw::Functions&	gl	= m_renderCtx.getFunctions();
	const int				loc	= gl.getUniformLocation(program, "u_numLoopIterations");

	gl.uniform1i(loc, numLoopIterations);
}

// Derives the cost of a single function call as the per-iteration cost difference
// between the with-calls and without-calls programs, divided by the number of
// independent calls each iteration contains.
float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
{
	DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
	const int	numFunctionCalls			= FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
	const float	programOperationCostDiff	= perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];

	return programOperationCostDiff / (float)numFunctionCalls;
}

// Logs, for the test report, how the single-operation cost above is computed.
void FunctionCase::logSingleOperationCalculationInfo (void) const
{
	const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;

	m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
					   << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
					   << "cost of one operation is calculated as "
					   << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage;
}

} // anonymous

// Root group for all shader operator performance tests; cases are added in init().
ShaderOperatorTests::ShaderOperatorTests (Context& context)
	: TestCaseGroup(context,
                    "operator", "Operator Performance Tests")
{
}

ShaderOperatorTests::~ShaderOperatorTests (void)
{
}

// Builds the whole case tree: first the binary-operator cases (op x shader stage x
// type x precision), then the built-in function cases driven by the
// functionCaseGroups table below. Case names and ordering come directly from these
// tables, so table entries must not be reordered casually.
void ShaderOperatorTests::init (void)
{
	// Binary operator cases

	static const DataType binaryOpTypes[] =
	{
		TYPE_FLOAT,
		TYPE_FLOAT_VEC2,
		TYPE_FLOAT_VEC3,
		TYPE_FLOAT_VEC4,
		TYPE_INT,
		TYPE_INT_VEC2,
		TYPE_INT_VEC3,
		TYPE_INT_VEC4,
	};
	static const Precision precisions[] =
	{
		PRECISION_LOWP,
		PRECISION_MEDIUMP,
		PRECISION_HIGHP
	};
	// Operator table: name used in case path, GLSL operator token, and whether the
	// generated shader should swizzle operands (used for non-commutative ops).
	static const struct
	{
		const char*	name;
		const char*	op;
		bool		swizzle;
	} binaryOps[] =
	{
		{ "add",	"+",	false	},
		{ "sub",	"-",	true	},
		{ "mul",	"*",	false	},
		{ "div",	"/",	true	}
	};

	tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
	addChild(binaryOpsGroup);

	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
	{
		tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
		binaryOpsGroup->addChild(opGroup);

		for (int isFrag = 0; isFrag <= 1; isFrag++)
		{
			// One calibration storage per vertex/fragment sub-group, shared by all
			// cases in that sub-group so later cases can reuse earlier calibration.
			const BinaryOpCase::InitialCalibrationStorage	shaderGroupCalibrationStorage	(new BinaryOpCase::InitialCalibration);
			const bool										isVertex						= isFrag == 0;
			tcu::TestCaseGroup* const						shaderGroup						= new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
			opGroup->addChild(shaderGroup);

			for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
			{
				for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
				{
					const DataType		type		= binaryOpTypes[typeNdx];
					const Precision		precision	= precisions[precNdx];
					const char* const	op			= binaryOps[opNdx].op;
					const bool			useSwizzle	= binaryOps[opNdx].swizzle;
					std::ostringstream	name;

					name << getPrecisionName(precision) << "_" << getDataTypeName(type);

					shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage));
				}
			}
		}
	}

	// Built-in function cases.

	// Non-specific (i.e. includes gentypes) parameter types for the functions.
	// These are bit flags so a value can be tested against the VALUE_ANY_* masks below.
	enum ValueType
	{
		VALUE_NONE			= 0,
		VALUE_FLOAT			= (1<<0),	// float scalar
		VALUE_FLOAT_VEC		= (1<<1),	// float vector
		VALUE_FLOAT_VEC34	= (1<<2),	// float vector of size 3 or 4
		VALUE_FLOAT_GENTYPE	= (1<<3),	// float scalar/vector
		VALUE_VEC3			= (1<<4),	// vec3 only
		VALUE_VEC4			= (1<<5),	// vec4 only
		VALUE_MATRIX		= (1<<6),	// matrix
		VALUE_BOOL			= (1<<7),	// boolean scalar
		VALUE_BOOL_VEC		= (1<<8),	// boolean vector
		VALUE_BOOL_VEC4		= (1<<9),	// bvec4 only
		VALUE_BOOL_GENTYPE	= (1<<10),	// boolean scalar/vector
		VALUE_INT			= (1<<11),	// int scalar
		VALUE_INT_VEC		= (1<<12),	// int vector
		VALUE_INT_VEC4		= (1<<13),	// ivec4 only
		VALUE_INT_GENTYPE	= (1<<14),	// int scalar/vector

		// Shorthands.
		N	= VALUE_NONE,
		F	= VALUE_FLOAT,
		FV	= VALUE_FLOAT_VEC,
		VL	= VALUE_FLOAT_VEC34,	// L for "large"
		GT	= VALUE_FLOAT_GENTYPE,
		V3	= VALUE_VEC3,
		V4	= VALUE_VEC4,
		M	= VALUE_MATRIX,
		B	= VALUE_BOOL,
		BV	= VALUE_BOOL_VEC,
		B4	= VALUE_BOOL_VEC4,
		BGT	= VALUE_BOOL_GENTYPE,
		I	= VALUE_INT,
		IV	= VALUE_INT_VEC,
		I4	= VALUE_INT_VEC4,
		IGT	= VALUE_INT_GENTYPE,

		// Category masks used when resolving a ValueType to a concrete DataType.
		VALUE_ANY_FLOAT	= VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
		VALUE_ANY_INT	= VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_INT_VEC4,
		VALUE_ANY_BOOL	= VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE | VALUE_BOOL_VEC4,

		VALUE_ANY_GENTYPE	= VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 |
							  VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE |
							  VALUE_INT_VEC | VALUE_INT_GENTYPE |
							  VALUE_MATRIX
	};
	// Which precisions a function's cases are generated for; bit index is the Precision enum value.
	enum PrecisionMask
	{
		PRECMASK_NA				= 0,						//!< Precision not applicable (booleans)
		PRECMASK_LOWP			= (1<<PRECISION_LOWP),
		PRECMASK_MEDIUMP		= (1<<PRECISION_MEDIUMP),
		PRECMASK_HIGHP			= (1<<PRECISION_HIGHP),

		PRECMASK_MEDIUMP_HIGHP	= (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP),
		PRECMASK_ALL			= (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP)
	};

	// Lookup tables from scalar/vector size (or matrix size) to concrete DataType;
	// indexed below via typeArrayNdx.
	static const DataType floatTypes[] =
	{
		TYPE_FLOAT,
		TYPE_FLOAT_VEC2,
		TYPE_FLOAT_VEC3,
		TYPE_FLOAT_VEC4
	};
	static const DataType intTypes[] =
	{
		TYPE_INT,
		TYPE_INT_VEC2,
		TYPE_INT_VEC3,
		TYPE_INT_VEC4
	};
	static const DataType boolTypes[] =
	{
		TYPE_BOOL,
		TYPE_BOOL_VEC2,
		TYPE_BOOL_VEC3,
		TYPE_BOOL_VEC4
	};
	static const DataType matrixTypes[] =
	{
		TYPE_FLOAT_MAT2,
		TYPE_FLOAT_MAT3,
		TYPE_FLOAT_MAT4
	};

	tcu::TestCaseGroup* const angleAndTrigonometryGroup	= new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry",	"Built-In Angle and Trigonometry Function Performance Tests");
	tcu::TestCaseGroup* const exponentialGroup			= new tcu::TestCaseGroup(m_testCtx, "exponential",				"Built-In Exponential Function Performance Tests");
	tcu::TestCaseGroup* const commonFunctionsGroup		= new tcu::TestCaseGroup(m_testCtx, "common_functions",			"Built-In Common Function Performance Tests");
	tcu::TestCaseGroup* const geometricFunctionsGroup	= new tcu::TestCaseGroup(m_testCtx, "geometric",				"Built-In Geometric Function Performance Tests");
	tcu::TestCaseGroup* const matrixFunctionsGroup		= new tcu::TestCaseGroup(m_testCtx, "matrix",					"Built-In Matrix Function Performance Tests");
	tcu::TestCaseGroup* const floatCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "float_compare",			"Built-In Floating Point Comparison Function Performance Tests");
	tcu::TestCaseGroup* const intCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "int_compare",				"Built-In Integer Comparison Function Performance Tests");
	tcu::TestCaseGroup* const boolCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "bool_compare",				"Built-In Boolean Comparison Function Performance Tests");

	addChild(angleAndTrigonometryGroup);
	addChild(exponentialGroup);
	addChild(commonFunctionsGroup);
	addChild(geometricFunctionsGroup);
	addChild(matrixFunctionsGroup);
	addChild(floatCompareGroup);
	addChild(intCompareGroup);
	addChild(boolCompareGroup);

	// Some attributes to be used as parameters for the functions.
	// Chosen to keep inputs inside each function's sensible domain (e.g. asin/acos
	// get small magnitudes, log/sqrt get positive values, acosh gets values > 1).
	const Vec4 attrPos		= Vec4( 2.3f,  1.9f,  0.8f,  0.7f);
	const Vec4 attrNegPos	= Vec4(-1.3f,  2.5f, -3.5f,  4.3f);
	const Vec4 attrSmall	= Vec4(-0.9f,  0.8f, -0.4f,  0.2f);
	const Vec4 attrBig		= Vec4( 1.3f,  2.4f,  3.0f,  4.0f);

	// \todo The following functions and variants are missing, and should be added in the future:
	//		 - modf (has an output parameter, not currently handled by test code)
	//		 - functions with uint/uvec* return or parameter types
	//		 - non-matrix <-> matrix functions (outerProduct etc.)
	// \note Remember to update test spec when these are added.

	// Function name, return type and parameter type information; also, what attribute should be used in the test.
	// \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
	// \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
	static const struct
	{
		tcu::TestCaseGroup*	parentGroup;
		const char*			groupName;
		const char*			func;
		const ValueType		types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
		const Vec4&			attribute;
		int					modifyParamNdx;				// Parameter whose shader expression FunctionCase modifies, or -1.
		bool				useNearlyConstantInputs;	// Keep inputs close to their initial values (for domain-limited functions).
		bool				booleanCase;				// Boolean cases have no precision prefix in the case name.
		PrecisionMask		precMask;
	} functionCaseGroups[] =
	{
		{ angleAndTrigonometryGroup,	"radians",			"radians",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"degrees",			"degrees",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"sin",				"sin",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"cos",				"cos",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"tan",				"tan",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"asin",				"asin",				{ F,  F,  N,  N  },		attrSmall,	-1,	true,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"acos",				"acos",				{ F,  F,  N,  N  },		attrSmall,	-1,	true,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"atan2",			"atan",				{ F,  F,  F,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"atan",				"atan",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"sinh",				"sinh",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"cosh",				"cosh",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"tanh",				"tanh",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"asinh",			"asinh",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"acosh",			"acosh",			{ F,  F,  N,  N  },		attrBig,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"atanh",			"atanh",			{ F,  F,  N,  N  },		attrSmall,	-1,	true,	false,	PRECMASK_ALL			},

		{ exponentialGroup,				"pow",				"pow",				{ F,  F,  F,  N  },		attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"exp",				"exp",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"log",				"log",				{ F,  F,  N,  N  },		attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"exp2",				"exp2",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"log2",				"log2",				{ F,  F,  N,  N  },		attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"sqrt",				"sqrt",				{ F,  F,  N,  N  },		attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"inversesqrt",		"inversesqrt",		{ F,  F,  N,  N  },		attrPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ commonFunctionsGroup,			"abs",				"abs",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"abs",				"abs",				{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"sign",				"sign",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"sign",				"sign",				{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"floor",			"floor",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"floor",			"floor",			{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"trunc",			"trunc",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"trunc",			"trunc",			{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"round",			"round",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"round",			"round",			{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"roundEven",		"roundEven",		{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"roundEven",		"roundEven",		{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"ceil",				"ceil",				{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"ceil",				"ceil",				{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"fract",			"fract",			{ F,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"fract",			"fract",			{ V4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"mod",				"mod",				{ GT, GT, GT, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"min",				"min",				{ F,  F,  F,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"min",				"min",				{ V4, V4, V4, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"max",				"max",				{ F,  F,  F,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"max",				"max",				{ V4, V4, V4, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"clamp",			"clamp",			{ F,  F,  F,  F  },		attrSmall,	2,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"clamp",			"clamp",			{ V4, V4, V4, V4 },		attrSmall,	2,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"mix",				"mix",				{ F,  F,  F,  F  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"mix",				"mix",				{ V4, V4, V4, V4 },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"mix",				"mix",				{ F,  F,  F,  B  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"mix",				"mix",				{ V4, V4, V4, B4 },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"step",				"step",				{ F,  F,  F,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"step",				"step",				{ V4, V4, V4, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ F,  F,  F,  F  },		attrSmall,	1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ V4, V4, V4, V4 },		attrSmall,	1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"isnan",			"isnan",			{ B,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"isnan",			"isnan",			{ B4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"isinf",			"isinf",			{ B,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"isinf",			"isinf",			{ B4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"floatBitsToInt",	"floatBitsToInt",	{ I,  F,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"floatBitsToInt",	"floatBitsToInt",	{ I4, V4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"intBitsToFloat",	"intBitsToFloat",	{ F,  I,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"intBitsToFloat",	"intBitsToFloat",	{ V4, I4, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ geometricFunctionsGroup,		"length",			"length",			{ F,  VL, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"distance",			"distance",			{ F,  VL, VL, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"dot",				"dot",				{ F,  VL, VL, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"cross",			"cross",			{ V3, V3, V3, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"normalize",		"normalize",		{ VL, VL, N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"faceforward",		"faceforward",		{ VL, VL, VL, VL },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"reflect",			"reflect",			{ VL, VL, VL, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"refract",			"refract",			{ VL, VL, VL, F  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ matrixFunctionsGroup,			"matrixCompMult",	"matrixCompMult",	{ M,  M,  M,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ matrixFunctionsGroup,			"transpose",		"transpose",		{ M,  M,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ matrixFunctionsGroup,			"inverse",			"inverse",			{ M,  M,  N,  N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ floatCompareGroup,			"lessThan",			"lessThan",			{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"lessThanEqual",	"lessThanEqual",	{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"greaterThan",		"greaterThan",		{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"greaterThanEqual",	"greaterThanEqual",	{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"equal",			"equal",			{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"notEqual",			"notEqual",			{ BV, FV, FV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ intCompareGroup,				"lessThan",			"lessThan",			{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"lessThanEqual",	"lessThanEqual",	{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"greaterThan",		"greaterThan",		{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"greaterThanEqual",	"greaterThanEqual",	{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"equal",			"equal",			{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"notEqual",			"notEqual",			{ BV, IV, IV, N  },		attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ boolCompareGroup,				"equal",			"equal",			{ BV, BV, BV, N  },		attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"notEqual",			"notEqual",			{ BV, BV, BV, N  },		attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"any",				"any",				{ B,  BV, N,  N  },		attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"all",				"all",				{ B,  BV, N,  N  },		attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"not",				"not",				{ BV, BV, N,  N  },		attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		}
	};

	// vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
	// \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
	tcu::TestCaseGroup*						vertexSubGroup						= DE_NULL;
	tcu::TestCaseGroup*						fragmentSubGroup					= DE_NULL;
	FunctionCase::InitialCalibrationStorage	vertexSubGroupCalibrationStorage;
	FunctionCase::InitialCalibrationStorage	fragmentSubGroupCalibrationStorage;
	for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
	{
		tcu::TestCaseGroup* const	parentGroup				= functionCaseGroups[funcNdx].parentGroup;
		const char* const			groupName				= functionCaseGroups[funcNdx].groupName;
		const char* const			groupFunc				= functionCaseGroups[funcNdx].func;
		const ValueType* const		funcTypes				= functionCaseGroups[funcNdx].types;
		const Vec4&					groupAttribute			= functionCaseGroups[funcNdx].attribute;
		const int					modifyParamNdx			= functionCaseGroups[funcNdx].modifyParamNdx;
		const bool					useNearlyConstantInputs	= functionCaseGroups[funcNdx].useNearlyConstantInputs;
		const bool					booleanCase				= functionCaseGroups[funcNdx].booleanCase;
		const PrecisionMask			precMask				= functionCaseGroups[funcNdx].precMask;

		// If this is a new function and not just a different version of the previously defined function, create a new group.
		if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName)
		{
			tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
			functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);

			vertexSubGroup		= new tcu::TestCaseGroup(m_testCtx, "vertex", "");
			fragmentSubGroup	= new tcu::TestCaseGroup(m_testCtx, "fragment", "");

			funcGroup->addChild(vertexSubGroup);
			funcGroup->addChild(fragmentSubGroup);

			// Fresh calibration storage per sub-group, shared by all its cases.
			vertexSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
			fragmentSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
		}

		DE_ASSERT(vertexSubGroup != DE_NULL);
		DE_ASSERT(fragmentSubGroup != DE_NULL);

		// Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
		int genTypeFirstSize	= 1;
		int genTypeLastSize		= 1;

		// Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
		// \note Assumes only matching sizes gentypes are to be found, e.g. no "genType func (vec param)"
		for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
		{
			switch (funcTypes[i])
			{
				case VALUE_FLOAT_VEC:
				case VALUE_BOOL_VEC:
				case VALUE_INT_VEC:		// \note Fall-through.
					genTypeFirstSize	= 2;
					genTypeLastSize		= 4;
					break;
				case VALUE_FLOAT_VEC34:
					genTypeFirstSize	= 3;
					genTypeLastSize		= 4;
					break;
				case VALUE_FLOAT_GENTYPE:
				case VALUE_BOOL_GENTYPE:
				case VALUE_INT_GENTYPE:	// \note Fall-through.
					genTypeFirstSize	= 1;
					genTypeLastSize		= 4;
					break;
				case VALUE_MATRIX:
					genTypeFirstSize	= 2;
					genTypeLastSize		= 4;
					break;
				// If none of the above, keep looping.
				default:
					break;
			}
		}

		// Create a case for each possible size of the gentype.
		for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
		{
			// Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
			DataType types[FunctionCase::MAX_PARAMS + 1];
			for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
			{
				if (funcTypes[i] == VALUE_NONE)
					types[i] = TYPE_INVALID;
				else
				{
					int isFloat	= funcTypes[i] & VALUE_ANY_FLOAT;
					int isBool	= funcTypes[i] & VALUE_ANY_BOOL;
					int isInt	= funcTypes[i] & VALUE_ANY_INT;
					int isMat	= funcTypes[i] == VALUE_MATRIX;
					int inSize	= (funcTypes[i] & VALUE_ANY_GENTYPE) ? curSize
								: funcTypes[i] == VALUE_VEC3 ? 3
								: funcTypes[i] == VALUE_VEC4 ? 4
								: funcTypes[i] == VALUE_BOOL_VEC4 ? 4
								: funcTypes[i] == VALUE_INT_VEC4 ? 4
								: 1;
					int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.

					types[i] = isFloat	? floatTypes[typeArrayNdx]
							 : isBool	? boolTypes[typeArrayNdx]
							 : isInt	? intTypes[typeArrayNdx]
							 : isMat	? matrixTypes[typeArrayNdx]
							 : TYPE_LAST;
				}

				DE_ASSERT(types[i] != TYPE_LAST);
			}

			// Array for just the parameter types.
			DataType paramTypes[FunctionCase::MAX_PARAMS];
			for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
				paramTypes[i] = types[i+1];

			for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
			{
				if ((precMask & (1 << prec)) == 0)
					continue;

				const string		precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
				std::ostringstream	caseName;

				caseName << precisionPrefix;

				// Write the name of each distinct parameter data type into the test case name.
				for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
				{
					if (i == 1 || types[i] != types[i-1])
					{
						if (i > 1)
							caseName << "_";

						caseName << getDataTypeName(types[i]);
					}
				}

				for (int fragI = 0; fragI <= 1; fragI++)
				{
					const bool					vert	= fragI == 0;
					tcu::TestCaseGroup* const	group	= vert ? vertexSubGroup : fragmentSubGroup;
					group->addChild (new FunctionCase(m_context,
													  caseName.str().c_str(), "",
													  groupFunc,
													  types[0], paramTypes,
													  groupAttribute, modifyParamNdx, useNearlyConstantInputs,
													  (Precision)prec, vert,
													  vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
				}
			}
		}
	}
}

} // Performance
} // gles3
} // deqp