Home | History | Annotate | Download | only in skpbench
      1 /*
      2  * Copyright 2016 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "GpuTimer.h"
      9 #include "GrContextFactory.h"
     10 #include "SkGr.h"
     11 
     12 #include "SkCanvas.h"
     13 #include "SkCommonFlags.h"
     14 #include "SkCommonFlagsGpu.h"
     15 #include "SkOSFile.h"
     16 #include "SkOSPath.h"
     17 #include "SkPerlinNoiseShader.h"
     18 #include "SkPicture.h"
     19 #include "SkPictureRecorder.h"
     20 #include "SkStream.h"
     21 #include "SkSurface.h"
     22 #include "SkSurfaceProps.h"
     23 #include "picture_utils.h"
     24 #include "sk_tool_utils.h"
     25 #include "flags/SkCommandLineFlags.h"
     26 #include "flags/SkCommonFlagsConfig.h"
     27 #include <stdlib.h>
     28 #include <algorithm>
     29 #include <array>
     30 #include <chrono>
     31 #include <cmath>
     32 #include <vector>
     33 
     34 /**
     35  * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
     36  * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
     37  * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
     38  *
     39  * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
     40  * render target and syncs the GPU after each draw.
     41  *
     42  * Currently, only GPU configs are supported.
     43  */
     44 
// Command-line flags. Note that main() treats --duration <= 0 as "print the header row (unless
// suppressed) and exit", and that --skp must name exactly one file or the literal 'warmup'.
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
DEFINE_bool(fps, false, "use fps instead of ms");
DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
     53 
// Column titles printed once by main() before any results (unless --suppressHeader is set).
static const char* header =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format for one result row; print_result() supplies the arguments in column order:
// accum, median, max, min, stddev (as a percentage), sample count, FLAGS_sampleMs,
// "gpu"/"cpu", Sample::metric(), config tag, bench name.
static const char* resultFormat =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-5s  %-6s  %-9s %s";
     59 
     60 struct Sample {
     61     using duration = std::chrono::nanoseconds;
     62 
     63     Sample() : fFrames(0), fDuration(0) {}
     64     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
     65     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
     66     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
     67     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
     68 
     69     int        fFrames;
     70     duration   fDuration;
     71 };
     72 
     73 class GpuSync {
     74 public:
     75     GpuSync(const sk_gpu_test::FenceSync* fenceSync);
     76     ~GpuSync();
     77 
     78     void syncToPreviousFrame();
     79 
     80 private:
     81     void updateFence();
     82 
     83     const sk_gpu_test::FenceSync* const   fFenceSync;
     84     sk_gpu_test::PlatformFence            fFence;
     85 };
     86 
     87 enum class ExitErr {
     88     kOk           = 0,
     89     kUsage        = 64,
     90     kData         = 65,
     91     kUnavailable  = 69,
     92     kIO           = 74,
     93     kSoftware     = 70
     94 };
     95 
     96 static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
     97 static sk_sp<SkPicture> create_warmup_skp();
     98 static bool mkdir_p(const SkString& name);
     99 static SkString join(const SkCommandLineFlags::StringArray&);
    100 static void exitf(ExitErr, const char* format, ...);
    101 
    102 static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
    103                           const SkPicture* skp, std::vector<Sample>* samples) {
    104     using clock = std::chrono::high_resolution_clock;
    105     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
    106     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
    107 
    108     draw_skp_and_flush(canvas, skp);
    109     GpuSync gpuSync(fenceSync);
    110 
    111     draw_skp_and_flush(canvas, skp);
    112     gpuSync.syncToPreviousFrame();
    113 
    114     clock::time_point now = clock::now();
    115     const clock::time_point endTime = now + benchDuration;
    116 
    117     do {
    118         clock::time_point sampleStart = now;
    119         samples->emplace_back();
    120         Sample& sample = samples->back();
    121 
    122         do {
    123             draw_skp_and_flush(canvas, skp);
    124             gpuSync.syncToPreviousFrame();
    125 
    126             now = clock::now();
    127             sample.fDuration = now - sampleStart;
    128             ++sample.fFrames;
    129         } while (sample.fDuration < sampleDuration);
    130     } while (now < endTime || 0 == samples->size() % 2);
    131 }
    132 
    133 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
    134                                    const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
    135                                    const SkPicture* skp, std::vector<Sample>* samples) {
    136     using sk_gpu_test::PlatformTimerQuery;
    137     using clock = std::chrono::steady_clock;
    138     const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
    139     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
    140 
    141     if (!gpuTimer->disjointSupport()) {
    142         fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
    143                         "results may be unreliable\n");
    144     }
    145 
    146     draw_skp_and_flush(canvas, skp);
    147     GpuSync gpuSync(fenceSync);
    148 
    149     gpuTimer->queueStart();
    150     draw_skp_and_flush(canvas, skp);
    151     PlatformTimerQuery previousTime = gpuTimer->queueStop();
    152     gpuSync.syncToPreviousFrame();
    153 
    154     clock::time_point now = clock::now();
    155     const clock::time_point endTime = now + benchDuration;
    156 
    157     do {
    158         const clock::time_point sampleEndTime = now + sampleDuration;
    159         samples->emplace_back();
    160         Sample& sample = samples->back();
    161 
    162         do {
    163             gpuTimer->queueStart();
    164             draw_skp_and_flush(canvas, skp);
    165             PlatformTimerQuery time = gpuTimer->queueStop();
    166             gpuSync.syncToPreviousFrame();
    167 
    168             switch (gpuTimer->checkQueryStatus(previousTime)) {
    169                 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
    170                 case QueryStatus::kInvalid:
    171                     exitf(ExitErr::kUnavailable, "GPU timer failed");
    172                 case QueryStatus::kPending:
    173                     exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
    174                 case QueryStatus::kDisjoint:
    175                     if (FLAGS_verbosity >= 4) {
    176                         fprintf(stderr, "discarding timer query due to disjoint operations.\n");
    177                     }
    178                     break;
    179                 case QueryStatus::kAccurate:
    180                     sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
    181                     ++sample.fFrames;
    182                     break;
    183             }
    184             gpuTimer->deleteQuery(previousTime);
    185             previousTime = time;
    186             now = clock::now();
    187         } while (now < sampleEndTime || 0 == sample.fFrames);
    188     } while (now < endTime || 0 == samples->size() % 2);
    189 
    190     gpuTimer->deleteQuery(previousTime);
    191 }
    192 
    193 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
    194     if (0 == (samples.size() % 2)) {
    195         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
    196     }
    197 
    198     Sample accum = Sample();
    199     std::vector<double> values;
    200     values.reserve(samples.size());
    201     for (const Sample& sample : samples) {
    202         accum.fFrames += sample.fFrames;
    203         accum.fDuration += sample.fDuration;
    204         values.push_back(sample.value());
    205     }
    206     std::sort(values.begin(), values.end());
    207 
    208     const double accumValue = accum.value();
    209     double variance = 0;
    210     for (double value : values) {
    211         const double delta = value - accumValue;
    212         variance += delta * delta;
    213     }
    214     variance /= values.size();
    215     // Technically, this is the relative standard deviation.
    216     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
    217 
    218     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
    219            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
    220            config, bench);
    221     printf("\n");
    222     fflush(stdout);
    223 }
    224 
    225 int main(int argc, char** argv) {
    226     SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
    227                                  "You usually don't want to use this program directly.");
    228     SkCommandLineFlags::Parse(argc, argv);
    229 
    230     if (!FLAGS_suppressHeader) {
    231         printf("%s\n", header);
    232     }
    233     if (FLAGS_duration <= 0) {
    234         exit(0); // This can be used to print the header and quit.
    235     }
    236 
    237     // Parse the config.
    238     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
    239     SkCommandLineConfigArray configs;
    240     ParseConfigs(FLAGS_config, &configs);
    241     if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
    242         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
    243                                join(FLAGS_config).c_str());
    244     }
    245 
    246     // Parse the skp.
    247     if (FLAGS_skp.count() != 1) {
    248         exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
    249                                join(FLAGS_skp).c_str());
    250     }
    251     sk_sp<SkPicture> skp;
    252     SkString skpname;
    253     if (0 == strcmp(FLAGS_skp[0], "warmup")) {
    254         skp = create_warmup_skp();
    255         skpname = "warmup";
    256     } else {
    257         const char* skpfile = FLAGS_skp[0];
    258         std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
    259         if (!skpstream) {
    260             exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
    261         }
    262         skp = SkPicture::MakeFromStream(skpstream.get());
    263         if (!skp) {
    264             exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
    265         }
    266         skpname = SkOSPath::Basename(skpfile);
    267     }
    268     int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
    269         height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
    270     if (FLAGS_verbosity >= 3 &&
    271         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
    272         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
    273                         skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
    274                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
    275     }
    276 
    277     // Create a context.
    278     GrContextOptions ctxOptions;
    279     SetCtxOptionsFromCommonFlags(&ctxOptions);
    280     sk_gpu_test::GrContextFactory factory(ctxOptions);
    281     sk_gpu_test::ContextInfo ctxInfo =
    282         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
    283     GrContext* ctx = ctxInfo.grContext();
    284     if (!ctx) {
    285         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
    286                                      config->getTag().c_str());
    287     }
    288     if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
    289         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
    290                                      width, height, ctx->caps()->maxRenderTargetSize());
    291     }
    292     GrPixelConfig grPixConfig = SkImageInfo2GrPixelConfig(config->getColorType(),
    293                                                           config->getColorSpace(),
    294                                                           *ctx->caps());
    295     if (kUnknown_GrPixelConfig == grPixConfig) {
    296         exitf(ExitErr::kUnavailable, "failed to get GrPixelConfig from SkColorType: %d",
    297                                      config->getColorType());
    298     }
    299     int supportedSampleCount =
    300             ctx->caps()->getRenderTargetSampleCount(config->getSamples(), grPixConfig);
    301     if (supportedSampleCount != config->getSamples()) {
    302         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
    303                                      config->getSamples());
    304     }
    305     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
    306     if (!testCtx) {
    307         exitf(ExitErr::kSoftware, "testContext is null");
    308     }
    309     if (!testCtx->fenceSyncSupport()) {
    310         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
    311     }
    312 
    313     // Create a render target.
    314     SkImageInfo info =
    315             SkImageInfo::Make(width, height, config->getColorType(), config->getAlphaType(),
    316                               sk_ref_sp(config->getColorSpace()));
    317     uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
    318     SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
    319     sk_sp<SkSurface> surface =
    320         SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
    321     if (!surface) {
    322         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
    323                                      width, height, config->getTag().c_str());
    324     }
    325 
    326     // Run the benchmark.
    327     std::vector<Sample> samples;
    328     if (FLAGS_sampleMs > 0) {
    329         // +1 because we might take one more sample in order to have an odd number.
    330         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
    331     } else {
    332         samples.reserve(2 * FLAGS_duration);
    333     }
    334     SkCanvas* canvas = surface->getCanvas();
    335     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
    336     if (!FLAGS_gpuClock) {
    337         run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
    338     } else {
    339         if (!testCtx->gpuTimingSupport()) {
    340             exitf(ExitErr::kUnavailable, "GPU does not support timing");
    341         }
    342         run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
    343                                &samples);
    344     }
    345     print_result(samples, config->getTag().c_str(), skpname.c_str());
    346 
    347     // Save a proof (if one was requested).
    348     if (!FLAGS_png.isEmpty()) {
    349         SkBitmap bmp;
    350         bmp.allocPixels(info);
    351         if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
    352             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
    353         }
    354         const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
    355                        &basename = SkOSPath::Basename(FLAGS_png[0]);
    356         if (!mkdir_p(dirname)) {
    357             exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
    358         }
    359         if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
    360             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
    361         }
    362     }
    363 
    364     exit(0);
    365 }
    366 
    367 static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
    368     canvas->drawPicture(skp);
    369     canvas->flush();
    370 }
    371 
    372 static sk_sp<SkPicture> create_warmup_skp() {
    373     static constexpr SkRect bounds{0, 0, 500, 500};
    374     SkPictureRecorder recorder;
    375     SkCanvas* recording = recorder.beginRecording(bounds);
    376 
    377     recording->clear(SK_ColorWHITE);
    378 
    379     SkPaint stroke;
    380     stroke.setStyle(SkPaint::kStroke_Style);
    381     stroke.setStrokeWidth(2);
    382 
    383     // Use a big path to (theoretically) warmup the CPU.
    384     SkPath bigPath;
    385     sk_tool_utils::make_big_path(bigPath);
    386     recording->drawPath(bigPath, stroke);
    387 
    388     // Use a perlin shader to warmup the GPU.
    389     SkPaint perlin;
    390     perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
    391     recording->drawRect(bounds, perlin);
    392 
    393     return recorder.finishRecordingAsPicture();
    394 }
    395 
    396 bool mkdir_p(const SkString& dirname) {
    397     if (dirname.isEmpty()) {
    398         return true;
    399     }
    400     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
    401 }
    402 
    403 static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
    404     SkString joined;
    405     for (int i = 0; i < stringArray.count(); ++i) {
    406         joined.appendf(i ? " %s" : "%s", stringArray[i]);
    407     }
    408     return joined;
    409 }
    410 
    411 static void exitf(ExitErr err, const char* format, ...) {
    412     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
    413     va_list args;
    414     va_start(args, format);
    415     vfprintf(stderr, format, args);
    416     va_end(args);
    417     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
    418     exit((int)err);
    419 }
    420 
    421 GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
    422     : fFenceSync(fenceSync) {
    423     this->updateFence();
    424 }
    425 
    426 GpuSync::~GpuSync() {
    427     fFenceSync->deleteFence(fFence);
    428 }
    429 
    430 void GpuSync::syncToPreviousFrame() {
    431     if (sk_gpu_test::kInvalidFence == fFence) {
    432         exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
    433     }
    434     if (!fFenceSync->waitFence(fFence)) {
    435         exitf(ExitErr::kUnavailable, "failed to wait for fence");
    436     }
    437     fFenceSync->deleteFence(fFence);
    438     this->updateFence();
    439 }
    440 
    441 void GpuSync::updateFence() {
    442     fFence = fFenceSync->insertFence();
    443     if (sk_gpu_test::kInvalidFence == fFence) {
    444         exitf(ExitErr::kUnavailable, "failed to insert fence");
    445     }
    446 }
    447