// buffer_transport_benchmark: compares Binder- and BufferHub-based buffer transports.
      1 #include <android-base/logging.h>
      2 #include <android/native_window.h>
      3 #include <benchmark/benchmark.h>
      4 #include <binder/IPCThreadState.h>
      5 #include <binder/IServiceManager.h>
      6 #include <dvr/dvr_api.h>
      7 #include <gui/BufferItem.h>
      8 #include <gui/BufferItemConsumer.h>
      9 #include <gui/Surface.h>
     10 #include <private/dvr/epoll_file_descriptor.h>
     11 #include <utils/Trace.h>
     12 
     13 #include <chrono>
     14 #include <functional>
     15 #include <iostream>
     16 #include <thread>
     17 #include <vector>
     18 
     19 #include <dlfcn.h>
     20 #include <poll.h>
     21 #include <sys/epoll.h>
     22 #include <sys/wait.h>
     23 
      24 // Trace with ATRACE_TAG_ALWAYS so trace points are always compiled in;
      25 // whether tracing is actually enabled is controlled manually during command
      25 // line processing (see the --trace flag in main()).
     26 #ifdef ATRACE_TAG
     27 #undef ATRACE_TAG
     28 #endif
     29 #define ATRACE_TAG ATRACE_TAG_ALWAYS
     30 
     31 using namespace android;
     32 using ::benchmark::State;
     33 
     34 static const String16 kBinderService = String16("bufferTransport");
     35 static const uint32_t kBufferWidth = 100;
     36 static const uint32_t kBufferHeight = 1;
     37 static const uint32_t kBufferFormat = HAL_PIXEL_FORMAT_BLOB;
     38 static const uint64_t kBufferUsage =
     39     GRALLOC_USAGE_SW_READ_OFTEN | GRALLOC_USAGE_SW_WRITE_OFTEN;
     40 static const uint32_t kBufferLayer = 1;
     41 static const int kMaxAcquiredImages = 1;
     42 static const int kQueueDepth = 2;  // We are double buffering for this test.
     43 static const size_t kMaxQueueCounts = 128;
     44 static const int kInvalidFence = -1;
     45 
// Transaction codes handled by BufferTransportService::onTransact().
enum BufferTransportServiceCode {
  // Creates a new BufferQueue inside the service and replies with the
  // IGraphicBufferProducer end for the client to produce into.
  CREATE_BUFFER_QUEUE = IBinder::FIRST_CALL_TRANSACTION,
};
     49 
// A binder service that mimics a compositor consuming buffers. It provides
// one Binder transaction to create a new Surface for a buffer producer to
// write into; the service itself carries out no-op buffer consumption by
// acquiring and then releasing each buffer immediately.
class BufferTransportService : public BBinder {
 public:
  BufferTransportService() = default;
  ~BufferTransportService() = default;

  // Handles incoming transactions. For CREATE_BUFFER_QUEUE a fresh
  // BufferQueue is created; the holder (consumer end) is retained by the
  // service and the IGraphicBufferProducer binder is written to |reply|.
  virtual status_t onTransact(uint32_t code, const Parcel& data, Parcel* reply,
                              uint32_t flags = 0) {
    (void)flags;
    (void)data;
    switch (code) {
      case CREATE_BUFFER_QUEUE: {
        auto new_queue = std::make_shared<BufferQueueHolder>(this);
        reply->writeStrongBinder(
            IGraphicBufferProducer::asBinder(new_queue->producer));
        // NOTE(review): push_back is not synchronized; this assumes
        // transactions are dispatched serially to this service — confirm
        // against the server's binder thread-pool configuration.
        buffer_queues_.push_back(new_queue);
        return NO_ERROR;
      }
      default:
        return UNKNOWN_TRANSACTION;
    };
  }

 private:
  // No-op consumer: acquires each newly available buffer and releases it
  // right away so producers never stall waiting for free buffers.
  struct FrameListener : public ConsumerBase::FrameAvailableListener {
   public:
    FrameListener(BufferTransportService* /*service*/,
                  sp<BufferItemConsumer> buffer_item_consumer)
        : buffer_item_consumer_(buffer_item_consumer) {}

    void onFrameAvailable(const BufferItem& /*item*/) override {
      BufferItem buffer;
      status_t ret = 0;
      {
        ATRACE_NAME("AcquireBuffer");
        ret = buffer_item_consumer_->acquireBuffer(&buffer, /*presentWhen=*/0,
                                                   /*waitForFence=*/false);
      }

      if (ret != NO_ERROR) {
        LOG(ERROR) << "Failed to acquire next buffer.";
        return;
      }

      {
        ATRACE_NAME("ReleaseBuffer");
        ret = buffer_item_consumer_->releaseBuffer(buffer);
      }

      if (ret != NO_ERROR) {
        LOG(ERROR) << "Failed to release buffer.";
        return;
      }
    }

   private:
    sp<BufferItemConsumer> buffer_item_consumer_;
  };

  // Owns one BufferQueue pair: the producer end is sent to the client over
  // Binder while the consumer end is drained locally by FrameListener.
  struct BufferQueueHolder {
    explicit BufferQueueHolder(BufferTransportService* service) {
      BufferQueue::createBufferQueue(&producer, &consumer);

      sp<BufferItemConsumer> buffer_item_consumer =
          new BufferItemConsumer(consumer, kBufferUsage, kMaxAcquiredImages,
                                 /*controlledByApp=*/true);
      buffer_item_consumer->setName(String8("BinderBufferTransport"));
      frame_listener_ = new FrameListener(service, buffer_item_consumer);
      buffer_item_consumer->setFrameAvailableListener(frame_listener_);
    }

    sp<IGraphicBufferProducer> producer;
    sp<IGraphicBufferConsumer> consumer;

   private:
    sp<FrameListener> frame_listener_;
  };

  // Keeps every created queue alive for the lifetime of the service.
  std::vector<std::shared_ptr<BufferQueueHolder>> buffer_queues_;
};
    133 
    134 // A virtual interfaces that abstracts the common BufferQueue operations, so
    135 // that the test suite can use the same test case to drive different types of
    136 // transport backends.
    137 class BufferTransport {
    138  public:
    139   virtual ~BufferTransport() {}
    140 
    141   virtual int Start() = 0;
    142   virtual sp<Surface> CreateSurface() = 0;
    143 };
    144 
// Binder-based buffer transport backend.
//
// On Start() this connects to the BufferTransportService running in a
// separate process (forked in main()), which actually consumes the buffers.
// On CreateSurface() a new Binder BufferQueue is created: the service holds
// the concrete binder node of the IGraphicBufferProducer while sending the
// binder proxy to the client. In other words, producer-side operations are
// carried out in this process while consumer-side operations are carried out
// within the BufferTransportService's own process.
class BinderBufferTransport : public BufferTransport {
 public:
  BinderBufferTransport() {}

  // Looks up the benchmark service in the service manager. Returns 0 on
  // success, -EIO if the service is not registered.
  int Start() override {
    sp<IServiceManager> sm = defaultServiceManager();
    service_ = sm->getService(kBinderService);
    if (service_ == nullptr) {
      LOG(ERROR) << "Failed to get the benchmark service.";
      return -EIO;
    }

    LOG(INFO) << "Binder server is ready for client.";
    return 0;
  }

  // Asks the service to create a BufferQueue, unparcels the producer proxy
  // from the reply and wraps it in a Surface configured with the test buffer
  // geometry. Returns nullptr on any failure.
  sp<Surface> CreateSurface() override {
    Parcel data;
    Parcel reply;
    int error = service_->transact(CREATE_BUFFER_QUEUE, data, &reply);
    if (error != NO_ERROR) {
      LOG(ERROR) << "Failed to get buffer queue over binder.";
      return nullptr;
    }

    sp<IBinder> binder;
    error = reply.readNullableStrongBinder(&binder);
    if (error != NO_ERROR) {
      LOG(ERROR) << "Failed to get IGraphicBufferProducer over binder.";
      return nullptr;
    }

    auto producer = interface_cast<IGraphicBufferProducer>(binder);
    if (producer == nullptr) {
      LOG(ERROR) << "Failed to get IGraphicBufferProducer over binder.";
      return nullptr;
    }

    sp<Surface> surface = new Surface(producer, /*controlledByApp=*/true);

    // Set buffer dimension.
    ANativeWindow* window = static_cast<ANativeWindow*>(surface.get());
    ANativeWindow_setBuffersGeometry(window, kBufferWidth, kBufferHeight,
                                     kBufferFormat);

    return surface;
  }

 private:
  sp<IBinder> service_;  // Proxy to the BufferTransportService.
};
    205 
    206 class DvrApi {
    207  public:
    208   DvrApi() {
    209     handle_ = dlopen("libdvr.so", RTLD_NOW | RTLD_LOCAL);
    210     CHECK(handle_);
    211 
    212     auto dvr_get_api =
    213         reinterpret_cast<decltype(&dvrGetApi)>(dlsym(handle_, "dvrGetApi"));
    214     int ret = dvr_get_api(&api_, sizeof(api_), /*version=*/1);
    215 
    216     CHECK(ret == 0);
    217   }
    218 
    219   ~DvrApi() { dlclose(handle_); }
    220 
    221   const DvrApi_v1& Api() { return api_; }
    222 
    223  private:
    224   void* handle_ = nullptr;
    225   DvrApi_v1 api_;
    226 };
    227 
    228 // BufferHub/PDX-based buffer transport.
    229 //
    230 // On Start() a new thread will be swapned to run an epoll polling thread which
    231 // minics the behavior of a compositor. Similar to Binder-based backend, the
    232 // buffer available handler is also a no-op: Buffer gets acquired and released
    233 // immediately.
    234 // On CreateSurface() a pair of dvr::ProducerQueue and dvr::ConsumerQueue will
    235 // be created. The epoll thread holds on the consumer queue and dequeues buffer
    236 // from it; while the producer queue will be wrapped in a Surface and returned
    237 // to test suite.
    238 class BufferHubTransport : public BufferTransport {
    239  public:
    240   virtual ~BufferHubTransport() {
    241     stopped_.store(true);
    242     if (reader_thread_.joinable()) {
    243       reader_thread_.join();
    244     }
    245   }
    246 
    247   int Start() override {
    248     int ret = epoll_fd_.Create();
    249     if (ret < 0) {
    250       LOG(ERROR) << "Failed to create epoll fd: %s", strerror(-ret);
    251       return -1;
    252     }
    253 
    254     // Create the reader thread.
    255     reader_thread_ = std::thread([this]() {
    256       int ret = dvr_.Api().PerformanceSetSchedulerPolicy(0, "graphics");
    257       if (ret < 0) {
    258         LOG(ERROR) << "Failed to set scheduler policy, ret=" << ret;
    259         return;
    260       }
    261 
    262       stopped_.store(false);
    263       LOG(INFO) << "Reader Thread Running...";
    264 
    265       while (!stopped_.load()) {
    266         std::array<epoll_event, kMaxQueueCounts> events;
    267 
    268         // Don't sleep forever so that we will have a chance to wake up.
    269         const int ret = epoll_fd_.Wait(events.data(), events.size(),
    270                                        /*timeout=*/100);
    271         if (ret < 0) {
    272           LOG(ERROR) << "Error polling consumer queues.";
    273           continue;
    274         }
    275         if (ret == 0) {
    276           continue;
    277         }
    278 
    279         const int num_events = ret;
    280         for (int i = 0; i < num_events; i++) {
    281           uint32_t index = events[i].data.u32;
    282           dvr_.Api().ReadBufferQueueHandleEvents(
    283               buffer_queues_[index]->GetReadQueue());
    284         }
    285       }
    286 
    287       LOG(INFO) << "Reader Thread Exiting...";
    288     });
    289 
    290     return 0;
    291   }
    292 
    293   sp<Surface> CreateSurface() override {
    294     auto new_queue = std::make_shared<BufferQueueHolder>();
    295     if (!new_queue->IsReady()) {
    296       LOG(ERROR) << "Failed to create BufferHub-based BufferQueue.";
    297       return nullptr;
    298     }
    299 
    300     // Set buffer dimension.
    301     ANativeWindow_setBuffersGeometry(new_queue->GetSurface(), kBufferWidth,
    302                                      kBufferHeight, kBufferFormat);
    303 
    304     // Use the next position as buffer_queue index.
    305     uint32_t index = buffer_queues_.size();
    306     epoll_event event = {.events = EPOLLIN | EPOLLET, .data = {.u32 = index}};
    307     int queue_fd =
    308         dvr_.Api().ReadBufferQueueGetEventFd(new_queue->GetReadQueue());
    309     const int ret = epoll_fd_.Control(EPOLL_CTL_ADD, queue_fd, &event);
    310     if (ret < 0) {
    311       LOG(ERROR) << "Failed to track consumer queue: " << strerror(-ret)
    312                  << ", consumer queue fd: " << queue_fd;
    313       return nullptr;
    314     }
    315 
    316     buffer_queues_.push_back(new_queue);
    317     ANativeWindow_acquire(new_queue->GetSurface());
    318     return static_cast<Surface*>(new_queue->GetSurface());
    319   }
    320 
    321  private:
    322   struct BufferQueueHolder {
    323     BufferQueueHolder() {
    324       int ret = 0;
    325       ret = dvr_.Api().WriteBufferQueueCreate(
    326           kBufferWidth, kBufferHeight, kBufferFormat, kBufferLayer,
    327           kBufferUsage, 0, sizeof(DvrNativeBufferMetadata), &write_queue_);
    328       if (ret < 0) {
    329         LOG(ERROR) << "Failed to create write buffer queue, ret=" << ret;
    330         return;
    331       }
    332 
    333       ret = dvr_.Api().WriteBufferQueueCreateReadQueue(write_queue_,
    334                                                        &read_queue_);
    335       if (ret < 0) {
    336         LOG(ERROR) << "Failed to create read buffer queue, ret=" << ret;
    337         return;
    338       }
    339 
    340       ret = dvr_.Api().ReadBufferQueueSetBufferAvailableCallback(
    341           read_queue_, BufferAvailableCallback, this);
    342       if (ret < 0) {
    343         LOG(ERROR) << "Failed to create buffer available callback, ret=" << ret;
    344         return;
    345       }
    346 
    347       ret =
    348           dvr_.Api().WriteBufferQueueGetANativeWindow(write_queue_, &surface_);
    349       if (ret < 0) {
    350         LOG(ERROR) << "Failed to create surface, ret=" << ret;
    351         return;
    352       }
    353     }
    354 
    355     static void BufferAvailableCallback(void* context) {
    356       BufferQueueHolder* thiz = static_cast<BufferQueueHolder*>(context);
    357       thiz->HandleBufferAvailable();
    358     }
    359 
    360     DvrReadBufferQueue* GetReadQueue() { return read_queue_; }
    361 
    362     ANativeWindow* GetSurface() { return surface_; }
    363 
    364     bool IsReady() {
    365       return write_queue_ != nullptr && read_queue_ != nullptr &&
    366              surface_ != nullptr;
    367     }
    368 
    369     void HandleBufferAvailable() {
    370       int ret = 0;
    371       DvrNativeBufferMetadata meta;
    372       DvrReadBuffer* buffer = nullptr;
    373       DvrNativeBufferMetadata metadata;
    374       int acquire_fence = kInvalidFence;
    375 
    376       {
    377         ATRACE_NAME("AcquireBuffer");
    378         ret = dvr_.Api().ReadBufferQueueAcquireBuffer(
    379             read_queue_, 0, &buffer, &metadata, &acquire_fence);
    380       }
    381       if (ret < 0) {
    382         LOG(ERROR) << "Failed to acquire consumer buffer, error: " << ret;
    383         return;
    384       }
    385 
    386       if (buffer != nullptr) {
    387         ATRACE_NAME("ReleaseBuffer");
    388         ret = dvr_.Api().ReadBufferQueueReleaseBuffer(read_queue_, buffer,
    389                                                       &meta, kInvalidFence);
    390       }
    391       if (ret < 0) {
    392         LOG(ERROR) << "Failed to release consumer buffer, error: " << ret;
    393       }
    394     }
    395 
    396    private:
    397     DvrWriteBufferQueue* write_queue_ = nullptr;
    398     DvrReadBufferQueue* read_queue_ = nullptr;
    399     ANativeWindow* surface_ = nullptr;
    400   };
    401 
    402   static DvrApi dvr_;
    403   std::atomic<bool> stopped_;
    404   std::thread reader_thread_;
    405 
    406   dvr::EpollFileDescriptor epoll_fd_;
    407   std::vector<std::shared_ptr<BufferQueueHolder>> buffer_queues_;
    408 };
    409 
// Definition of the shared libdvr handle; loaded once at static-init time.
DvrApi BufferHubTransport::dvr_ = {};
    411 
    412 enum TransportType {
    413   kBinderBufferTransport,
    414   kBufferHubTransport,
    415 };
    416 
// Main test suite, which supports two transport backends: 1) BinderBufferQueue,
// 2) BufferHubQueue. The test case drives the producer end of both transport
// backends by queuing buffers into the buffer queue using the ANativeWindow
// API.
class BufferTransportBenchmark : public ::benchmark::Fixture {
 public:
  // Called once per benchmark thread; only thread 0 performs the shared
  // setup: it builds the transport selected by range(0), starts it, and
  // creates one Surface per benchmark thread.
  void SetUp(State& state) override {
    if (state.thread_index == 0) {
      const int transport = state.range(0);
      switch (transport) {
        case kBinderBufferTransport:
          transport_.reset(new BinderBufferTransport);
          break;
        case kBufferHubTransport:
          transport_.reset(new BufferHubTransport);
          break;
        default:
          CHECK(false) << "Unknown test case.";
          break;
      }

      CHECK(transport_);
      const int ret = transport_->Start();
      CHECK_EQ(ret, 0);

      LOG(INFO) << "Transport backend running, transport=" << transport << ".";

      // Create surfaces for each thread.
      surfaces_.resize(state.threads);
      for (int i = 0; i < state.threads; i++) {
        // Common setup every thread needs.
        surfaces_[i] = transport_->CreateSurface();
        CHECK(surfaces_[i]);

        LOG(INFO) << "Surface initialized on thread " << i << ".";
      }
    }
  }

  // Thread 0 drops all surfaces first, then the transport (reverse of SetUp).
  void TearDown(State& state) override {
    if (state.thread_index == 0) {
      surfaces_.clear();
      transport_.reset();
      LOG(INFO) << "Tear down benchmark.";
    }
  }

 protected:
  std::unique_ptr<BufferTransport> transport_;  // Backend under test.
  std::vector<sp<Surface>> surfaces_;  // One producer surface per thread.
};
    467 
// Producer-side benchmark body: each iteration dequeues ("gains") a buffer
// from the per-thread Surface and queues ("posts") it back, timing both
// halves separately and reporting per-iteration averages as counters.
BENCHMARK_DEFINE_F(BufferTransportBenchmark, Producers)(State& state) {
  ANativeWindow* window = nullptr;
  ANativeWindow_Buffer buffer;
  int32_t error = 0;
  double total_gain_buffer_us = 0;
  double total_post_buffer_us = 0;
  int iterations = 0;

  while (state.KeepRunning()) {
    if (window == nullptr) {
      CHECK(surfaces_[state.thread_index]);
      window = static_cast<ANativeWindow*>(surfaces_[state.thread_index].get());

      // Lock buffers a couple of times from the queue, so that the buffers
      // are allocated before the timed section starts.
      for (int i = 0; i < kQueueDepth; i++) {
        error = ANativeWindow_lock(window, &buffer,
                                   /*inOutDirtyBounds=*/nullptr);
        CHECK_EQ(error, 0);
        error = ANativeWindow_unlockAndPost(window);
        CHECK_EQ(error, 0);
      }
    }

    {
      ATRACE_NAME("GainBuffer");
      auto t1 = std::chrono::high_resolution_clock::now();
      error = ANativeWindow_lock(window, &buffer,
                                 /*inOutDirtyBounds=*/nullptr);
      auto t2 = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double, std::micro> delta_us = t2 - t1;
      total_gain_buffer_us += delta_us.count();
    }
    CHECK_EQ(error, 0);

    {
      ATRACE_NAME("PostBuffer");
      auto t1 = std::chrono::high_resolution_clock::now();
      error = ANativeWindow_unlockAndPost(window);
      auto t2 = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double, std::micro> delta_us = t2 - t1;
      total_post_buffer_us += delta_us.count();
    }
    CHECK_EQ(error, 0);

    iterations++;
  }

  // NOTE(review): assumes KeepRunning() yields at least one iteration so
  // iterations > 0 here; otherwise these divisions produce NaN — confirm.
  state.counters["gain_buffer_us"] = ::benchmark::Counter(
      total_gain_buffer_us / iterations, ::benchmark::Counter::kAvgThreads);
  state.counters["post_buffer_us"] = ::benchmark::Counter(
      total_post_buffer_us / iterations, ::benchmark::Counter::kAvgThreads);
  state.counters["producer_us"] = ::benchmark::Counter(
      (total_gain_buffer_us + total_post_buffer_us) / iterations,
      ::benchmark::Counter::kAvgThreads);
}
    524 
// Register the benchmark for both transport types (range 0..1) across 1-32
// producer threads, reporting in microseconds.
BENCHMARK_REGISTER_F(BufferTransportBenchmark, Producers)
    ->Unit(::benchmark::kMicrosecond)
    ->Ranges({{kBinderBufferTransport, kBufferHubTransport}})
    ->ThreadRange(1, 32);
    529 
    530 static void runBinderServer() {
    531   ProcessState::self()->setThreadPoolMaxThreadCount(0);
    532   ProcessState::self()->startThreadPool();
    533 
    534   sp<IServiceManager> sm = defaultServiceManager();
    535   sp<BufferTransportService> service = new BufferTransportService;
    536   sm->addService(kBinderService, service, false);
    537 
    538   LOG(INFO) << "Binder server running...";
    539 
    540   while (true) {
    541     int stat, retval;
    542     retval = wait(&stat);
    543     if (retval == -1 && errno == ECHILD) {
    544       break;
    545     }
    546   }
    547 
    548   LOG(INFO) << "Service Exiting...";
    549 }
    550 
// To run the binder-based benchmark, use:
// adb shell buffer_transport_benchmark \
//   --benchmark_filter="BufferTransportBenchmark/Producers/0/"
//
// To run the bufferhub-based benchmark, use:
// adb shell buffer_transport_benchmark \
//   --benchmark_filter="BufferTransportBenchmark/Producers/1/"
// (The benchmark is registered as "Producers" above; the old comment's
// "ContinuousLoad" name was stale.)
int main(int argc, char** argv) {
  bool tracing_enabled = false;

  // Parse arguments in addition to the "--benchmark_filter" parameters.
  for (int i = 1; i < argc; i++) {
    if (std::string(argv[i]) == "--help") {
      // NOTE(review): the usage text names "binderThroughputTest" but this
      // binary is buffer_transport_benchmark; runtime string left unchanged.
      std::cout << "Usage: binderThroughputTest [OPTIONS]" << std::endl;
      std::cout << "\t--trace: Enable systrace logging." << std::endl;
      return 0;
    }
    if (std::string(argv[i]) == "--trace") {
      tracing_enabled = true;
      continue;
    }
  }

  // Setup ATRACE/systrace based on command line.
  atrace_setup();
  atrace_set_tracing_enabled(tracing_enabled);

  pid_t pid = fork();
  if (pid == 0) {
    // Child, i.e. the client side: runs the benchmarks (buffer producers).
    ProcessState::self()->startThreadPool();

    ::benchmark::Initialize(&argc, argv);
    ::benchmark::RunSpecifiedBenchmarks();
  } else {
    // Parent hosts the Binder consumer service and reaps the child.
    LOG(INFO) << "Benchmark process pid: " << pid;
    runBinderServer();
  }
}
    590