1 // Copyright (C) 2018 The Android Open Source Project 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <gtest/gtest.h> 16 #include <random> 17 18 #include "benchmark/benchmark.h" 19 #include "perfetto/base/time.h" 20 #include "perfetto/traced/traced.h" 21 #include "perfetto/tracing/core/trace_config.h" 22 #include "perfetto/tracing/core/trace_packet.h" 23 #include "src/base/test/test_task_runner.h" 24 #include "test/task_runner_thread.h" 25 #include "test/task_runner_thread_delegates.h" 26 #include "test/test_helper.h" 27 28 #include "perfetto/trace/trace_packet.pb.h" 29 #include "perfetto/trace/trace_packet.pbzero.h" 30 31 namespace perfetto { 32 33 namespace { 34 35 bool IsBenchmarkFunctionalOnly() { 36 return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr; 37 } 38 39 void BenchmarkProducer(benchmark::State& state) { 40 base::TestTaskRunner task_runner; 41 42 TestHelper helper(&task_runner); 43 helper.StartServiceIfRequired(); 44 45 FakeProducer* producer = helper.ConnectFakeProducer(); 46 helper.ConnectConsumer(); 47 helper.WaitForConsumerConnect(); 48 49 TraceConfig trace_config; 50 trace_config.add_buffers()->set_size_kb(512); 51 52 auto* ds_config = trace_config.add_data_sources()->mutable_config(); 53 ds_config->set_name("android.perfetto.FakeProducer"); 54 ds_config->set_target_buffer(0); 55 56 static constexpr uint32_t kRandomSeed = 42; 57 uint32_t message_count = static_cast<uint32_t>(state.range(0)); 58 uint32_t message_bytes = static_cast<uint32_t>(state.range(1)); 59 uint32_t mb_per_s = static_cast<uint32_t>(state.range(2)); 60 61 uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes; 62 uint32_t time_for_messages_ms = 63 10000 + (messages_per_s == 0 ? 0 : message_count * 1000 / messages_per_s); 64 65 ds_config->mutable_for_testing()->set_seed(kRandomSeed); 66 ds_config->mutable_for_testing()->set_message_count(message_count); 67 ds_config->mutable_for_testing()->set_message_size(message_bytes); 68 ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s); 69 70 helper.StartTracing(trace_config); 71 helper.WaitForProducerEnabled(); 72 73 uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count()); 74 uint64_t service_start_ns = helper.service_thread()->GetThreadCPUTimeNs(); 75 uint64_t producer_start_ns = helper.producer_thread()->GetThreadCPUTimeNs(); 76 uint32_t iterations = 0; 77 for (auto _ : state) { 78 auto cname = "produced.and.committed." + std::to_string(iterations++); 79 auto on_produced_and_committed = task_runner.CreateCheckpoint(cname); 80 producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed)); 81 task_runner.RunUntilCheckpoint(cname, time_for_messages_ms); 82 } 83 uint64_t service_ns = 84 helper.service_thread()->GetThreadCPUTimeNs() - service_start_ns; 85 uint64_t producer_ns = 86 helper.producer_thread()->GetThreadCPUTimeNs() - producer_start_ns; 87 uint64_t wall_ns = 88 static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns; 89 90 state.counters["Ser CPU"] = benchmark::Counter(100.0 * service_ns / wall_ns); 91 state.counters["Ser ns/m"] = 92 benchmark::Counter(1.0 * service_ns / message_count); 93 state.counters["Pro CPU"] = benchmark::Counter(100.0 * producer_ns / wall_ns); 94 state.SetBytesProcessed(iterations * message_bytes * message_count); 95 96 // Read back the buffer just to check correctness. 97 helper.ReadData(); 98 helper.WaitForReadData(); 99 100 bool is_first_packet = true; 101 std::minstd_rand0 rnd_engine(kRandomSeed); 102 for (const auto& packet : helper.trace()) { 103 ASSERT_TRUE(packet.has_for_testing()); 104 if (is_first_packet) { 105 rnd_engine = std::minstd_rand0(packet.for_testing().seq_value()); 106 is_first_packet = false; 107 } else { 108 ASSERT_EQ(packet.for_testing().seq_value(), rnd_engine()); 109 } 110 } 111 } 112 113 static void BenchmarkConsumer(benchmark::State& state) { 114 base::TestTaskRunner task_runner; 115 116 TestHelper helper(&task_runner); 117 helper.StartServiceIfRequired(); 118 119 FakeProducer* producer = helper.ConnectFakeProducer(); 120 helper.ConnectConsumer(); 121 helper.WaitForConsumerConnect(); 122 123 TraceConfig trace_config; 124 125 static const uint32_t kBufferSizeBytes = 126 IsBenchmarkFunctionalOnly() ? 16 * 1024 : 2 * 1024 * 1024; 127 trace_config.add_buffers()->set_size_kb(kBufferSizeBytes / 1024); 128 129 static constexpr uint32_t kRandomSeed = 42; 130 uint32_t message_bytes = static_cast<uint32_t>(state.range(0)); 131 uint32_t mb_per_s = static_cast<uint32_t>(state.range(1)); 132 bool is_saturated_producer = mb_per_s == 0; 133 134 uint32_t message_count = kBufferSizeBytes / message_bytes; 135 uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes; 136 uint32_t number_of_batches = 137 is_saturated_producer ? 0 : std::max(1u, message_count / messages_per_s); 138 139 auto* ds_config = trace_config.add_data_sources()->mutable_config(); 140 ds_config->set_name("android.perfetto.FakeProducer"); 141 ds_config->set_target_buffer(0); 142 ds_config->mutable_for_testing()->set_seed(kRandomSeed); 143 ds_config->mutable_for_testing()->set_message_count(message_count); 144 ds_config->mutable_for_testing()->set_message_size(message_bytes); 145 ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s); 146 147 helper.StartTracing(trace_config); 148 helper.WaitForProducerEnabled(); 149 150 uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count()); 151 uint64_t service_start_ns = 152 static_cast<uint64_t>(helper.service_thread()->GetThreadCPUTimeNs()); 153 uint64_t consumer_start_ns = 154 static_cast<uint64_t>(base::GetThreadCPUTimeNs().count()); 155 uint64_t read_time_taken_ns = 0; 156 157 uint64_t iterations = 0; 158 uint32_t counter = 0; 159 for (auto _ : state) { 160 auto cname = "produced.and.committed." + std::to_string(iterations++); 161 auto on_produced_and_committed = task_runner.CreateCheckpoint(cname); 162 producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed)); 163 164 if (is_saturated_producer) { 165 // If the producer is running in saturated mode, wait until it flushes 166 // data. 167 task_runner.RunUntilCheckpoint(cname); 168 169 // Then time how long it takes to read back the data. 170 int64_t start = base::GetWallTimeNs().count(); 171 helper.ReadData(counter); 172 helper.WaitForReadData(counter++); 173 read_time_taken_ns += 174 static_cast<uint64_t>(base::GetWallTimeNs().count() - start); 175 } else { 176 // If the producer is not running in saturated mode, every second the 177 // producer will send a batch of data over. Wait for a second before 178 // performing readback; do this for each batch the producer sends. 179 for (uint32_t i = 0; i < number_of_batches; i++) { 180 auto batch_cname = "batch.checkpoint." + std::to_string(counter); 181 auto batch_checkpoint = task_runner.CreateCheckpoint(batch_cname); 182 task_runner.PostDelayedTask(batch_checkpoint, 1000); 183 task_runner.RunUntilCheckpoint(batch_cname); 184 185 int64_t start = base::GetWallTimeNs().count(); 186 helper.ReadData(counter); 187 helper.WaitForReadData(counter++); 188 read_time_taken_ns += 189 static_cast<uint64_t>(base::GetWallTimeNs().count() - start); 190 } 191 } 192 } 193 uint64_t service_ns = 194 helper.service_thread()->GetThreadCPUTimeNs() - service_start_ns; 195 uint64_t consumer_ns = 196 static_cast<uint64_t>(base::GetThreadCPUTimeNs().count()) - 197 consumer_start_ns; 198 uint64_t wall_ns = 199 static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns; 200 201 state.counters["Ser CPU"] = benchmark::Counter(100.0 * service_ns / wall_ns); 202 state.counters["Ser ns/m"] = 203 benchmark::Counter(1.0 * service_ns / message_count); 204 state.counters["Con CPU"] = benchmark::Counter(100.0 * consumer_ns / wall_ns); 205 state.counters["Con Speed"] = 206 benchmark::Counter(iterations * 1000.0 * 1000 * 1000 * kBufferSizeBytes / 207 read_time_taken_ns); 208 } 209 210 void SaturateCpuProducerArgs(benchmark::internal::Benchmark* b) { 211 int min_message_count = 16; 212 int max_message_count = IsBenchmarkFunctionalOnly() ? 1024 : 1024 * 1024; 213 int min_payload = 8; 214 int max_payload = IsBenchmarkFunctionalOnly() ? 256 : 2048; 215 for (int count = min_message_count; count <= max_message_count; count *= 2) { 216 for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) { 217 b->Args({count, bytes, 0 /* speed */}); 218 } 219 } 220 } 221 222 void ConstantRateProducerArgs(benchmark::internal::Benchmark* b) { 223 int message_count = IsBenchmarkFunctionalOnly() ? 2 * 1024 : 128 * 1024; 224 int min_speed = IsBenchmarkFunctionalOnly() ? 64 : 8; 225 int max_speed = 128; 226 for (int speed = min_speed; speed <= max_speed; speed *= 2) { 227 b->Args({message_count, 128, speed}); 228 b->Args({message_count, 256, speed}); 229 } 230 } 231 232 void SaturateCpuConsumerArgs(benchmark::internal::Benchmark* b) { 233 int min_payload = 8; 234 int max_payload = IsBenchmarkFunctionalOnly() ? 16 : 64 * 1024; 235 for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) { 236 b->Args({bytes, 0 /* speed */}); 237 } 238 } 239 240 void ConstantRateConsumerArgs(benchmark::internal::Benchmark* b) { 241 int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 1; 242 int max_speed = IsBenchmarkFunctionalOnly() ? 128 : 2; 243 for (int speed = min_speed; speed <= max_speed; speed *= 2) { 244 b->Args({2, speed}); 245 b->Args({4, speed}); 246 } 247 } 248 249 } // namespace 250 251 static void BM_EndToEnd_Producer_SaturateCpu(benchmark::State& state) { 252 BenchmarkProducer(state); 253 } 254 255 BENCHMARK(BM_EndToEnd_Producer_SaturateCpu) 256 ->Unit(benchmark::kMicrosecond) 257 ->UseRealTime() 258 ->Apply(SaturateCpuProducerArgs); 259 260 static void BM_EndToEnd_Producer_ConstantRate(benchmark::State& state) { 261 BenchmarkProducer(state); 262 } 263 264 BENCHMARK(BM_EndToEnd_Producer_ConstantRate) 265 ->Unit(benchmark::kMicrosecond) 266 ->UseRealTime() 267 ->Apply(ConstantRateProducerArgs); 268 269 static void BM_EndToEnd_Consumer_SaturateCpu(benchmark::State& state) { 270 BenchmarkConsumer(state); 271 } 272 273 BENCHMARK(BM_EndToEnd_Consumer_SaturateCpu) 274 ->Unit(benchmark::kMicrosecond) 275 ->UseRealTime() 276 ->Apply(SaturateCpuConsumerArgs); 277 278 static void BM_EndToEnd_Consumer_ConstantRate(benchmark::State& state) { 279 BenchmarkConsumer(state); 280 } 281 282 BENCHMARK(BM_EndToEnd_Consumer_ConstantRate) 283 ->Unit(benchmark::kMillisecond) 284 ->UseRealTime() 285 ->Apply(ConstantRateConsumerArgs); 286 287 } // namespace perfetto 288