1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Our goal is to measure the cost of various C++ atomic operations. 18 // Android doesn't really control those. But since some of these operations can be quite 19 // expensive, this may be useful input for development of higher level code. 20 // Expected mappings from C++ atomics to hardware primitives can be found at 21 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html . 22 23 #include <benchmark/benchmark.h> 24 #include <atomic> 25 #include <mutex> 26 27 // We time atomic operations separated by a volatile (not atomic!) increment. This ensures 28 // that the compiler emits memory instructions (e.g. load or store) prior to any fence or the 29 // like. That in turn ensures that the CPU has outstanding memory operations when the fence 30 // is executed. 31 32 // In most respects, we compute best case values. Since there is only one thread, there are no 33 // coherence misses. 34 35 // We assume that the compiler is not smart enough to optimize away fences in a single-threaded 36 // program. If that changes, we'll need to add a second thread. 37 38 volatile unsigned counter; 39 40 std::atomic<int> test_loc(0); 41 42 volatile unsigned sink; 43 44 std::mutex mtx; 45 46 void BM_empty(benchmark::State& state) { 47 while (state.KeepRunning()) { 48 ++counter; 49 } 50 } 51 BENCHMARK(BM_empty); 52 53 static void BM_load_relaxed(benchmark::State& state) { 54 unsigned result = 0; 55 while (state.KeepRunning()) { 56 result += test_loc.load(std::memory_order_relaxed); 57 ++counter; 58 } 59 sink = result; 60 } 61 BENCHMARK(BM_load_relaxed); 62 63 static void BM_load_acquire(benchmark::State& state) { 64 unsigned result = 0; 65 while (state.KeepRunning()) { 66 result += test_loc.load(std::memory_order_acquire); 67 ++counter; 68 } 69 sink = result; 70 } 71 BENCHMARK(BM_load_acquire); 72 73 static void BM_store_release(benchmark::State& state) { 74 int i = counter; 75 while (state.KeepRunning()) { 76 test_loc.store(++i, std::memory_order_release); 77 ++counter; 78 } 79 } 80 BENCHMARK(BM_store_release); 81 82 static void BM_store_seq_cst(benchmark::State& state) { 83 int i = counter; 84 while (state.KeepRunning()) { 85 test_loc.store(++i, std::memory_order_seq_cst); 86 ++counter; 87 } 88 } 89 BENCHMARK(BM_store_seq_cst); 90 91 static void BM_fetch_add_relaxed(benchmark::State& state) { 92 unsigned result = 0; 93 while (state.KeepRunning()) { 94 result += test_loc.fetch_add(1, std::memory_order_relaxed); 95 ++counter; 96 } 97 sink = result; 98 } 99 BENCHMARK(BM_fetch_add_relaxed); 100 101 static void BM_fetch_add_seq_cst(benchmark::State& state) { 102 unsigned result = 0; 103 while (state.KeepRunning()) { 104 result += test_loc.fetch_add(1, std::memory_order_seq_cst); 105 ++counter; 106 } 107 sink = result; 108 } 109 BENCHMARK(BM_fetch_add_seq_cst); 110 111 // The fence benchmarks include a relaxed load to make it much harder to optimize away 112 // the fence. 113 114 static void BM_acquire_fence(benchmark::State& state) { 115 unsigned result = 0; 116 while (state.KeepRunning()) { 117 result += test_loc.load(std::memory_order_relaxed); 118 std::atomic_thread_fence(std::memory_order_acquire); 119 ++counter; 120 } 121 sink = result; 122 } 123 BENCHMARK(BM_acquire_fence); 124 125 static void BM_seq_cst_fence(benchmark::State& state) { 126 unsigned result = 0; 127 while (state.KeepRunning()) { 128 result += test_loc.load(std::memory_order_relaxed); 129 std::atomic_thread_fence(std::memory_order_seq_cst); 130 ++counter; 131 } 132 sink = result; 133 } 134 BENCHMARK(BM_seq_cst_fence); 135 136 // For comparison, also throw in a critical section version: 137 138 static void BM_fetch_add_cs(benchmark::State& state) { 139 unsigned result = 0; 140 while (state.KeepRunning()) { 141 { 142 std::lock_guard<std::mutex> _(mtx); 143 result += ++counter; 144 } 145 } 146 sink = result; 147 } 148 BENCHMARK(BM_fetch_add_cs); 149