1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Our goal is to measure the cost of various C++ atomic operations. 18 // Android doesn't really control those. But since some of these operations can be quite 19 // expensive, this may be useful input for development of higher level code. 20 // Expected mappings from C++ atomics to hardware primitives can be found at 21 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html . 22 23 #include <atomic> 24 #include <mutex> 25 26 #include <benchmark/benchmark.h> 27 #include "util.h" 28 29 // We time atomic operations separated by a volatile (not atomic!) increment. This ensures 30 // that the compiler emits memory instructions (e.g. load or store) prior to any fence or the 31 // like. That in turn ensures that the CPU has outstanding memory operations when the fence 32 // is executed. 33 34 // In most respects, we compute best case values. Since there is only one thread, there are no 35 // coherence misses. 36 37 // We assume that the compiler is not smart enough to optimize away fences in a single-threaded 38 // program. If that changes, we'll need to add a second thread. 39 40 static volatile unsigned counter; 41 42 std::atomic<int> test_loc(0); 43 44 static volatile unsigned sink; 45 46 static std::mutex mtx; 47 48 void BM_atomic_empty(benchmark::State& state) { 49 while (state.KeepRunning()) { 50 ++counter; 51 } 52 } 53 BIONIC_BENCHMARK(BM_atomic_empty); 54 55 static void BM_atomic_load_relaxed(benchmark::State& state) { 56 unsigned result = 0; 57 while (state.KeepRunning()) { 58 result += test_loc.load(std::memory_order_relaxed); 59 ++counter; 60 } 61 sink = result; 62 } 63 BIONIC_BENCHMARK(BM_atomic_load_relaxed); 64 65 static void BM_atomic_load_acquire(benchmark::State& state) { 66 unsigned result = 0; 67 while (state.KeepRunning()) { 68 result += test_loc.load(std::memory_order_acquire); 69 ++counter; 70 } 71 sink = result; 72 } 73 BIONIC_BENCHMARK(BM_atomic_load_acquire); 74 75 static void BM_atomic_store_release(benchmark::State& state) { 76 int i = counter; 77 while (state.KeepRunning()) { 78 test_loc.store(++i, std::memory_order_release); 79 ++counter; 80 } 81 } 82 BIONIC_BENCHMARK(BM_atomic_store_release); 83 84 static void BM_atomic_store_seq_cst(benchmark::State& state) { 85 int i = counter; 86 while (state.KeepRunning()) { 87 test_loc.store(++i, std::memory_order_seq_cst); 88 ++counter; 89 } 90 } 91 BIONIC_BENCHMARK(BM_atomic_store_seq_cst); 92 93 static void BM_atomic_fetch_add_relaxed(benchmark::State& state) { 94 unsigned result = 0; 95 while (state.KeepRunning()) { 96 result += test_loc.fetch_add(1, std::memory_order_relaxed); 97 ++counter; 98 } 99 sink = result; 100 } 101 BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed); 102 103 static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) { 104 unsigned result = 0; 105 while (state.KeepRunning()) { 106 result += test_loc.fetch_add(1, std::memory_order_seq_cst); 107 ++counter; 108 } 109 sink = result; 110 } 111 BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst); 112 113 // The fence benchmarks include a relaxed load to make it much harder to optimize away 114 // the fence. 115 116 static void BM_atomic_acquire_fence(benchmark::State& state) { 117 unsigned result = 0; 118 while (state.KeepRunning()) { 119 result += test_loc.load(std::memory_order_relaxed); 120 std::atomic_thread_fence(std::memory_order_acquire); 121 ++counter; 122 } 123 sink = result; 124 } 125 BIONIC_BENCHMARK(BM_atomic_acquire_fence); 126 127 static void BM_atomic_seq_cst_fence(benchmark::State& state) { 128 unsigned result = 0; 129 while (state.KeepRunning()) { 130 result += test_loc.load(std::memory_order_relaxed); 131 std::atomic_thread_fence(std::memory_order_seq_cst); 132 ++counter; 133 } 134 sink = result; 135 } 136 BIONIC_BENCHMARK(BM_atomic_seq_cst_fence); 137 138 // For comparison, also throw in a critical section version: 139 140 static void BM_atomic_fetch_add_cs(benchmark::State& state) { 141 unsigned result = 0; 142 while (state.KeepRunning()) { 143 { 144 std::lock_guard<std::mutex> _(mtx); 145 result += ++counter; 146 } 147 } 148 sink = result; 149 } 150 BIONIC_BENCHMARK(BM_atomic_fetch_add_cs); 151