// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the coprocessor will still work).
//   However, on single-core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely unneeded on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single-core or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e., on
//   single-core devices, this is an empty function that returns
//   immediately. On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__linux__) || defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
#error MemoryBarrier() is not implemented on this platform.
#endif
}
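
// Illustrative sketch only (not used by this file): the instruction
// sequences the comment above refers to, expressed as inline assembly.
// "dmb" is the ARMv7 full data memory barrier; the CP15 c7/c10/5 write is
// the ARMv6 equivalent. The function name is hypothetical; real callers
// should use MemoryBarrier() above, which defers to the kernel helper and
// is a no-op on single-core devices.
#if 0  // Reference sketch; deliberately not compiled.
inline void DirectMemoryBarrierSketch() {
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
  // ARMv7: full data memory barrier instruction.
  __asm__ __volatile__("dmb" : : : "memory");
#else
  // ARMv6: write any value to the Data Memory Barrier coprocessor register.
  __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(0) : "memory");
#endif
}
#endif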

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value != old_value)
    //     reloop = STREX(ptr, new_value)
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  mov %1, #0\n"
                         "  cmp %0, %4\n"
#ifdef __thumb2__
                         "  it eq\n"
#endif
                         "  strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  add %0, %0, %4\n"
                         "  strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    //   old_value = LDREX(ptr)
    //   reloop = STREX(ptr, new_value)
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}
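
// For comparison only (assumption: a GCC toolchain that provides the __sync
// builtins for this target). The hand-written ldrex/strex loop above is
// morally equivalent to the compiler builtin below, except that the builtin
// is documented as a full barrier, which is stronger than what the
// NoBarrier_ variant promises. The function name is hypothetical.
#if 0  // Reference sketch; deliberately not compiled.
inline Atomic32 BuiltinCompareAndSwapSketch(volatile Atomic32* ptr,
                                            Atomic32 old_value,
                                            Atomic32 new_value) {
  return __sync_val_compare_and_swap(ptr, old_value, new_value);
}
#endif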

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// recognizes and handles correctly when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this helper always performs a full memory barrier; there is
// no need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success and non-zero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  //
  // But that would use three barriers per successful CAS. For better
  // performance, use Acquire_CompareAndSwap() instead. Its implementation
  // guarantees that:
  // - A successful swap uses only two barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif
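
// Usage sketch (illustrative only; these helpers are hypothetical and not
// part of the atomicops API). It shows how the compare-and-swap primitives
// above compose into a minimal test-and-set spinlock, and where the
// acquire/release barriers belong.
#if 0  // Reference sketch; deliberately not compiled.
inline void SketchSpinLock(volatile Atomic32* flag) {
  // CAS 0 -> 1; acquire semantics order the critical section after the CAS.
  while (Acquire_CompareAndSwap(flag, 0, 1) != 0) {
    // Spin until the current owner releases the lock.
  }
}

inline void SketchSpinUnlock(volatile Atomic32* flag) {
  MemoryBarrier();  // Release semantics: order prior writes before the store.
  *flag = 0;
}
#endif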

// NOTE: Atomicity of the following load and store operations is only
// guaranteed in case of 32-bit alignment of |ptr| values.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

} }  // namespace v8::base

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
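
// Usage note (illustrative, assumed caller-side code; not part of this
// header): Acquire_Load()/Release_Store() are intended for the classic
// message-passing pattern, e.g.:
//
//   // Producer thread:
//   payload = 42;                  // plain write
//   Release_Store(&ready, 1);      // barrier, then store
//
//   // Consumer thread:
//   if (Acquire_Load(&ready)) {    // load, then barrier
//     Use(payload);                // observes payload == 42
//   }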