/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_

typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*

// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86 CPU.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
  bool has_cmpxchg16b;       // Processor supports cmpxchg16b instruction.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

typedef int64_t Atomic64;

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

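// Usage sketch (illustrative only, not part of this header's API): the
// compare-and-swap primitive above is typically driven in a retry loop.
// The helper name "AtomicSetBits" is hypothetical.
//
//   void AtomicSetBits(volatile Atomic32* word, Atomic32 bits) {
//     Atomic32 old_word, new_word;
//     do {
//       old_word = *word;
//       new_word = old_word | bits;
//     } while (NoBarrier_CompareAndSwap(word, old_word, new_word) != old_word);
//   }
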
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
    // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

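// Usage sketch (illustrative only, not part of this header's API):
// Release_Store() and Acquire_Load() are intended to be paired, e.g. to
// publish data through a flag.  The names "g_data", "g_ready", "Publish"
// and "Consume" are hypothetical.
//
//   int g_data;
//   Atomic32 g_ready = 0;
//
//   void Publish() {             // writer thread
//     g_data = 42;               // plain store, made visible by the release
//     Release_Store(&g_ready, 1);
//   }
//
//   void Consume() {             // reader thread
//     if (Acquire_Load(&g_ready)) {
//       // g_data is guaranteed to read 42 here.
//     }
//   }
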
#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

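// Usage sketch (illustrative only, not part of this header's API): the
// barriered increment above is what a reference count typically needs on its
// final decrement.  The names "RefCounted" and "Unref" are hypothetical.
//
//   struct RefCounted {
//     Atomic64 refs;
//   };
//
//   void Unref(RefCounted* p) {
//     if (Barrier_AtomicIncrement(&p->refs, -1) == 0) {
//       delete p;  // last reference dropped
//     }
//   }
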
#else  // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.

#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).

inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)              // as contents of ptr:
                       : "D" (ptr),               //   ecx:ebx => ptr
                         "0" (old_value),         // else:
                         "S" (&new_value)         //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_val,
                                         Atomic64 new_val) {
  return __sync_val_compare_and_swap(ptr, old_val, new_val);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_val) {
  Atomic64 old_val;

  do {
    old_val = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 old_val, new_val;

  do {
    old_val = *ptr;
    new_val = old_val + increment;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(__x86_64__)

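// Usage sketch (illustrative only, not part of this header's API) for the
// 64-bit Acquire_CompareAndSwap()/Release_Store() pair defined below, shown
// here as a minimal spinlock.  The names "lock_word", "Lock" and "Unlock"
// are hypothetical, and a real lock would also back off or yield while
// spinning.
//
//   Atomic64 lock_word = 0;
//
//   void Lock() {
//     while (Acquire_CompareAndSwap(&lock_word, 0, 1) != 0) {
//       // spin
//     }
//   }
//
//   void Unlock() {
//     Release_Store(&lock_word, 0);
//   }
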
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_