// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#pragma once

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}
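
// Illustrative sketch, not part of this header: callers (which should include
// base/atomicops.h rather than this file) can build a simple test-and-set
// spinlock from the primitives above.  The names SpinLock_Acquire and
// SpinLock_Release are hypothetical and exist only for this example.
//
//   void SpinLock_Acquire(volatile Atomic32* lock) {
//     // A previous value of 0 means the lock was free and we now own it.
//     while (Acquire_CompareAndSwap(lock, 0, 1) != 0) {
//       // Busy-wait until the holder releases the lock.
//     }
//   }
//
//   void SpinLock_Release(volatile Atomic32* lock) {
//     Release_Store(lock, 0);  // Release_Store() is defined below.
//   }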

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because the
// "mfence" instruction is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
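
// Illustrative sketch, not part of this header: the store/load variants above
// are meant to be paired, e.g. Release_Store() in one thread with
// Acquire_Load() in another to publish data.  The names g_payload and g_ready
// are hypothetical and exist only for this example.
//
//   Atomic32 g_payload = 0;
//   Atomic32 g_ready = 0;
//
//   // Producer thread:
//   NoBarrier_Store(&g_payload, 42);
//   Release_Store(&g_ready, 1);  // Publishes g_payload.
//
//   // Consumer thread:
//   if (Acquire_Load(&g_ready) == 1) {
//     Atomic32 value = NoBarrier_Load(&g_payload);  // Observes 42.
//   }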

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_