1 // Copyright 2010 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 // This file is an internal atomic implementation, use atomicops.h instead. 29 30 #ifndef V8_ATOMICOPS_INTERNALS_X86_GCC_H_ 31 #define V8_ATOMICOPS_INTERNALS_X86_GCC_H_ 32 33 namespace v8 { 34 namespace internal { 35 36 // This struct is not part of the public API of this module; clients may not 37 // use it. 38 // Features of this x86. Values may not be correct before main() is run, 39 // but are set conservatively. 40 struct AtomicOps_x86CPUFeatureStruct { 41 bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence 42 // after acquire compare-and-swap. 43 bool has_sse2; // Processor has SSE2. 44 }; 45 extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; 46 47 #define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") 48 49 // 32-bit low-level operations on any platform. 50 51 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, 52 Atomic32 old_value, 53 Atomic32 new_value) { 54 Atomic32 prev; 55 __asm__ __volatile__("lock; cmpxchgl %1,%2" 56 : "=a" (prev) 57 : "q" (new_value), "m" (*ptr), "0" (old_value) 58 : "memory"); 59 return prev; 60 } 61 62 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, 63 Atomic32 new_value) { 64 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. 65 : "=r" (new_value) 66 : "m" (*ptr), "0" (new_value) 67 : "memory"); 68 return new_value; // Now it's the previous value. 69 } 70 71 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, 72 Atomic32 increment) { 73 Atomic32 temp = increment; 74 __asm__ __volatile__("lock; xaddl %0,%1" 75 : "+r" (temp), "+m" (*ptr) 76 : : "memory"); 77 // temp now holds the old value of *ptr 78 return temp + increment; 79 } 80 81 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, 82 Atomic32 increment) { 83 Atomic32 temp = increment; 84 __asm__ __volatile__("lock; xaddl %0,%1" 85 : "+r" (temp), "+m" (*ptr) 86 : : "memory"); 87 // temp now holds the old value of *ptr 88 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { 89 __asm__ __volatile__("lfence" : : : "memory"); 90 } 91 return temp + increment; 92 } 93 94 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, 95 Atomic32 old_value, 96 Atomic32 new_value) { 97 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); 98 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { 99 __asm__ __volatile__("lfence" : : : "memory"); 100 } 101 return x; 102 } 103 104 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, 105 Atomic32 old_value, 106 Atomic32 new_value) { 107 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); 108 } 109 110 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { 111 *ptr = value; 112 } 113 114 #if defined(__x86_64__) 115 116 // 64-bit implementations of memory barrier can be simpler, because it 117 // "mfence" is guaranteed to exist. 118 inline void MemoryBarrier() { 119 __asm__ __volatile__("mfence" : : : "memory"); 120 } 121 122 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { 123 *ptr = value; 124 MemoryBarrier(); 125 } 126 127 #else 128 129 inline void MemoryBarrier() { 130 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { 131 __asm__ __volatile__("mfence" : : : "memory"); 132 } else { // mfence is faster but not present on PIII 133 Atomic32 x = 0; 134 NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII 135 } 136 } 137 138 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { 139 if (AtomicOps_Internalx86CPUFeatures.has_sse2) { 140 *ptr = value; 141 __asm__ __volatile__("mfence" : : : "memory"); 142 } else { 143 NoBarrier_AtomicExchange(ptr, value); 144 // acts as a barrier on PIII 145 } 146 } 147 #endif 148 149 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { 150 ATOMICOPS_COMPILER_BARRIER(); 151 *ptr = value; // An x86 store acts as a release barrier. 152 // See comments in Atomic64 version of Release_Store(), below. 153 } 154 155 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { 156 return *ptr; 157 } 158 159 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { 160 Atomic32 value = *ptr; // An x86 load acts as a acquire barrier. 161 // See comments in Atomic64 version of Release_Store(), below. 162 ATOMICOPS_COMPILER_BARRIER(); 163 return value; 164 } 165 166 inline Atomic32 Release_Load(volatile const Atomic32* ptr) { 167 MemoryBarrier(); 168 return *ptr; 169 } 170 171 #if defined(__x86_64__) 172 173 // 64-bit low-level operations on 64-bit platform. 174 175 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, 176 Atomic64 old_value, 177 Atomic64 new_value) { 178 Atomic64 prev; 179 __asm__ __volatile__("lock; cmpxchgq %1,%2" 180 : "=a" (prev) 181 : "q" (new_value), "m" (*ptr), "0" (old_value) 182 : "memory"); 183 return prev; 184 } 185 186 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, 187 Atomic64 new_value) { 188 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. 189 : "=r" (new_value) 190 : "m" (*ptr), "0" (new_value) 191 : "memory"); 192 return new_value; // Now it's the previous value. 193 } 194 195 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, 196 Atomic64 increment) { 197 Atomic64 temp = increment; 198 __asm__ __volatile__("lock; xaddq %0,%1" 199 : "+r" (temp), "+m" (*ptr) 200 : : "memory"); 201 // temp now contains the previous value of *ptr 202 return temp + increment; 203 } 204 205 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, 206 Atomic64 increment) { 207 Atomic64 temp = increment; 208 __asm__ __volatile__("lock; xaddq %0,%1" 209 : "+r" (temp), "+m" (*ptr) 210 : : "memory"); 211 // temp now contains the previous value of *ptr 212 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { 213 __asm__ __volatile__("lfence" : : : "memory"); 214 } 215 return temp + increment; 216 } 217 218 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { 219 *ptr = value; 220 } 221 222 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { 223 *ptr = value; 224 MemoryBarrier(); 225 } 226 227 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { 228 ATOMICOPS_COMPILER_BARRIER(); 229 230 *ptr = value; // An x86 store acts as a release barrier 231 // for current AMD/Intel chips as of Jan 2008. 232 // See also Acquire_Load(), below. 233 234 // When new chips come out, check: 235 // IA-32 Intel Architecture Software Developer's Manual, Volume 3: 236 // System Programming Guide, Chatper 7: Multiple-processor management, 237 // Section 7.2, Memory Ordering. 238 // Last seen at: 239 // http://developer.intel.com/design/pentium4/manuals/index_new.htm 240 // 241 // x86 stores/loads fail to act as barriers for a few instructions (clflush 242 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are 243 // not generated by the compiler, and are rare. Users of these instructions 244 // need to know about cache behaviour in any case since all of these involve 245 // either flushing cache lines or non-temporal cache hints. 246 } 247 248 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { 249 return *ptr; 250 } 251 252 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { 253 Atomic64 value = *ptr; // An x86 load acts as a acquire barrier, 254 // for current AMD/Intel chips as of Jan 2008. 255 // See also Release_Store(), above. 256 ATOMICOPS_COMPILER_BARRIER(); 257 return value; 258 } 259 260 inline Atomic64 Release_Load(volatile const Atomic64* ptr) { 261 MemoryBarrier(); 262 return *ptr; 263 } 264 265 inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, 266 Atomic64 old_value, 267 Atomic64 new_value) { 268 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); 269 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { 270 __asm__ __volatile__("lfence" : : : "memory"); 271 } 272 return x; 273 } 274 275 inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, 276 Atomic64 old_value, 277 Atomic64 new_value) { 278 return NoBarrier_CompareAndSwap(ptr, old_value, new_value); 279 } 280 281 #endif // defined(__x86_64__) 282 283 } } // namespace v8::internal 284 285 #undef ATOMICOPS_COMPILER_BARRIER 286 287 #endif // V8_ATOMICOPS_INTERNALS_X86_GCC_H_ 288