/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_

typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*


// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.


// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86 CPU.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2.
  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
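// These flags gate the workarounds below (for example, the extra "lfence"
// issued after acquire operations when has_amd_lock_mb_bug is set).  They are
// presumably populated by an initializer in the accompanying .cc file, which
// is why they may not be trustworthy before static initialization completes.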


#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
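// ATOMICOPS_COMPILER_BARRIER emits no instruction: it only prevents the
// compiler from reordering memory accesses across that point.  Any hardware
// ordering comes from x86's memory model or from the explicit fences below.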


namespace base {
namespace subtle {

typedef int64_t Atomic64;

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
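
// Illustrative sketch only (g_lock, Lock, and Unlock are hypothetical names):
// a minimal spinlock shows how the acquire/release variants are meant to be
// paired, using Release_Store as defined further below.
//
//   static Atomic32 g_lock = 0;  // 0 = free, 1 = held
//   void Lock() {
//     while (Acquire_CompareAndSwap(&g_lock, 0, 1) != 0) {
//       // Spin until the lock is observed free and we win the CAS.
//     }
//   }
//   void Unlock() {
//     Release_Store(&g_lock, 0);  // Publishes writes made while holding it.
//   }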

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else { // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}
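
// Illustrative sketch only (g_payload, g_ready, Publish, and TryConsume are
// hypothetical names): a release store pairs with an Acquire_Load on the
// reader side, which is why the plain store plus compiler barrier above is
// enough on x86.
//
//   static Atomic64 g_payload = 0;
//   static Atomic64 g_ready = 0;
//   void Publish() {                      // writer
//     NoBarrier_Store(&g_payload, 42);
//     Release_Store(&g_ready, 1);         // makes the payload visible
//   }
//   bool TryConsume(Atomic64* out) {      // reader
//     if (!Acquire_Load(&g_ready)) return false;
//     *out = NoBarrier_Load(&g_payload);  // guaranteed to observe 42
//     return true;
//   }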

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.

#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).
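//
// The asm below sidesteps both pitfalls: it saves and restores %ebx around
// the cmpxchg8b, loads new_value into ecx:ebx from a pointer held in %esi
// rather than asking the compiler for those registers, and lists %ecx as
// clobbered.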

inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)             // as contents of ptr:
                       : "D" (ptr),              //   ecx:ebx => ptr
                         "0" (old_value),        // else:
                         "S" (&new_value)        //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_val,
                                         Atomic64 new_val) {
  return __sync_val_compare_and_swap(ptr, old_val, new_val);
}

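// The 64-bit exchange and increment below are emulated with a
// compare-and-swap retry loop: read the current value, compute the desired
// value, and retry if another thread changed *ptr in the meantime.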
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_val) {
  Atomic64 old_val;

  do {
    old_val = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 old_val, new_val;

  do {
    old_val = *ptr;
    new_val = old_val + increment;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
}

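// The stores and loads below use a single 8-byte MMX move because a 64-bit
// access performed with one instruction is atomic on Pentium-class and newer
// processors, provided it does not span a cache line (see the Intel SDM,
// Vol. 3, "Guaranteed Atomic Operations").  This assumes Atomic64 values are
// suitably aligned.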
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif // defined(__x86_64__)

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

} // namespace base::subtle
} // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_