// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

namespace base {
namespace subtle {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 did not support SMP; there is no memory barrier instruction at
//   all on this architecture, nor when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the coprocessor will still work).
//   However, on single-core devices (e.g. Nexus One or Nexus S),
//   this instruction can take up to 200 ns, which is huge, even though
//   it is completely unneeded on these devices.
//
// * There is no easy way to determine at runtime whether the device is
//   single- or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. That is, on
//   single-core devices it is an empty function that returns immediately,
//   while on multi-core devices it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
  // Note: This is a function call, which is also an implicit compiler
  // barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
}
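
// Illustrative sketch: how MemoryBarrier() pairs a data write with a flag
// write so that another core observes them in order. ExamplePublish and its
// parameters are hypothetical names used only for this sketch; they are not
// part of the atomicops API.
inline void ExamplePublish(volatile Atomic32* payload,
                           volatile Atomic32* ready_flag,
                           Atomic32 value) {
  *payload = value;  // 1. Write the data.
  MemoryBarrier();   // 2. Make the data visible before the flag is raised.
  *ready_flag = 1;   // 3. Publish; a reader must barrier after seeing 1.
}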

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value == old_value)
    //      reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
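
// Illustrative sketch: the standard compare-and-swap retry loop built on
// NoBarrier_CompareAndSwap(). ExampleAtomicStoreMax is a hypothetical helper
// (not part of the atomicops API) that raises *ptr to at least |candidate|.
inline void ExampleAtomicStoreMax(volatile Atomic32* ptr, Atomic32 candidate) {
  for (;;) {
    Atomic32 observed = *ptr;
    if (observed >= candidate)
      return;  // Already large enough; nothing to do.
    if (NoBarrier_CompareAndSwap(ptr, observed, candidate) == observed)
      return;  // The swap took place.
    // Another thread modified *ptr between the read and the swap; retry.
  }
}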

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //  value = LDREX(ptr)
    //  value += increment
    //  reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
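
// Illustrative sketch: dropping a reference with Barrier_AtomicIncrement().
// The barrier variant is used so that all prior writes to the object are
// visible before another core can observe the count reaching zero.
// ExampleUnref is a hypothetical name, not part of the atomicops API.
inline bool ExampleUnref(volatile Atomic32* refcount) {
  // Returns true if this call dropped the last reference and the caller
  // may destroy the associated object.
  return Barrier_AtomicIncrement(refcount, -1) == 0;
}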

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("   ldrex %0, [%3]\n"
                         "   strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}
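
// Illustrative sketch: a minimal test-and-set spinlock built from
// NoBarrier_AtomicExchange() plus explicit barriers. ExampleSpinLock and
// ExampleSpinUnlock are hypothetical names, not part of the atomicops API.
inline void ExampleSpinLock(volatile Atomic32* lock_word) {
  while (NoBarrier_AtomicExchange(lock_word, 1) != 0) {
    // Busy-wait until the previous owner stores 0 back.
  }
  MemoryBarrier();  // Acquire: keep critical-section accesses below the lock.
}

inline void ExampleSpinUnlock(volatile Atomic32* lock_word) {
  MemoryBarrier();  // Release: flush critical-section writes first.
  *lock_word = 0;
}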

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier, so there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and a non-zero value on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace
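
// Illustrative sketch: the kernel helper's return convention in action.
// ExampleTrySetFlag is a hypothetical name, not part of the atomicops API;
// it succeeds only for the caller that performs the 0 -> 1 transition.
inline bool ExampleTrySetFlag(volatile Atomic32* flag) {
  return LinuxKernelCmpxchg(0, 1, flag) == 0;  // 0 means the swap happened.
}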

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Try to atomically replace the current value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. To reduce the cost,
  // use Acquire_CompareAndSwap() instead. Its implementation guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed if |ptr| values are 32-bit aligned.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
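
// Illustrative sketch: the intended pairing of Release_Store() with
// Acquire_Load() when publishing a value to another thread. ExampleProduce
// and ExampleConsume are hypothetical names, not part of the atomicops API.
inline void ExampleProduce(volatile Atomic32* data,
                           volatile Atomic32* ready,
                           Atomic32 value) {
  NoBarrier_Store(data, value);  // Plain store of the payload.
  Release_Store(ready, 1);       // Barrier, then store: the payload is
                                 // visible before the flag.
}

inline bool ExampleConsume(volatile Atomic32* data,
                           volatile Atomic32* ready,
                           Atomic32* out) {
  if (Acquire_Load(ready) == 0)
    return false;                // Not published yet.
  *out = NoBarrier_Load(data);   // Safe: ordered after the acquire load.
  return true;
}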

}  // namespace subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_