// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation; use base/atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(OS_QNX)
#include <sys/cpuinline.h>
#endif

namespace base {
namespace subtle {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, nor when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the coprocessor will still work).
//   However, on single-core devices (e.g. Nexus One or Nexus S), this
//   instruction can take up to 200 ns, which is huge, even though it is
//   completely unneeded on these devices.
//
// * There is no easy way to determine at runtime whether the device is
//   single- or multi-core. However, the kernel provides a helper function
//   at a fixed memory address (0xffff0fa0) that always performs a memory
//   barrier in the most efficient way: on single-core devices it is an
//   empty function that returns immediately, while on multi-core devices
//   it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers are
//   needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(OS_LINUX) || defined(OS_ANDROID)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(OS_QNX)
  __cpu_membarrier();
#else
#error MemoryBarrier() is not implemented on this platform.
#endif
}

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value == old_value)
    //     reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
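
// Usage sketch (illustration only, not part of the atomicops interface): the
// function above returns the value observed in |*ptr| before the attempted
// swap, so the swap succeeded exactly when the returned value equals
// |old_value|. A hypothetical atomic-add built on top of this CAS could look
// like the following (the name ExampleCompareAndSwapAdd is invented for this
// sketch):
inline Atomic32 ExampleCompareAndSwapAdd(volatile Atomic32* ptr,
                                         Atomic32 delta) {
  Atomic32 observed;
  Atomic32 desired;
  do {
    observed = *ptr;             // Read the current value.
    desired = observed + delta;  // Compute the value we want to install.
    // Retry if another thread modified |*ptr| between the read and the CAS,
    // i.e. if the CAS reports a previous value different from |observed|.
  } while (NoBarrier_CompareAndSwap(ptr, observed, desired) != observed);
  return desired;
}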

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //  value = LDREX(ptr)
    //  value += increment
    //  reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("   ldrex %0, [%3]\n"
                         "   strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
      defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier; there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and a non-zero value on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. For better
  // performance, use Acquire_CompareAndSwap() instead. Its implementation
  // guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
#  error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when |ptr| values are 32-bit aligned.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
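
// Usage sketch (illustration only, not part of the atomicops interface): a
// typical hand-off pairs Release_Store() on the producer side with
// Acquire_Load() on the consumer side, so that every write the producer
// performed before setting the flag is visible to a consumer that observes
// the flag. The ExamplePublish/ExampleTryConsume names below are invented
// for this sketch.
inline void ExamplePublish(volatile Atomic32* data,
                           volatile Atomic32* ready_flag,
                           Atomic32 payload) {
  NoBarrier_Store(data, payload);  // Plain store of the payload.
  Release_Store(ready_flag, 1);    // Barrier, then store of the flag.
}

inline bool ExampleTryConsume(volatile const Atomic32* data,
                              volatile const Atomic32* ready_flag,
                              Atomic32* payload_out) {
  if (!Acquire_Load(ready_flag))   // Load of the flag, then barrier.
    return false;
  *payload_out = NoBarrier_Load(data);
  return true;
}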

}  // namespace base::subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_