// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.

#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined (__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> will drag in all the windows32 names.  Since
// that can cause user code portability problems, we just declare the
// one needed function here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
  // Intel compiler only: atomically add inc to *x via lock xadd and
  // return the previous value of *x.
  template<typename must_be_int = int>
  int32 faa32(int32* x, int32 inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (inc), "=m" (*x)
                 : "0" (inc)
                 : "memory");
    return inc;
  }
#if defined(__x86_64)
  // 64-bit variant, available on x86_64 only.
  template<typename must_be_int = int>
  int64 faa64(int64* x, int64 inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (inc), "=m" (*x)
                 : "0" (inc)
                 : "memory");
    return inc;
  }
#endif
#endif

  // atomic functions only work on integers

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 32-bit signed integer.
   *  @param addend Value to add.
   */
  inline int32
  fetch_and_add_32(volatile int32* ptr, int32 addend)
  {
#if defined(__ICC)	//x86 version
    return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ECC)	//IA-64 version
    return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
                                   addend);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32 before, after;
    do
      {
        before = *ptr;
        after = before + addend;
      } while (atomic_cas_32((volatile unsigned int*)ptr, before,
                             after) != before);
    return before;
#else	//fallback, slow
#pragma message("slow fetch_and_add_32")
    int32 res;
#pragma omp critical
    {
      res = *ptr;
      *(ptr) += addend;
    }
    return res;
#endif
  }
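
  // Usage sketch (illustrative only; the names below are hypothetical):
  //
  //   volatile int32 __counter = 0;
  //   int32 __old = fetch_and_add_32(&__counter, 5);
  //   // __old is the value *before* the addition (0 here); __counter is now 5.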

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 64-bit signed integer.
   *  @param addend Value to add.
   */
  inline int64
  fetch_and_add_64(volatile int64* ptr, int64 addend)
  {
#if defined(__ICC) && defined(__x86_64)	//x86 version
    return faa64<int>((int64*)ptr, addend);
#elif defined(__ECC)	//IA-64 version
    return _InterlockedExchangeAdd64((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);	//not available in this case
    return 0;
#else
    return _InterlockedExchangeAdd64(ptr, addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__GNUC__) && defined(__i386) &&			\
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64 before, after;
    do
      {
        before = *ptr;
        after = before + addend;
      } while (atomic_cas_64((volatile unsigned long long*)ptr, before,
                             after) != before);
    return before;
#else	//fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow fetch_and_add_64")
    int64 res;
#pragma omp critical
    {
      res = *ptr;
      *(ptr) += addend;
    }
    return res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a signed integer.
   *  @param addend Value to add.
   */
  template<typename T>
  inline T
  fetch_and_add(volatile T* ptr, T addend)
  {
    if (sizeof(T) == sizeof(int32))
      return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
    else if (sizeof(T) == sizeof(int64))
      return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
    else
      {
        _GLIBCXX_PARALLEL_ASSERT(false);
        return T();	// Not reached; avoids falling off a non-void function.
      }
  }
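
  // Usage sketch for the generic wrapper (illustrative only; the variable
  // names are hypothetical).  The width of T selects the 32- or 64-bit
  // implementation at compile time.
  //
  //   volatile int64 __bytes_written = 0;
  //   int64 __before = fetch_and_add(&__bytes_written, int64(4096));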


#if defined(__ICC)

  // Intel compiler only: compare-and-swap via lock cmpxchg; returns the
  // value observed at *ptr (equal to old iff the swap took place).
  template<typename must_be_int = int>
  inline int32
  cas32(volatile int32* ptr, int32 old, int32 nw)
  {
    int32 before;
    __asm__ __volatile__("lock; cmpxchgl %1,%2"
                         : "=a"(before)
                         : "q"(nw), "m"(*(volatile int32*)(ptr)), "0"(old)
                         : "memory");
    return before;
  }

#if defined(__x86_64)
  // 64-bit variant, available on x86_64 only.
  template<typename must_be_int = int>
  inline int64
  cas64(volatile int64 *ptr, int64 old, int64 nw)
  {
    int64 before;
    __asm__ __volatile__("lock; cmpxchgq %1,%2"
                         : "=a"(before)
                         : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                         : "memory");
    return before;
  }
#endif

#endif

  /** @brief Compare @c *ptr and @c comparand.  If equal, set @c *ptr
   *  to @c replacement and return @c true; otherwise, return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 32-bit signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value.
   */
  inline bool
  compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
  {
#if defined(__ICC)	//x86 version
    return _InterlockedCompareExchange((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ECC)	//IA-64 version
    return _InterlockedCompareExchange((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
                                       replacement, comparand) == comparand;
#elif defined(__GNUC__)
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)ptr, comparand,
                         replacement) == comparand;
#else
#pragma message("slow compare_and_swap_32")
    bool res = false;
#pragma omp critical
    {
      if (*ptr == comparand)
        {
          *ptr = replacement;
          res = true;
        }
    }
    return res;
#endif
  }
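
  // Usage sketch (illustrative only; the names below are hypothetical):
  //
  //   volatile int32 __flag = 0;
  //   // Attempt to transition __flag from 0 to 1; succeeds in exactly one
  //   // thread even if several threads race on it.
  //   bool __won = compare_and_swap_32(&__flag, 0, 1);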

  /** @brief Compare @c *ptr and @c comparand.  If equal, set @c *ptr
   *  to @c replacement and return @c true; otherwise, return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 64-bit signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value.
   */
  inline bool
  compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
  {
#if defined(__ICC) && defined(__x86_64)	//x86 version
    return cas64<int>(ptr, comparand, replacement) == comparand;
#elif defined(__ECC)	//IA-64 version
    return _InterlockedCompareExchange64((void*)ptr, replacement,
                                         comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);	//not available in this case
    return false;
#else
    return _InterlockedCompareExchange64(ptr, replacement,
                                         comparand) == comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__GNUC__) && defined(__i386) &&			\
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)ptr,
                         comparand, replacement) == comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow compare_and_swap_64")
    bool res = false;
#pragma omp critical
    {
      if (*ptr == comparand)
        {
          *ptr = replacement;
          res = true;
        }
    }
    return res;
#endif
  }

  /** @brief Compare @c *ptr and @c comparand.  If equal, set @c *ptr
   *  to @c replacement and return @c true; otherwise, return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value. */
  template<typename T>
  inline bool
  compare_and_swap(volatile T* ptr, T comparand, T replacement)
  {
    if (sizeof(T) == sizeof(int32))
      return compare_and_swap_32((volatile int32*) ptr, (int32)comparand,
                                 (int32)replacement);
    else if (sizeof(T) == sizeof(int64))
      return compare_and_swap_64((volatile int64*) ptr, (int64)comparand,
                                 (int64)replacement);
    else
      {
        _GLIBCXX_PARALLEL_ASSERT(false);
        return false;	// Not reached; avoids falling off a non-void function.
      }
  }
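
  // Usage sketch (illustrative only; the helper below is hypothetical and
  // not part of this header): the generic wrapper is typically used in a
  // retry loop that re-reads the current value until the swap succeeds.
  //
  //   template<typename T>
  //   void __atomic_max(volatile T* __x, T __candidate)
  //   {
  //     T __observed;
  //     do
  //       {
  //         __observed = *__x;
  //         if (__candidate <= __observed)
  //           return;	// already large enough, nothing to do
  //       }
  //     while (!compare_and_swap(__x, __observed, __candidate));
  //   }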

  /** @brief Yield control to another thread, without waiting for
      the end of the time slice. */
  inline void
  yield()
  {
#if defined (_WIN32) && !defined (__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
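
  // Usage sketch (illustrative only; __done is a hypothetical shared flag):
  // yield() is typically called inside a spin-wait loop so that a waiting
  // thread gives up its time slice instead of burning CPU.
  //
  //   while (!__done)
  //     yield();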
} // end namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */