/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations using Windows API
// functions.  This file should not be included directly.  Clients
// should instead include "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_

#include <stdio.h>
#include <stdlib.h>
#include "base/abort.h"
#include "base/basictypes.h"  // For COMPILE_ASSERT

typedef int32 Atomic32;

#if defined(_WIN64)
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
#endif

namespace base {
namespace subtle {

typedef int64 Atomic64;

// 32-bit low-level operations on any platform
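
// Note: the Win32 Interlocked* operations wrapped below are documented
// to act as full memory barriers, which is why Acquire_CompareAndSwap
// and Release_CompareAndSwap later in this file can simply forward to
// NoBarrier_CompareAndSwap.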

extern "C" {
// We use windows intrinsics when we can (they seem to be supported
// well on MSVC 8.0 and above).  Unfortunately, in some
// environments, <windows.h> and <intrin.h> have conflicting
// declarations of some other intrinsics, breaking compilation:
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
// Therefore, we simply declare the relevant intrinsics ourselves.

// MinGW has a bug in the header files where it doesn't indicate the
// first argument is volatile -- they're not up to date.  See
//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
// We have to const_cast away the volatile to avoid compiler warnings.
// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
#if defined(__MINGW32__)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
}
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
}
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Unfortunately, in some environments, <windows.h> and <intrin.h>
// have conflicting declarations of some intrinsics, breaking
// compilation.  So we declare the intrinsics we need ourselves.  See
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
#pragma intrinsic(_InterlockedCompareExchange)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return _InterlockedCompareExchange(ptr, newval, oldval);
}

LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
#pragma intrinsic(_InterlockedExchange)
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return _InterlockedExchange(ptr, newval);
}

LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
#pragma intrinsic(_InterlockedExchangeAdd)
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return _InterlockedExchangeAdd(ptr, increment);
}

#else
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(ptr, newval, oldval);
}
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(ptr, newval);
}
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(ptr, increment);
}

#endif  // ifdef __MINGW32__
}  // extern "C"

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedCompareExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value),
      static_cast<LONG>(old_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // FastInterlockedExchangeAdd returns the *previous* value of *ptr,
  // so add the increment again to produce the new value.
  return FastInterlockedExchangeAdd(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(increment)) + increment;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

}  // namespace base::subtle
}  // namespace base
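
// A purely illustrative sketch of how the operations above get used
// (clients should include "base/atomicops.h" rather than this file):
//
//   static Atomic32 initialized = 0;
//   if (base::subtle::NoBarrier_CompareAndSwap(&initialized, 0, 1) == 0) {
//     // The previous value was 0, so this thread won the race and
//     // performs the one-time initialization.
//   }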

// In msvc8/vs2005, winnt.h already contains a definition for
// MemoryBarrier in the global namespace.  Add it there for earlier
// versions and forward to it from within the namespace.
#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
inline void MemoryBarrier() {
  Atomic32 value = 0;
  base::subtle::NoBarrier_AtomicExchange(&value, 0);
  // actually acts as a barrier in this implementation
}
#endif

namespace base {
namespace subtle {

inline void MemoryBarrier() {
  ::MemoryBarrier();
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005
  // See comments in Atomic64 version of Release_Store() below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

// 64-bit operations

#if defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 64-bit platform.

COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);

// These are the intrinsics needed for 64-bit operations.  Similar to the
// 32-bit case above.
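// On these platforms a PVOID is 64 bits wide (the COMPILE_ASSERT above
// checks this), so the pointer-flavored Interlocked operations can stand
// in for the 64-bit integer operations.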

extern "C" {
#if defined(__MINGW64__)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                             newval, oldval);
}
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Like above, we need to declare the intrinsics ourselves.
PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
                                         PVOID newval, PVOID oldval);
#pragma intrinsic(_InterlockedCompareExchangePointer)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                            newval, oldval);
}

PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
#pragma intrinsic(_InterlockedExchangePointer)
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}

LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
#pragma intrinsic(_InterlockedExchangeAdd64)
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#else
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
}
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(ptr, newval);
}
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(ptr, increment);
}

#endif  // ifdef __MINGW64__
}  // extern "C"

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedCompareExchangePointer(
      reinterpret_cast<volatile PVOID*>(ptr),
      reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedExchangePointer(
      reinterpret_cast<volatile PVOID*>(ptr),
      reinterpret_cast<PVOID>(new_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  return FastInterlockedExchangeAdd64(
      reinterpret_cast<volatile LONGLONG*>(ptr),
      static_cast<LONGLONG>(increment)) + increment;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}
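
// On the 64-bit platforms handled here, a naturally aligned 64-bit load
// or store is itself atomic, so the plain accesses below only have to
// provide ordering, not atomicity.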

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 32-bit platform

// TODO(vchen): The GNU assembly below must be converted to MSVC inline
// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.

inline void NotImplementedFatalError(const char *function_name) {
  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
          function_name);
  tcmalloc::Abort();
}
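
// Until the TODO above is addressed, every 64-bit operation in this
// branch falls through to NotImplementedFatalError() and aborts at
// runtime, so 64-bit atomics must not be used in 32-bit Windows builds
// (note that BASE_HAS_ATOMIC64 is only defined for _WIN64 above).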

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
#if 0 // Not implemented
  Atomic64 prev;
  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
                       : "=A" (prev)             // as contents of ptr:
                       : "m" (*ptr),             //   ecx:ebx => ptr
                         "0" (old_value),        // else:
                         "r" (&new_value)        //   old *ptr => edx:eax
                       : "memory", "%ebx", "%ecx");
  return prev;
#else
  NotImplementedFatalError("NoBarrier_CompareAndSwap");
  return 0;
#endif
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
#if 0 // Not implemented
  __asm__ __volatile__(
                       "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%2), %%ecx\n\t"   // ecx:ebx
                       "0:\n\t"
                       "movl %1, %%eax\n\t"      // Read contents of ptr into
                       "movl 4%1, %%edx\n\t"     // edx:eax
                       "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
                       "jnz 0b\n\t"              // is no longer edx:eax, loop
                       : "=A" (new_value)
                       : "m" (*ptr),
                         "r" (&new_value)
                       : "memory", "%ebx", "%ecx");
  return new_value;  // Now it's the previous value.
#else
  NotImplementedFatalError("NoBarrier_AtomicExchange");
  return 0;
#endif
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
#if 0 // Not implemented
  Atomic64 temp = increment;
  __asm__ __volatile__(
                       "0:\n\t"
                       "movl (%3), %%ebx\n\t"    // Move 64-bit increment into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "movl (%2), %%eax\n\t"    // Read contents of ptr into
                       "movl 4(%2), %%edx\n\t"   // edx:eax
                       "add %%eax, %%ebx\n\t"    // sum => ecx:ebx
                       "adc %%edx, %%ecx\n\t"    // edx:eax still has old *ptr
                       "lock; cmpxchg8b (%2)\n\t"// Attempt cmpxchg; if *ptr
                       "jnz 0b\n\t"              // is no longer edx:eax, loop
                       : "=A"(temp), "+m"(*ptr)
                       : "D" (ptr), "S" (&increment)
                       : "memory", "%ebx", "%ecx");
  // temp now contains the previous value of *ptr
  return temp + increment;
#else
  NotImplementedFatalError("NoBarrier_AtomicIncrement");
  return 0;
#endif
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
#if 0 // Not implemented
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
#else
  NotImplementedFatalError("Barrier_AtomicIncrement");
  return 0;
#endif
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
#if 0 // Not implemented
  __asm {
    mov mm0, value;  // Use mmx reg for 64-bit atomic moves
    mov ptr, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
#else
  NotImplementedFatalError("NoBarrier_Store");
#endif
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
#if 0 // Not implemented
  Atomic64 value;
  __asm {
    mov mm0, ptr;    // Use mmx reg for 64-bit atomic moves
    mov value, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
  return value;
#else
  NotImplementedFatalError("NoBarrier_Load");
  return 0;
#endif
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(_WIN64) || defined(__MINGW64__)


inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_