1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/platform/denormal.h" 17 #include "third_party/eigen3/Eigen/Core" 18 #include "tensorflow/core/platform/cpu_info.h" 19 #include "tensorflow/core/platform/logging.h" 20 #include "tensorflow/core/platform/platform.h" 21 // If we're on gcc 4.8 or older, there's a known bug that prevents the use of 22 // intrinsics when the architecture is not defined in the flags. See 23 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202 24 #if !defined(__SSE3__) && !defined(__clang__) && \ 25 (defined(__GNUC__) && (__GNUC__ < 4) || \ 26 ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9))) 27 #define GCC_WITHOUT_INTRINSICS 28 #endif 29 // Only try to use SSE3 instructions if we're on an x86 platform, and it's not 30 // mobile, and we're not on a known bad gcc version. 31 #if defined(PLATFORM_IS_X86) && !defined(IS_MOBILE_PLATFORM) && \ 32 !defined(GCC_WITHOUT_INTRINSICS) 33 #define DENORM_USE_INTRINSICS 34 #endif 35 36 #ifdef DENORM_USE_INTRINSICS 37 #include <pmmintrin.h> 38 #endif 39 40 namespace tensorflow { 41 namespace port { 42 43 static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) { 44 // For now, we flush denormals only on SSE 3. Other architectures such as ARM 45 // can be added as needed. 46 47 #ifdef DENORM_USE_INTRINSICS 48 if (TestCPUFeature(SSE3)) { 49 // Restore flags 50 _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode ? _MM_FLUSH_ZERO_ON 51 : _MM_FLUSH_ZERO_OFF); 52 _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode ? _MM_DENORMALS_ZERO_ON 53 : _MM_DENORMALS_ZERO_OFF); 54 } 55 #endif 56 } 57 58 static std::pair<bool, bool> GetDernormalState() { 59 // For now, we flush denormals only on SSE 3. Other architectures such as ARM 60 // can be added as needed. 61 62 #ifdef DENORM_USE_INTRINSICS 63 if (TestCPUFeature(SSE3)) { 64 // Save existing flags 65 bool flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON; 66 bool denormals_zero_mode = 67 _MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON; 68 return {flush_zero_mode, denormals_zero_mode}; 69 } 70 #endif 71 return {false, false}; 72 } 73 74 ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() { 75 std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDernormalState(); 76 } 77 78 ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() { 79 SetDenormalState(flush_zero_mode_, denormals_zero_mode_); 80 } 81 82 ScopedFlushDenormal::ScopedFlushDenormal() { 83 SetDenormalState(/*flush_zero_mode=*/true, /*denormals_zero_mode=*/true); 84 } 85 86 ScopedDontFlushDenormal::ScopedDontFlushDenormal() { 87 SetDenormalState(/*flush_zero_mode=*/false, /*denormals_zero_mode=*/false); 88 } 89 90 } // namespace port 91 } // namespace tensorflow 92