/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// IWYU pragma: private, include "perftools/gputools/executor/stream_executor.h"

#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_
#define TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_

#include <stdlib.h>
#include <string.h>  // memcpy

namespace perftools {
namespace gputools {
namespace port {

// port::bit_cast<Dest,Source> is a template function that implements the
// equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
// very low-level functions like the protobuf library and fast math
// support.
//
//   float f = 3.14159265358979;
//   int i = port::bit_cast<int32>(f);
//   // i = 0x40490fdb
//
// The classical address-casting method is:
//
//   // WRONG
//   float f = 3.14159265358979;            // WRONG
//   int i = * reinterpret_cast<int*>(&f);  // WRONG
//
// The address-casting method actually produces undefined behavior
// according to ISO C++ specification section 3.10 -15-.  Roughly, this
// section says: if an object in memory has one type, and a program
// accesses it with a different type, then the result is undefined
// behavior for most values of "different type".
//
// This is true for any cast syntax, either *(int*)&f or
// *reinterpret_cast<int*>(&f).  And it is particularly true for
// conversions between integral lvalues and floating-point lvalues.
//
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
// that expressions with different types refer to different memory.  gcc
// 4.0.1 has an optimizer that takes advantage of this.  So a
// non-conforming program quietly produces wildly incorrect output.
//
// The problem is not the use of reinterpret_cast.  The problem is type
// punning: holding an object in memory of one type and reading its bits
// back using a different type.
//
// The C++ standard is more subtle and complex than this, but that
// is the basic idea.
//
// Anyway ...
//
// port::bit_cast<> calls memcpy() which is blessed by the standard,
// especially by the example in section 3.9.  Also, of course,
// port::bit_cast<> wraps up the nasty logic in one place.
//
// Fortunately memcpy() is very fast.  In optimized mode, with a
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
// code with the minimal amount of data movement.  On a 32-bit system,
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
// compiles to two loads and two stores.
//
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
//
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
// is likely to surprise you.
//
// Props to Bill Gibbons for the compile time assertion technique and
// Art Komninos and Igor Tandetnik for the msvc experiments.
//
// -- mec 2005-10-17

// Returns a Dest whose object representation is a byte-for-byte copy of
// `source`.
//
// Requirements, as visible from the implementation below:
//   * sizeof(Dest) must equal sizeof(Source); this is checked at compile time.
//   * Dest must be default-constructible, because a local Dest is declared
//     before the bytes are copied into it.
//   * Both types should be POD: the copy goes through memcpy, which bypasses
//     constructors and assignment operators (see the WARNING above).
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  static_assert(sizeof(Dest) == sizeof(Source),
                "src and dst types must have equal sizes");

  // Left uninitialized on purpose: memcpy overwrites every byte of it.
  Dest dest;
  memcpy(&dest, &source, sizeof(dest));
  return dest;
}

}  // namespace port
}  // namespace gputools
}  // namespace perftools

#endif  // TENSORFLOW_STREAM_EXECUTOR_LIB_CASTS_H_