1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_COMMON_FLOAT16_H_ 18 #define LIBTEXTCLASSIFIER_COMMON_FLOAT16_H_ 19 20 #include "util/base/casts.h" 21 #include "util/base/integral_types.h" 22 23 namespace libtextclassifier { 24 namespace nlp_core { 25 26 // 16 bit encoding of a float. NOTE: can't be used directly for computation: 27 // one first needs to convert it to a normal float, using Float16To32. 28 // 29 // Documentation copied from original file: 30 // 31 // Compact 16-bit encoding of floating point numbers. This 32 // representation uses 1 bit for the sign, 8 bits for the exponent and 33 // 7 bits for the mantissa. It is assumed that floats are in IEEE 754 34 // format so a float16 is just bits 16-31 of a single precision float. 35 // 36 // NOTE: The IEEE floating point standard defines a float16 format that 37 // is different than this format (it has fewer bits of exponent and more 38 // bits of mantissa). We don't use that format here because conversion 39 // to/from 32-bit floats is more complex for that format, and the 40 // conversion for this format is very simple. 41 // 42 // <---------float16------------> 43 // s e e e e e e e e f f f f f f f f f f f f f f f f f f f f f f f 44 // <------------------------------float--------------------------> 45 // 3 3 2 2 1 1 0 46 // 1 0 3 2 5 4 0 47 48 typedef uint16 float16; 49 50 static inline float16 Float32To16(float f) { 51 // Note that we just truncate the mantissa bits: we make no effort to 52 // do any smarter rounding. 53 return (bit_cast<uint32>(f) >> 16) & 0xffff; 54 } 55 56 static inline float Float16To32(float16 f) { 57 // We fill in the new mantissa bits with 0, and don't do anything smarter. 58 return bit_cast<float>(f << 16); 59 } 60 61 } // namespace nlp_core 62 } // namespace libtextclassifier 63 64 #endif // LIBTEXTCLASSIFIER_COMMON_FLOAT16_H_ 65