1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ 18 #define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ 19 20 #include <algorithm> 21 #include <string> 22 #include <vector> 23 24 #include "base.h" 25 #include "util/base/logging.h" 26 27 namespace libtextclassifier { 28 namespace nlp_core { 29 30 // Swaps the sizeof(T) bytes that start at addr. E.g., if sizeof(T) == 2, 31 // then (addr[0], addr[1]) -> (addr[1], addr[0]). Useful for little endian 32 // <-> big endian conversions. 33 template <class T> 34 void SwapBytes(T *addr) { 35 char *char_ptr = reinterpret_cast<char *>(addr); 36 std::reverse(char_ptr, char_ptr + sizeof(T)); 37 } 38 39 // Assuming addr points to a piece of data of type T, with its bytes in the 40 // little/big endian order specific to the machine this code runs on, this 41 // method will re-arrange the bytes (in place) in little-endian order. 42 template <class T> 43 void HostToLittleEndian(T *addr) { 44 if (LittleEndian::IsLittleEndian()) { 45 // Do nothing: current machine is little-endian. 46 } else { 47 SwapBytes(addr); 48 } 49 } 50 51 // Reverse of HostToLittleEndian. 52 template <class T> 53 void LittleEndianToHost(T *addr) { 54 // It turns out it's the same function: on little-endian machines, do nothing 55 // (source and target formats are identical). Otherwise, swap bytes. 56 HostToLittleEndian(addr); 57 } 58 59 // Returns string obtained by concatenating the bytes of the elements from a 60 // vector (in order: v[0], v[1], etc). If the type T requires more than one 61 // byte, the byte for each element are first converted to little-endian format. 62 template<typename T> 63 std::string GetDataBytesInLittleEndianOrder(const std::vector<T> &v) { 64 std::string data_bytes; 65 for (const T element : v) { 66 T little_endian_element = element; 67 HostToLittleEndian(&little_endian_element); 68 data_bytes.append( 69 reinterpret_cast<const char *>(&little_endian_element), 70 sizeof(T)); 71 } 72 return data_bytes; 73 } 74 75 // Performs reverse of GetDataBytesInLittleEndianOrder. 76 // 77 // I.e., decodes the data bytes from parameter bytes into num_elements Ts, and 78 // places them in the vector v (previous content of that vector is erased). 79 // 80 // We expect bytes to contain the concatenation of the bytes for exactly 81 // num_elements elements of type T. If the type T requires more than one byte, 82 // those bytes should be arranged in little-endian form. 83 // 84 // Returns true on success and false otherwise (e.g., bytes has the wrong size). 85 // Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore, 86 // have asked us not to do so). Instead, we report the error and let the client 87 // decide what to do. On error, we also fill the vector with zeros, such that 88 // at least the dimension of v matches expectations. 89 template<typename T> 90 bool FillVectorFromDataBytesInLittleEndian( 91 const std::string &bytes, int num_elements, std::vector<T> *v) { 92 if (bytes.size() != num_elements * sizeof(T)) { 93 TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size() 94 << " vs expected " << num_elements 95 << " elements of sizeof(element) = " << sizeof(T) 96 << " bytes each ; will fill vector with zeros"; 97 v->assign(num_elements, static_cast<T>(0)); 98 return false; 99 } 100 v->clear(); 101 v->reserve(num_elements); 102 const T *start = reinterpret_cast<const T *>(bytes.data()); 103 if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) { 104 // Fast in the common case ([almost] all hardware today is little-endian): 105 // if same endianness (or type T requires a single byte and endianness 106 // irrelevant), just use the bytes. 107 v->assign(start, start + num_elements); 108 } else { 109 // Slower (but very rare case): this code runs on a big endian machine and 110 // the type T requires more than one byte. Hence, some conversion is 111 // necessary. 112 for (int i = 0; i < num_elements; ++i) { 113 T temp = start[i]; 114 SwapBytes(&temp); 115 v->push_back(temp); 116 } 117 } 118 return true; 119 } 120 121 } // namespace nlp_core 122 } // namespace libtextclassifier 123 124 #endif // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_ 125