Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
     18 #define LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
     19 
     20 #include <algorithm>
     21 #include <string>
     22 #include <vector>
     23 
     24 #include "base.h"
     25 #include "util/base/logging.h"
     26 
     27 namespace libtextclassifier {
     28 namespace nlp_core {
     29 
     30 // Swaps the sizeof(T) bytes that start at addr.  E.g., if sizeof(T) == 2,
     31 // then (addr[0], addr[1]) -> (addr[1], addr[0]).  Useful for little endian
     32 // <-> big endian conversions.
     33 template <class T>
     34 void SwapBytes(T *addr) {
     35   char *char_ptr = reinterpret_cast<char *>(addr);
     36   std::reverse(char_ptr, char_ptr + sizeof(T));
     37 }
     38 
     39 // Assuming addr points to a piece of data of type T, with its bytes in the
     40 // little/big endian order specific to the machine this code runs on, this
     41 // method will re-arrange the bytes (in place) in little-endian order.
     42 template <class T>
     43 void HostToLittleEndian(T *addr) {
     44   if (LittleEndian::IsLittleEndian()) {
     45     // Do nothing: current machine is little-endian.
     46   } else {
     47     SwapBytes(addr);
     48   }
     49 }
     50 
     51 // Reverse of HostToLittleEndian.
     52 template <class T>
     53 void LittleEndianToHost(T *addr) {
     54   // It turns out it's the same function: on little-endian machines, do nothing
     55   // (source and target formats are identical).  Otherwise, swap bytes.
     56   HostToLittleEndian(addr);
     57 }
     58 
     59 // Returns string obtained by concatenating the bytes of the elements from a
     60 // vector (in order: v[0], v[1], etc).  If the type T requires more than one
     61 // byte, the byte for each element are first converted to little-endian format.
     62 template<typename T>
     63 std::string GetDataBytesInLittleEndianOrder(const std::vector<T> &v) {
     64   std::string data_bytes;
     65   for (const T element : v) {
     66     T little_endian_element = element;
     67     HostToLittleEndian(&little_endian_element);
     68     data_bytes.append(
     69         reinterpret_cast<const char *>(&little_endian_element),
     70         sizeof(T));
     71   }
     72   return data_bytes;
     73 }
     74 
     75 // Performs reverse of GetDataBytesInLittleEndianOrder.
     76 //
     77 // I.e., decodes the data bytes from parameter bytes into num_elements Ts, and
     78 // places them in the vector v (previous content of that vector is erased).
     79 //
     80 // We expect bytes to contain the concatenation of the bytes for exactly
     81 // num_elements elements of type T.  If the type T requires more than one byte,
     82 // those bytes should be arranged in little-endian form.
     83 //
     84 // Returns true on success and false otherwise (e.g., bytes has the wrong size).
     85 // Note: we do not want to crash on corrupted data (some clients, e..g, GMSCore,
     86 // have asked us not to do so).  Instead, we report the error and let the client
     87 // decide what to do.  On error, we also fill the vector with zeros, such that
     88 // at least the dimension of v matches expectations.
     89 template<typename T>
     90 bool FillVectorFromDataBytesInLittleEndian(
     91     const std::string &bytes, int num_elements, std::vector<T> *v) {
     92   if (bytes.size() != num_elements * sizeof(T)) {
     93     TC_LOG(ERROR) << "Wrong number of bytes: actual " << bytes.size()
     94                   << " vs expected " << num_elements
     95                   << " elements of sizeof(element) = " << sizeof(T)
     96                   << " bytes each ; will fill vector with zeros";
     97     v->assign(num_elements, static_cast<T>(0));
     98     return false;
     99   }
    100   v->clear();
    101   v->reserve(num_elements);
    102   const T *start = reinterpret_cast<const T *>(bytes.data());
    103   if (LittleEndian::IsLittleEndian() || (sizeof(T) == 1)) {
    104     // Fast in the common case ([almost] all hardware today is little-endian):
    105     // if same endianness (or type T requires a single byte and endianness
    106     // irrelevant), just use the bytes.
    107     v->assign(start, start + num_elements);
    108   } else {
    109     // Slower (but very rare case): this code runs on a big endian machine and
    110     // the type T requires more than one byte.  Hence, some conversion is
    111     // necessary.
    112     for (int i = 0; i < num_elements; ++i) {
    113       T temp = start[i];
    114       SwapBytes(&temp);
    115       v->push_back(temp);
    116     }
    117   }
    118   return true;
    119 }
    120 
    121 }  // namespace nlp_core
    122 }  // namespace libtextclassifier
    123 
    124 #endif  // LIBTEXTCLASSIFIER_COMMON_LITTLE_ENDIAN_DATA_H_
    125