Home | History | Annotate | Download | only in base
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_LIBARTBASE_BASE_LEB128_H_
     18 #define ART_LIBARTBASE_BASE_LEB128_H_
     19 
     20 #include <vector>
     21 
     22 #include <android-base/logging.h>
     23 
     24 #include "base/bit_utils.h"
     25 #include "base/globals.h"
     26 #include "base/macros.h"
     27 
     28 namespace art {
     29 
     30 // Reads an unsigned LEB128 value, updating the given pointer to point
     31 // just past the end of the read value. This function tolerates
     32 // non-zero high-order bits in the fifth encoded byte.
     33 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
     34   const uint8_t* ptr = *data;
     35   int result = *(ptr++);
     36   if (UNLIKELY(result > 0x7f)) {
     37     int cur = *(ptr++);
     38     result = (result & 0x7f) | ((cur & 0x7f) << 7);
     39     if (cur > 0x7f) {
     40       cur = *(ptr++);
     41       result |= (cur & 0x7f) << 14;
     42       if (cur > 0x7f) {
     43         cur = *(ptr++);
     44         result |= (cur & 0x7f) << 21;
     45         if (cur > 0x7f) {
     46           // Note: We don't check to see if cur is out of range here,
     47           // meaning we tolerate garbage in the four high-order bits.
     48           cur = *(ptr++);
     49           result |= cur << 28;
     50         }
     51       }
     52     }
     53   }
     54   *data = ptr;
     55   return static_cast<uint32_t>(result);
     56 }
     57 
     58 static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
     59   return DecodeUnsignedLeb128(&data);
     60 }
     61 
     62 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
     63                                                const void* end,
     64                                                uint32_t* out) {
     65   const uint8_t* ptr = *data;
     66   if (ptr >= end) {
     67     return false;
     68   }
     69   int result = *(ptr++);
     70   if (UNLIKELY(result > 0x7f)) {
     71     if (ptr >= end) {
     72       return false;
     73     }
     74     int cur = *(ptr++);
     75     result = (result & 0x7f) | ((cur & 0x7f) << 7);
     76     if (cur > 0x7f) {
     77       if (ptr >= end) {
     78         return false;
     79       }
     80       cur = *(ptr++);
     81       result |= (cur & 0x7f) << 14;
     82       if (cur > 0x7f) {
     83         if (ptr >= end) {
     84           return false;
     85         }
     86         cur = *(ptr++);
     87         result |= (cur & 0x7f) << 21;
     88         if (cur > 0x7f) {
     89           if (ptr >= end) {
     90             return false;
     91           }
     92           // Note: We don't check to see if cur is out of range here,
     93           // meaning we tolerate garbage in the four high-order bits.
     94           cur = *(ptr++);
     95           result |= cur << 28;
     96         }
     97       }
     98     }
     99   }
    100   *data = ptr;
    101   *out = static_cast<uint32_t>(result);
    102   return true;
    103 }
    104 
    105 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
    106 // just past the end of the read value. This function tolerates
    107 // non-zero high-order bits in the fifth encoded byte.
    108 // It is possible for this function to return -1.
    109 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
    110   return DecodeUnsignedLeb128(data) - 1;
    111 }
    112 
    113 // Reads a signed LEB128 value, updating the given pointer to point
    114 // just past the end of the read value. This function tolerates
    115 // non-zero high-order bits in the fifth encoded byte.
    116 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
    117   const uint8_t* ptr = *data;
    118   int32_t result = *(ptr++);
    119   if (result <= 0x7f) {
    120     result = (result << 25) >> 25;
    121   } else {
    122     int cur = *(ptr++);
    123     result = (result & 0x7f) | ((cur & 0x7f) << 7);
    124     if (cur <= 0x7f) {
    125       result = (result << 18) >> 18;
    126     } else {
    127       cur = *(ptr++);
    128       result |= (cur & 0x7f) << 14;
    129       if (cur <= 0x7f) {
    130         result = (result << 11) >> 11;
    131       } else {
    132         cur = *(ptr++);
    133         result |= (cur & 0x7f) << 21;
    134         if (cur <= 0x7f) {
    135           result = (result << 4) >> 4;
    136         } else {
    137           // Note: We don't check to see if cur is out of range here,
    138           // meaning we tolerate garbage in the four high-order bits.
    139           cur = *(ptr++);
    140           result |= cur << 28;
    141         }
    142       }
    143     }
    144   }
    145   *data = ptr;
    146   return result;
    147 }
    148 
    149 static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
    150                                              const void* end,
    151                                              int32_t* out) {
    152   const uint8_t* ptr = *data;
    153   if (ptr >= end) {
    154     return false;
    155   }
    156   int32_t result = *(ptr++);
    157   if (result <= 0x7f) {
    158     result = (result << 25) >> 25;
    159   } else {
    160     if (ptr >= end) {
    161       return false;
    162     }
    163     int cur = *(ptr++);
    164     result = (result & 0x7f) | ((cur & 0x7f) << 7);
    165     if (cur <= 0x7f) {
    166       result = (result << 18) >> 18;
    167     } else {
    168       if (ptr >= end) {
    169         return false;
    170       }
    171       cur = *(ptr++);
    172       result |= (cur & 0x7f) << 14;
    173       if (cur <= 0x7f) {
    174         result = (result << 11) >> 11;
    175       } else {
    176         if (ptr >= end) {
    177           return false;
    178         }
    179         cur = *(ptr++);
    180         result |= (cur & 0x7f) << 21;
    181         if (cur <= 0x7f) {
    182           result = (result << 4) >> 4;
    183         } else {
    184           if (ptr >= end) {
    185             return false;
    186           }
    187           // Note: We don't check to see if cur is out of range here,
    188           // meaning we tolerate garbage in the four high-order bits.
    189           cur = *(ptr++);
    190           result |= cur << 28;
    191         }
    192       }
    193     }
    194   }
    195   *data = ptr;
    196   *out = static_cast<uint32_t>(result);
    197   return true;
    198 }
    199 
    200 // Returns the number of bytes needed to encode the value in unsigned LEB128.
    201 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
    202   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
    203   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
    204   uint32_t x = 6 + 32 - CLZ(data | 1U);
    205   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
    206   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
    207   return (x * 37) >> 8;
    208 }
    209 
    210 static inline bool IsLeb128Terminator(const uint8_t* ptr) {
    211   return *ptr <= 0x7f;
    212 }
    213 
    214 // Returns the first byte of a Leb128 value assuming that:
    215 // (1) `end_ptr` points to the first byte after the Leb128 value, and
    216 // (2) there is another Leb128 value before this one.
    217 template <typename T>
    218 static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
    219   static_assert(std::is_same<typename std::remove_const<T>::type, uint8_t>::value,
    220                 "T must be a uint8_t");
    221   T* ptr = end_ptr;
    222 
    223   // Move one byte back, check that this is the terminating byte.
    224   ptr--;
    225   DCHECK(IsLeb128Terminator(ptr));
    226 
    227   // Keep moving back while the previous byte is not a terminating byte.
    228   // Fail after reading five bytes in case there isn't another Leb128 value
    229   // before this one.
    230   while (!IsLeb128Terminator(ptr - 1)) {
    231     ptr--;
    232     DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
    233   }
    234 
    235   return ptr;
    236 }
    237 
    238 // Returns the number of bytes needed to encode the value in unsigned LEB128.
    239 static inline uint32_t SignedLeb128Size(int32_t data) {
    240   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
    241   data = data ^ (data >> 31);
    242   uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
    243   return (x * 37) >> 8;
    244 }
    245 
    246 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
    247   uint8_t out = value & 0x7f;
    248   value >>= 7;
    249   while (value != 0) {
    250     *dest++ = out | 0x80;
    251     out = value & 0x7f;
    252     value >>= 7;
    253   }
    254   *dest++ = out;
    255   return dest;
    256 }
    257 
    258 template <typename Vector>
    259 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
    260   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
    261   uint8_t out = value & 0x7f;
    262   value >>= 7;
    263   while (value != 0) {
    264     dest->push_back(out | 0x80);
    265     out = value & 0x7f;
    266     value >>= 7;
    267   }
    268   dest->push_back(out);
    269 }
    270 
    271 // Overwrite encoded Leb128 with a new value. The new value must be less than
    272 // or equal to the old value to ensure that it fits the allocated space.
    273 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
    274   const uint8_t* old_end = dest;
    275   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
    276   DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
    277   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
    278     // Use longer encoding than necessary to fill the allocated space.
    279     end[-1] |= 0x80;
    280     end[0] = 0;
    281   }
    282 }
    283 
    284 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
    285   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
    286   uint8_t out = value & 0x7f;
    287   while (extra_bits != 0u) {
    288     *dest++ = out | 0x80;
    289     value >>= 7;
    290     out = value & 0x7f;
    291     extra_bits >>= 7;
    292   }
    293   *dest++ = out;
    294   return dest;
    295 }
    296 
    297 template<typename Vector>
    298 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
    299   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
    300   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
    301   uint8_t out = value & 0x7f;
    302   while (extra_bits != 0u) {
    303     dest->push_back(out | 0x80);
    304     value >>= 7;
    305     out = value & 0x7f;
    306     extra_bits >>= 7;
    307   }
    308   dest->push_back(out);
    309 }
    310 
    311 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
    312 template <typename Vector = std::vector<uint8_t>>
    313 class Leb128Encoder {
    314   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
    315 
    316  public:
    317   explicit Leb128Encoder(Vector* data) : data_(data) {
    318     DCHECK(data != nullptr);
    319   }
    320 
    321   void Reserve(uint32_t size) {
    322     data_->reserve(size);
    323   }
    324 
    325   void PushBackUnsigned(uint32_t value) {
    326     EncodeUnsignedLeb128(data_, value);
    327   }
    328 
    329   template<typename It>
    330   void InsertBackUnsigned(It cur, It end) {
    331     for (; cur != end; ++cur) {
    332       PushBackUnsigned(*cur);
    333     }
    334   }
    335 
    336   void PushBackSigned(int32_t value) {
    337     EncodeSignedLeb128(data_, value);
    338   }
    339 
    340   template<typename It>
    341   void InsertBackSigned(It cur, It end) {
    342     for (; cur != end; ++cur) {
    343       PushBackSigned(*cur);
    344     }
    345   }
    346 
    347   const Vector& GetData() const {
    348     return *data_;
    349   }
    350 
    351  protected:
    352   Vector* const data_;
    353 
    354  private:
    355   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
    356 };
    357 
    358 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
    359 template <typename Vector = std::vector<uint8_t>>
    360 class Leb128EncodingVector FINAL : private Vector,
    361                                    public Leb128Encoder<Vector> {
    362   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
    363 
    364  public:
    365   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
    366 
    367   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
    368     : Vector(alloc),
    369       Leb128Encoder<Vector>(this) { }
    370 
    371  private:
    372   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
    373 };
    374 
    375 }  // namespace art
    376 
    377 #endif  // ART_LIBARTBASE_BASE_LEB128_H_
    378