1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_LIBARTBASE_BASE_LEB128_H_ 18 #define ART_LIBARTBASE_BASE_LEB128_H_ 19 20 #include <vector> 21 22 #include <android-base/logging.h> 23 24 #include "base/bit_utils.h" 25 #include "base/globals.h" 26 #include "base/macros.h" 27 28 namespace art { 29 30 // Reads an unsigned LEB128 value, updating the given pointer to point 31 // just past the end of the read value. This function tolerates 32 // non-zero high-order bits in the fifth encoded byte. 33 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) { 34 const uint8_t* ptr = *data; 35 int result = *(ptr++); 36 if (UNLIKELY(result > 0x7f)) { 37 int cur = *(ptr++); 38 result = (result & 0x7f) | ((cur & 0x7f) << 7); 39 if (cur > 0x7f) { 40 cur = *(ptr++); 41 result |= (cur & 0x7f) << 14; 42 if (cur > 0x7f) { 43 cur = *(ptr++); 44 result |= (cur & 0x7f) << 21; 45 if (cur > 0x7f) { 46 // Note: We don't check to see if cur is out of range here, 47 // meaning we tolerate garbage in the four high-order bits. 48 cur = *(ptr++); 49 result |= cur << 28; 50 } 51 } 52 } 53 } 54 *data = ptr; 55 return static_cast<uint32_t>(result); 56 } 57 58 static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) { 59 return DecodeUnsignedLeb128(&data); 60 } 61 62 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data, 63 const void* end, 64 uint32_t* out) { 65 const uint8_t* ptr = *data; 66 if (ptr >= end) { 67 return false; 68 } 69 int result = *(ptr++); 70 if (UNLIKELY(result > 0x7f)) { 71 if (ptr >= end) { 72 return false; 73 } 74 int cur = *(ptr++); 75 result = (result & 0x7f) | ((cur & 0x7f) << 7); 76 if (cur > 0x7f) { 77 if (ptr >= end) { 78 return false; 79 } 80 cur = *(ptr++); 81 result |= (cur & 0x7f) << 14; 82 if (cur > 0x7f) { 83 if (ptr >= end) { 84 return false; 85 } 86 cur = *(ptr++); 87 result |= (cur & 0x7f) << 21; 88 if (cur > 0x7f) { 89 if (ptr >= end) { 90 return false; 91 } 92 // Note: We don't check to see if cur is out of range here, 93 // meaning we tolerate garbage in the four high-order bits. 94 cur = *(ptr++); 95 result |= cur << 28; 96 } 97 } 98 } 99 } 100 *data = ptr; 101 *out = static_cast<uint32_t>(result); 102 return true; 103 } 104 105 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point 106 // just past the end of the read value. This function tolerates 107 // non-zero high-order bits in the fifth encoded byte. 108 // It is possible for this function to return -1. 109 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) { 110 return DecodeUnsignedLeb128(data) - 1; 111 } 112 113 // Reads a signed LEB128 value, updating the given pointer to point 114 // just past the end of the read value. This function tolerates 115 // non-zero high-order bits in the fifth encoded byte. 116 static inline int32_t DecodeSignedLeb128(const uint8_t** data) { 117 const uint8_t* ptr = *data; 118 int32_t result = *(ptr++); 119 if (result <= 0x7f) { 120 result = (result << 25) >> 25; 121 } else { 122 int cur = *(ptr++); 123 result = (result & 0x7f) | ((cur & 0x7f) << 7); 124 if (cur <= 0x7f) { 125 result = (result << 18) >> 18; 126 } else { 127 cur = *(ptr++); 128 result |= (cur & 0x7f) << 14; 129 if (cur <= 0x7f) { 130 result = (result << 11) >> 11; 131 } else { 132 cur = *(ptr++); 133 result |= (cur & 0x7f) << 21; 134 if (cur <= 0x7f) { 135 result = (result << 4) >> 4; 136 } else { 137 // Note: We don't check to see if cur is out of range here, 138 // meaning we tolerate garbage in the four high-order bits. 139 cur = *(ptr++); 140 result |= cur << 28; 141 } 142 } 143 } 144 } 145 *data = ptr; 146 return result; 147 } 148 149 static inline bool DecodeSignedLeb128Checked(const uint8_t** data, 150 const void* end, 151 int32_t* out) { 152 const uint8_t* ptr = *data; 153 if (ptr >= end) { 154 return false; 155 } 156 int32_t result = *(ptr++); 157 if (result <= 0x7f) { 158 result = (result << 25) >> 25; 159 } else { 160 if (ptr >= end) { 161 return false; 162 } 163 int cur = *(ptr++); 164 result = (result & 0x7f) | ((cur & 0x7f) << 7); 165 if (cur <= 0x7f) { 166 result = (result << 18) >> 18; 167 } else { 168 if (ptr >= end) { 169 return false; 170 } 171 cur = *(ptr++); 172 result |= (cur & 0x7f) << 14; 173 if (cur <= 0x7f) { 174 result = (result << 11) >> 11; 175 } else { 176 if (ptr >= end) { 177 return false; 178 } 179 cur = *(ptr++); 180 result |= (cur & 0x7f) << 21; 181 if (cur <= 0x7f) { 182 result = (result << 4) >> 4; 183 } else { 184 if (ptr >= end) { 185 return false; 186 } 187 // Note: We don't check to see if cur is out of range here, 188 // meaning we tolerate garbage in the four high-order bits. 189 cur = *(ptr++); 190 result |= cur << 28; 191 } 192 } 193 } 194 } 195 *data = ptr; 196 *out = static_cast<uint32_t>(result); 197 return true; 198 } 199 200 // Returns the number of bytes needed to encode the value in unsigned LEB128. 201 static inline uint32_t UnsignedLeb128Size(uint32_t data) { 202 // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1) 203 // bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7 204 uint32_t x = 6 + 32 - CLZ(data | 1U); 205 // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7). 206 // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85. 207 return (x * 37) >> 8; 208 } 209 210 static inline bool IsLeb128Terminator(const uint8_t* ptr) { 211 return *ptr <= 0x7f; 212 } 213 214 // Returns the first byte of a Leb128 value assuming that: 215 // (1) `end_ptr` points to the first byte after the Leb128 value, and 216 // (2) there is another Leb128 value before this one. 217 template <typename T> 218 static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) { 219 static_assert(std::is_same<typename std::remove_const<T>::type, uint8_t>::value, 220 "T must be a uint8_t"); 221 T* ptr = end_ptr; 222 223 // Move one byte back, check that this is the terminating byte. 224 ptr--; 225 DCHECK(IsLeb128Terminator(ptr)); 226 227 // Keep moving back while the previous byte is not a terminating byte. 228 // Fail after reading five bytes in case there isn't another Leb128 value 229 // before this one. 230 while (!IsLeb128Terminator(ptr - 1)) { 231 ptr--; 232 DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5); 233 } 234 235 return ptr; 236 } 237 238 // Returns the number of bytes needed to encode the value in unsigned LEB128. 239 static inline uint32_t SignedLeb128Size(int32_t data) { 240 // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign. 241 data = data ^ (data >> 31); 242 uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U); 243 return (x * 37) >> 8; 244 } 245 246 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) { 247 uint8_t out = value & 0x7f; 248 value >>= 7; 249 while (value != 0) { 250 *dest++ = out | 0x80; 251 out = value & 0x7f; 252 value >>= 7; 253 } 254 *dest++ = out; 255 return dest; 256 } 257 258 template <typename Vector> 259 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) { 260 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); 261 uint8_t out = value & 0x7f; 262 value >>= 7; 263 while (value != 0) { 264 dest->push_back(out | 0x80); 265 out = value & 0x7f; 266 value >>= 7; 267 } 268 dest->push_back(out); 269 } 270 271 // Overwrite encoded Leb128 with a new value. The new value must be less than 272 // or equal to the old value to ensure that it fits the allocated space. 273 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) { 274 const uint8_t* old_end = dest; 275 uint32_t old_value = DecodeUnsignedLeb128(&old_end); 276 DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value)); 277 for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) { 278 // Use longer encoding than necessary to fill the allocated space. 279 end[-1] |= 0x80; 280 end[0] = 0; 281 } 282 } 283 284 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) { 285 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6; 286 uint8_t out = value & 0x7f; 287 while (extra_bits != 0u) { 288 *dest++ = out | 0x80; 289 value >>= 7; 290 out = value & 0x7f; 291 extra_bits >>= 7; 292 } 293 *dest++ = out; 294 return dest; 295 } 296 297 template<typename Vector> 298 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) { 299 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); 300 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6; 301 uint8_t out = value & 0x7f; 302 while (extra_bits != 0u) { 303 dest->push_back(out | 0x80); 304 value >>= 7; 305 out = value & 0x7f; 306 extra_bits >>= 7; 307 } 308 dest->push_back(out); 309 } 310 311 // An encoder that pushes int32_t/uint32_t data onto the given std::vector. 312 template <typename Vector = std::vector<uint8_t>> 313 class Leb128Encoder { 314 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); 315 316 public: 317 explicit Leb128Encoder(Vector* data) : data_(data) { 318 DCHECK(data != nullptr); 319 } 320 321 void Reserve(uint32_t size) { 322 data_->reserve(size); 323 } 324 325 void PushBackUnsigned(uint32_t value) { 326 EncodeUnsignedLeb128(data_, value); 327 } 328 329 template<typename It> 330 void InsertBackUnsigned(It cur, It end) { 331 for (; cur != end; ++cur) { 332 PushBackUnsigned(*cur); 333 } 334 } 335 336 void PushBackSigned(int32_t value) { 337 EncodeSignedLeb128(data_, value); 338 } 339 340 template<typename It> 341 void InsertBackSigned(It cur, It end) { 342 for (; cur != end; ++cur) { 343 PushBackSigned(*cur); 344 } 345 } 346 347 const Vector& GetData() const { 348 return *data_; 349 } 350 351 protected: 352 Vector* const data_; 353 354 private: 355 DISALLOW_COPY_AND_ASSIGN(Leb128Encoder); 356 }; 357 358 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format. 359 template <typename Vector = std::vector<uint8_t>> 360 class Leb128EncodingVector FINAL : private Vector, 361 public Leb128Encoder<Vector> { 362 static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); 363 364 public: 365 Leb128EncodingVector() : Leb128Encoder<Vector>(this) { } 366 367 explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc) 368 : Vector(alloc), 369 Leb128Encoder<Vector>(this) { } 370 371 private: 372 DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector); 373 }; 374 375 } // namespace art 376 377 #endif // ART_LIBARTBASE_BASE_LEB128_H_ 378