1 // Copyright 2015 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 //////////////////////////////////////////////////////////////////////////////// 16 17 #include "src/binary_parse/range_checked_byte_ptr.h" 18 19 #include <assert.h> 20 #include <cstddef> 21 #include <cstring> 22 23 namespace piex { 24 namespace binary_parse { 25 26 #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE 27 #define BREAK_IF_DEBUGGING() assert(false) 28 #else 29 #define BREAK_IF_DEBUGGING() assert(true) 30 #endif 31 32 namespace { 33 class MemoryPagedByteArray : public PagedByteArray { 34 public: 35 MemoryPagedByteArray(const unsigned char *buffer, const size_t len); 36 37 virtual size_t length() const; 38 virtual size_t pageSize() const; 39 virtual void getPage(size_t page_index, const unsigned char **begin, 40 const unsigned char **end, PagePtr *page) const; 41 42 private: 43 const unsigned char *buffer_; 44 const size_t len_; 45 }; 46 47 MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer, 48 const size_t len) 49 : buffer_(buffer), len_(len) {} 50 51 size_t MemoryPagedByteArray::length() const { return len_; } 52 53 size_t MemoryPagedByteArray::pageSize() const { return len_; } 54 55 void MemoryPagedByteArray::getPage(size_t /* page_index */, 56 const unsigned char **begin, 57 const unsigned char **end, 58 PagePtr *page) const { 59 *begin = buffer_; 60 *end = buffer_ + len_; 61 *page = PagePtr(); 62 } 63 64 // A functor that does nothing. This is used as a no-op shared pointer 65 // deallocator below. 66 class NullFunctor { 67 public: 68 void operator()() {} 69 void operator()(PagedByteArray * /* p */) const {} 70 }; 71 } // namespace 72 73 PagedByteArray::~PagedByteArray() {} 74 75 RangeCheckedBytePtr::RangeCheckedBytePtr() 76 : array_(), 77 page_data_(NULL), 78 current_pos_(0), 79 sub_array_begin_(0), 80 sub_array_end_(0), 81 page_begin_offset_(0), 82 current_page_len_(0), 83 error_flag_(RANGE_CHECKED_BYTE_ERROR) {} 84 85 RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array, 86 const size_t len) 87 : array_(new MemoryPagedByteArray(array, len)), 88 page_data_(NULL), 89 current_pos_(0), 90 sub_array_begin_(0), 91 sub_array_end_(len), 92 page_begin_offset_(0), 93 current_page_len_(0), 94 error_flag_(RANGE_CHECKED_BYTE_SUCCESS) { 95 assert(array); 96 if (array == NULL) { 97 error_flag_ = RANGE_CHECKED_BYTE_ERROR; 98 } 99 } 100 101 RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array) 102 : array_(array, NullFunctor()), 103 page_data_(NULL), 104 current_pos_(0), 105 sub_array_begin_(0), 106 sub_array_end_(array->length()), 107 page_begin_offset_(0), 108 current_page_len_(0), 109 error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {} 110 111 RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() { 112 return RangeCheckedBytePtr(); 113 } 114 115 RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray( 116 size_t pos, size_t length) const { 117 RangeCheckedBytePtr sub_result = (*this) + pos; 118 if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) { 119 sub_result.sub_array_begin_ = sub_result.current_pos_; 120 sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length; 121 122 // Restrict the boundaries of the current page to the newly set sub-array. 123 sub_result.restrictPageToSubArray(); 124 125 return sub_result; 126 } else { 127 error_flag_ = RANGE_CHECKED_BYTE_ERROR; 128 return invalidPointer(); 129 } 130 } 131 132 size_t RangeCheckedBytePtr::offsetInArray() const { 133 // sub_array_begin_ <= current_pos_ is a class invariant, but protect 134 // against violations of this invariant. 135 if (sub_array_begin_ <= current_pos_) { 136 return current_pos_ - sub_array_begin_; 137 } else { 138 assert(false); 139 return 0; 140 } 141 } 142 143 std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const { 144 std::vector<unsigned char> bytes = extractBytes(pos, length); 145 std::string result; 146 result.reserve(bytes.size()); 147 for (size_t i = 0; i < bytes.size(); ++i) { 148 result.push_back(static_cast<char>(bytes[i])); 149 } 150 return result; 151 } 152 153 std::vector<unsigned char> RangeCheckedBytePtr::extractBytes( 154 size_t pos, size_t length) const { 155 std::vector<unsigned char> result; 156 if (pos + length < pos /* overflow */ || remainingLength() < pos + length) { 157 BREAK_IF_DEBUGGING(); 158 error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; 159 return result; 160 } 161 result.reserve(length); 162 for (size_t i = 0; i < length; ++i) { 163 result.push_back((*this)[pos + i]); 164 } 165 return result; 166 } 167 168 bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { 169 if (x.array_ != y.array_) { 170 assert(false); 171 return false; 172 } 173 174 return x.current_pos_ == y.current_pos_; 175 } 176 177 bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { 178 return !(x == y); 179 } 180 181 void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const { 182 // The offset should always lie within the bounds of the sub-array (this 183 // condition is enforced at the callsite). However, even if the offset lies 184 // outside the sub-array, the restrictPageToSubArray() call at the end 185 // ensures that the object is left in a consistent state that maintains the 186 // class invariants. 187 assert(offset >= sub_array_begin_ && offset < sub_array_end_); 188 189 // Ensure that offset lies within the array. 190 if (offset >= array_->length()) { 191 assert(false); 192 return; 193 } 194 195 // Determine the index of the page to request. 196 size_t page_index = offset / array_->pageSize(); 197 198 // Get the page. 199 const unsigned char *page_begin; 200 const unsigned char *page_end; 201 array_->getPage(page_index, &page_begin, &page_end, &page_); 202 203 // Ensure that the page has the expected length (as specified in the 204 // PagedByteArray interface). 205 size_t expected_page_size = array_->pageSize(); 206 if (page_index == (array_->length() - 1) / array_->pageSize()) { 207 expected_page_size = array_->length() - array_->pageSize() * page_index; 208 } 209 if ((page_end < page_begin) || 210 (static_cast<size_t>(page_end - page_begin) != expected_page_size)) { 211 assert(false); 212 return; 213 } 214 215 // Remember information about page. 216 page_data_ = page_begin; 217 page_begin_offset_ = page_index * array_->pageSize(); 218 current_page_len_ = static_cast<size_t>(page_end - page_begin); 219 220 // Restrict the boundaries of the page to lie within the sub-array. 221 restrictPageToSubArray(); 222 } 223 224 void RangeCheckedBytePtr::restrictPageToSubArray() const { 225 // Restrict the current page's boundaries so that it is always contained 226 // completely within the extent of the sub-array. 227 // This function is purposely designed to work correctly in the following 228 // two special cases: 229 // a) The current page lies entirely outside the sub-array. In this case, 230 // current_page_len_ will be set to zero. page_data_ may either remain 231 // unchanged or may be changed to point one element beyond the end of the 232 // page, depending on whether the current page lies before or after the 233 // sub-array. 234 // b) The current page is in the state as initialized by the constructor 235 // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case, 236 // page_data_ and current_page_len_ will remain unchanged. 237 238 // Does the beginning of the page lie before the beginning of the sub-array? 239 if (page_begin_offset_ < sub_array_begin_) { 240 // Compute amount by which to shorten page. 241 size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_; 242 if (amount_to_shorten > current_page_len_) { 243 amount_to_shorten = current_page_len_; 244 } 245 246 // Adjust beginning of page accordingly. 247 page_begin_offset_ += amount_to_shorten; 248 page_data_ += amount_to_shorten; 249 current_page_len_ -= amount_to_shorten; 250 } 251 252 // Does the end of the page lie beyond the end of the sub-array? 253 if (page_begin_offset_ + current_page_len_ > sub_array_end_) { 254 // Reduce length of page accordingly. 255 size_t new_len = sub_array_end_ - page_begin_offset_; 256 if (new_len > current_page_len_) { 257 new_len = current_page_len_; 258 } 259 current_page_len_ = new_len; 260 } 261 } 262 263 int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, 264 size_t num) { 265 std::vector<unsigned char> x_vec = x.extractBytes(0, num); 266 std::vector<unsigned char> y_vec = y.extractBytes(0, num); 267 268 if (!x.errorOccurred() && !y.errorOccurred()) { 269 return ::memcmp(&x_vec[0], &y_vec[0], num); 270 } else { 271 // return an arbitrary value 272 return -1; 273 } 274 } 275 276 int strcmp(const RangeCheckedBytePtr &x, const std::string &y) { 277 std::vector<unsigned char> x_vec = x.extractBytes(0, y.length()); 278 279 if (!x.errorOccurred()) { 280 return ::memcmp(&x_vec[0], y.c_str(), y.length()); 281 } else { 282 // return an arbitrary value 283 return -1; 284 } 285 } 286 287 size_t strlen(const RangeCheckedBytePtr &src) { 288 size_t len = 0; 289 RangeCheckedBytePtr str = src; 290 while (!str.errorOccurred() && (str[0] != '\0')) { 291 str++; 292 len++; 293 } 294 return len; 295 } 296 297 int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, 298 MemoryStatus *status) { 299 const uint16 unsigned_value = Get16u(input, big_endian, status); 300 if (*status != RANGE_CHECKED_BYTE_SUCCESS) { 301 // Return an arbitrary value. 302 return 0; 303 } 304 305 // Convert the two's-complement signed integer encoded in 'unsigned_value' 306 // into a signed representation in the implementation's native representation 307 // for signed integers. An optimized Blaze build (x64) compiles all of the 308 // following code to a no-op (as of this writing). 309 // For further details, see the corresponding comment in Get32s(). 310 if (unsigned_value == 0x8000u) { 311 return static_cast<int16>(-0x8000); 312 } else if (unsigned_value > 0x8000u) { 313 return -static_cast<int16>(0x10000u - unsigned_value); 314 } else { 315 return static_cast<int16>(unsigned_value); 316 } 317 } 318 319 uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, 320 MemoryStatus *status) { 321 if (input.remainingLength() < 2) { 322 if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { 323 *status = RANGE_CHECKED_BYTE_ERROR; 324 } 325 // Return an arbitrary value. 326 return 0; 327 } 328 if (big_endian) { 329 return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]); 330 } else { 331 return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]); 332 } 333 } 334 335 int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, 336 MemoryStatus *status) { 337 const uint32 unsigned_value = Get32u(input, big_endian, status); 338 if (*status != RANGE_CHECKED_BYTE_SUCCESS) { 339 // Return an arbitrary value. 340 return 0; 341 } 342 343 // Convert the two's-complement signed integer encoded in 'unsigned_value' 344 // into a signed representation in the implementation's native representation 345 // for signed integers. 346 // For all practical purposes, the same result could be obtained simply by 347 // casting unsigned_value to int32; the result of this is 348 // implementation-defined, but on all of the platforms we care about, it does 349 // what we want. 350 // The code below, however, arguably has the aesthetic advantage of being 351 // independent of the representation for signed integers chosen by the 352 // implementation, as long as 'int' and 'unsigned' have the required range to 353 // represent all of the required values. 354 // An optimized Blaze build (x64) compiles all of the following code to a 355 // no-op (as of this writing); i.e. the value that Get32u() returned in %eax 356 // is left unchanged. 357 if (unsigned_value == 0x80000000u) { 358 // Read here on why the constant expression is written this way: 359 // http://stackoverflow.com/questions/14695118 360 return -0x7fffffff - 1; 361 } else if (unsigned_value > 0x80000000u) { 362 // The expression 363 // 0xffffffffu - unsigned_value + 1 364 // is a portable way of flipping the sign of a twos-complement signed 365 // integer whose binary representation is stored in an unsigned integer. 366 // '0xffffffffu + 1' is used in preference to simply '0' because it makes 367 // it clearer that the correct result will be obtained even if an int is 368 // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around 369 // 'unsigned_value' to prevent the compiler from warning about an 370 // integral constant overflow. ('0' would produce the correct result in 371 // this case too but would rely in a more subtle way on the rules for 372 // unsigned wraparound.) 373 return -static_cast<int32>(0xffffffffu - unsigned_value + 1); 374 } else { 375 return static_cast<int32>(unsigned_value); 376 } 377 } 378 379 uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, 380 MemoryStatus *status) { 381 if (input.remainingLength() < 4) { 382 if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { 383 *status = RANGE_CHECKED_BYTE_ERROR; 384 } 385 // Return an arbitrary value. 386 return 0; 387 } 388 if (big_endian) { 389 return (static_cast<uint32>(input[0]) << 24) | 390 (static_cast<uint32>(input[1]) << 16) | 391 (static_cast<uint32>(input[2]) << 8) | 392 (static_cast<uint32>(input[3]) << 0); 393 } else { 394 return (static_cast<uint32>(input[3]) << 24) | 395 (static_cast<uint32>(input[2]) << 16) | 396 (static_cast<uint32>(input[1]) << 8) | 397 (static_cast<uint32>(input[0]) << 0); 398 } 399 } 400 401 } // namespace binary_parse 402 } // namespace piex 403