Home | History | Annotate | Download | only in binary_parse
      1 // Copyright 2015 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 ////////////////////////////////////////////////////////////////////////////////
     16 
     17 #include "src/binary_parse/range_checked_byte_ptr.h"
     18 
     19 #include <assert.h>
     20 #include <cstddef>
     21 #include <cstring>
     22 
     23 namespace piex {
     24 namespace binary_parse {
     25 
     26 #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
     27 #define BREAK_IF_DEBUGGING() assert(false)
     28 #else
     29 #define BREAK_IF_DEBUGGING() assert(true)
     30 #endif
     31 
     32 namespace {
     33 class MemoryPagedByteArray : public PagedByteArray {
     34  public:
     35   MemoryPagedByteArray(const unsigned char *buffer, const size_t len);
     36 
     37   virtual size_t length() const;
     38   virtual size_t pageSize() const;
     39   virtual void getPage(size_t page_index, const unsigned char **begin,
     40                        const unsigned char **end, PagePtr *page) const;
     41 
     42  private:
     43   const unsigned char *buffer_;
     44   const size_t len_;
     45 };
     46 
     47 MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
     48                                            const size_t len)
     49     : buffer_(buffer), len_(len) {}
     50 
     51 size_t MemoryPagedByteArray::length() const { return len_; }
     52 
     53 size_t MemoryPagedByteArray::pageSize() const { return len_; }
     54 
     55 void MemoryPagedByteArray::getPage(size_t /* page_index */,
     56                                    const unsigned char **begin,
     57                                    const unsigned char **end,
     58                                    PagePtr *page) const {
     59   *begin = buffer_;
     60   *end = buffer_ + len_;
     61   *page = PagePtr();
     62 }
     63 
     64 // A functor that does nothing. This is used as a no-op shared pointer
     65 // deallocator below.
     66 class NullFunctor {
     67  public:
     68   void operator()() {}
     69   void operator()(PagedByteArray * /* p */) const {}
     70 };
     71 }  // namespace
     72 
     73 PagedByteArray::~PagedByteArray() {}
     74 
     75 RangeCheckedBytePtr::RangeCheckedBytePtr()
     76     : array_(),
     77       page_data_(NULL),
     78       current_pos_(0),
     79       sub_array_begin_(0),
     80       sub_array_end_(0),
     81       page_begin_offset_(0),
     82       current_page_len_(0),
     83       error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
     84 
     85 RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
     86                                          const size_t len)
     87     : array_(new MemoryPagedByteArray(array, len)),
     88       page_data_(NULL),
     89       current_pos_(0),
     90       sub_array_begin_(0),
     91       sub_array_end_(len),
     92       page_begin_offset_(0),
     93       current_page_len_(0),
     94       error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
     95   assert(array);
     96   if (array == NULL) {
     97     error_flag_ = RANGE_CHECKED_BYTE_ERROR;
     98   }
     99 }
    100 
    101 RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
    102     : array_(array, NullFunctor()),
    103       page_data_(NULL),
    104       current_pos_(0),
    105       sub_array_begin_(0),
    106       sub_array_end_(array->length()),
    107       page_begin_offset_(0),
    108       current_page_len_(0),
    109       error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}
    110 
    111 RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
    112   return RangeCheckedBytePtr();
    113 }
    114 
    115 RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
    116     size_t pos, size_t length) const {
    117   RangeCheckedBytePtr sub_result = (*this) + pos;
    118   if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
    119     sub_result.sub_array_begin_ = sub_result.current_pos_;
    120     sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;
    121 
    122     // Restrict the boundaries of the current page to the newly set sub-array.
    123     sub_result.restrictPageToSubArray();
    124 
    125     return sub_result;
    126   } else {
    127     error_flag_ = RANGE_CHECKED_BYTE_ERROR;
    128     return invalidPointer();
    129   }
    130 }
    131 
    132 size_t RangeCheckedBytePtr::offsetInArray() const {
    133   // sub_array_begin_ <= current_pos_ is a class invariant, but protect
    134   // against violations of this invariant.
    135   if (sub_array_begin_ <= current_pos_) {
    136     return current_pos_ - sub_array_begin_;
    137   } else {
    138     assert(false);
    139     return 0;
    140   }
    141 }
    142 
    143 std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
    144   std::vector<unsigned char> bytes = extractBytes(pos, length);
    145   std::string result;
    146   result.reserve(bytes.size());
    147   for (size_t i = 0; i < bytes.size(); ++i) {
    148     result.push_back(static_cast<char>(bytes[i]));
    149   }
    150   return result;
    151 }
    152 
    153 std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
    154     size_t pos, size_t length) const {
    155   std::vector<unsigned char> result;
    156   if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
    157     BREAK_IF_DEBUGGING();
    158     error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
    159     return result;
    160   }
    161   result.reserve(length);
    162   for (size_t i = 0; i < length; ++i) {
    163     result.push_back((*this)[pos + i]);
    164   }
    165   return result;
    166 }
    167 
    168 bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
    169   if (x.array_ != y.array_) {
    170     assert(false);
    171     return false;
    172   }
    173 
    174   return x.current_pos_ == y.current_pos_;
    175 }
    176 
    177 bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
    178   return !(x == y);
    179 }
    180 
    181 void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
    182   // The offset should always lie within the bounds of the sub-array (this
    183   // condition is enforced at the callsite). However, even if the offset lies
    184   // outside the sub-array, the restrictPageToSubArray() call at the end
    185   // ensures that the object is left in a consistent state that maintains the
    186   // class invariants.
    187   assert(offset >= sub_array_begin_ && offset < sub_array_end_);
    188 
    189   // Ensure that offset lies within the array.
    190   if (offset >= array_->length()) {
    191     assert(false);
    192     return;
    193   }
    194 
    195   // Determine the index of the page to request.
    196   size_t page_index = offset / array_->pageSize();
    197 
    198   // Get the page.
    199   const unsigned char *page_begin;
    200   const unsigned char *page_end;
    201   array_->getPage(page_index, &page_begin, &page_end, &page_);
    202 
    203   // Ensure that the page has the expected length (as specified in the
    204   // PagedByteArray interface).
    205   size_t expected_page_size = array_->pageSize();
    206   if (page_index == (array_->length() - 1) / array_->pageSize()) {
    207     expected_page_size = array_->length() - array_->pageSize() * page_index;
    208   }
    209   if ((page_end < page_begin) ||
    210       (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
    211     assert(false);
    212     return;
    213   }
    214 
    215   // Remember information about page.
    216   page_data_ = page_begin;
    217   page_begin_offset_ = page_index * array_->pageSize();
    218   current_page_len_ = static_cast<size_t>(page_end - page_begin);
    219 
    220   // Restrict the boundaries of the page to lie within the sub-array.
    221   restrictPageToSubArray();
    222 }
    223 
    224 void RangeCheckedBytePtr::restrictPageToSubArray() const {
    225   // Restrict the current page's boundaries so that it is always contained
    226   // completely within the extent of the sub-array.
    227   // This function is purposely designed to work correctly in the following
    228   // two special cases:
    229   // a) The current page lies entirely outside the sub-array. In this case,
    230   //    current_page_len_ will be set to zero. page_data_ may either remain
    231   //    unchanged or may be changed to point one element beyond the end of the
    232   //    page, depending on whether the current page lies before or after the
    233   //    sub-array.
    234   // b) The current page is in the state as initialized by the constructor
    235   //    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
    236   //    page_data_ and current_page_len_ will remain unchanged.
    237 
    238   // Does the beginning of the page lie before the beginning of the sub-array?
    239   if (page_begin_offset_ < sub_array_begin_) {
    240     // Compute amount by which to shorten page.
    241     size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
    242     if (amount_to_shorten > current_page_len_) {
    243       amount_to_shorten = current_page_len_;
    244     }
    245 
    246     // Adjust beginning of page accordingly.
    247     page_begin_offset_ += amount_to_shorten;
    248     page_data_ += amount_to_shorten;
    249     current_page_len_ -= amount_to_shorten;
    250   }
    251 
    252   // Does the end of the page lie beyond the end of the sub-array?
    253   if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
    254     // Reduce length of page accordingly.
    255     size_t new_len = sub_array_end_ - page_begin_offset_;
    256     if (new_len > current_page_len_) {
    257       new_len = current_page_len_;
    258     }
    259     current_page_len_ = new_len;
    260   }
    261 }
    262 
    263 int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
    264            size_t num) {
    265   std::vector<unsigned char> x_vec = x.extractBytes(0, num);
    266   std::vector<unsigned char> y_vec = y.extractBytes(0, num);
    267 
    268   if (!x.errorOccurred() && !y.errorOccurred()) {
    269     return ::memcmp(&x_vec[0], &y_vec[0], num);
    270   } else {
    271     // return an arbitrary value
    272     return -1;
    273   }
    274 }
    275 
    276 int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
    277   std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
    278 
    279   if (!x.errorOccurred()) {
    280     return ::memcmp(&x_vec[0], y.c_str(), y.length());
    281   } else {
    282     // return an arbitrary value
    283     return -1;
    284   }
    285 }
    286 
    287 size_t strlen(const RangeCheckedBytePtr &src) {
    288   size_t len = 0;
    289   RangeCheckedBytePtr str = src;
    290   while (!str.errorOccurred() && (str[0] != '\0')) {
    291     str++;
    292     len++;
    293   }
    294   return len;
    295 }
    296 
    297 int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
    298              MemoryStatus *status) {
    299   const uint16 unsigned_value = Get16u(input, big_endian, status);
    300   if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
    301     // Return an arbitrary value.
    302     return 0;
    303   }
    304 
    305   // Convert the two's-complement signed integer encoded in 'unsigned_value'
    306   // into a signed representation in the implementation's native representation
    307   // for signed integers. An optimized Blaze build (x64) compiles all of the
    308   // following code to a no-op (as of this writing).
    309   // For further details, see the corresponding comment in Get32s().
    310   if (unsigned_value == 0x8000u) {
    311     return static_cast<int16>(-0x8000);
    312   } else if (unsigned_value > 0x8000u) {
    313     return -static_cast<int16>(0x10000u - unsigned_value);
    314   } else {
    315     return static_cast<int16>(unsigned_value);
    316   }
    317 }
    318 
    319 uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
    320               MemoryStatus *status) {
    321   if (input.remainingLength() < 2) {
    322     if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
    323       *status = RANGE_CHECKED_BYTE_ERROR;
    324     }
    325     // Return an arbitrary value.
    326     return 0;
    327   }
    328   if (big_endian) {
    329     return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
    330   } else {
    331     return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
    332   }
    333 }
    334 
    335 int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
    336              MemoryStatus *status) {
    337   const uint32 unsigned_value = Get32u(input, big_endian, status);
    338   if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
    339     // Return an arbitrary value.
    340     return 0;
    341   }
    342 
    343   // Convert the two's-complement signed integer encoded in 'unsigned_value'
    344   // into a signed representation in the implementation's native representation
    345   // for signed integers.
    346   // For all practical purposes, the same result could be obtained simply by
    347   // casting unsigned_value to int32; the result of this is
    348   // implementation-defined, but on all of the platforms we care about, it does
    349   // what we want.
    350   // The code below, however, arguably has the aesthetic advantage of being
    351   // independent of the representation for signed integers chosen by the
    352   // implementation, as long as 'int' and 'unsigned' have the required range to
    353   // represent all of the required values.
    354   // An optimized Blaze build (x64) compiles all of the following code to a
    355   // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
    356   // is left unchanged.
    357   if (unsigned_value == 0x80000000u) {
    358     // Read here on why the constant expression is written this way:
    359     // http://stackoverflow.com/questions/14695118
    360     return -0x7fffffff - 1;
    361   } else if (unsigned_value > 0x80000000u) {
    362     // The expression
    363     //   0xffffffffu - unsigned_value + 1
    364     // is a portable way of flipping the sign of a twos-complement signed
    365     // integer whose binary representation is stored in an unsigned integer.
    366     // '0xffffffffu + 1' is used in preference to simply '0' because it makes
    367     // it clearer that the correct result will be obtained even if an int is
    368     // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
    369     // 'unsigned_value' to prevent the compiler from warning about an
    370     // integral constant overflow. ('0' would produce the correct result in
    371     // this case too but would rely in a more subtle way on the rules for
    372     // unsigned wraparound.)
    373     return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
    374   } else {
    375     return static_cast<int32>(unsigned_value);
    376   }
    377 }
    378 
    379 uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
    380               MemoryStatus *status) {
    381   if (input.remainingLength() < 4) {
    382     if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
    383       *status = RANGE_CHECKED_BYTE_ERROR;
    384     }
    385     // Return an arbitrary value.
    386     return 0;
    387   }
    388   if (big_endian) {
    389     return (static_cast<uint32>(input[0]) << 24) |
    390            (static_cast<uint32>(input[1]) << 16) |
    391            (static_cast<uint32>(input[2]) << 8) |
    392            (static_cast<uint32>(input[3]) << 0);
    393   } else {
    394     return (static_cast<uint32>(input[3]) << 24) |
    395            (static_cast<uint32>(input[2]) << 16) |
    396            (static_cast<uint32>(input[1]) << 8) |
    397            (static_cast<uint32>(input[0]) << 0);
    398   }
    399 }
    400 
    401 }  // namespace binary_parse
    402 }  // namespace piex
    403