Home | History | Annotate | Download | only in wasm
      1 // Copyright 2015 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_WASM_DECODER_H_
      6 #define V8_WASM_DECODER_H_
      7 
      8 #include <memory>
      9 
     10 #include "src/base/compiler-specific.h"
     11 #include "src/flags.h"
     12 #include "src/signature.h"
     13 #include "src/utils.h"
     14 #include "src/wasm/wasm-result.h"
     15 #include "src/zone/zone-containers.h"
     16 
     17 namespace v8 {
     18 namespace internal {
     19 namespace wasm {
     20 
     21 #if DEBUG
     22 #define TRACE(...)                                    \
     23   do {                                                \
     24     if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
     25   } while (false)
     26 #else
     27 #define TRACE(...)
     28 #endif
     29 
     30 // A helper utility to decode bytes, integers, fields, varints, etc, from
     31 // a buffer of bytes.
     32 class Decoder {
     33  public:
     34   Decoder(const byte* start, const byte* end)
     35       : start_(start),
     36         pc_(start),
     37         limit_(end),
     38         end_(end),
     39         error_pc_(nullptr),
     40         error_pt_(nullptr) {}
     41 
     42   virtual ~Decoder() {}
     43 
     44   inline bool check(const byte* base, unsigned offset, unsigned length,
     45                     const char* msg) {
     46     DCHECK_GE(base, start_);
     47     if ((base + offset + length) > limit_) {
     48       error(base, base + offset, "%s", msg);
     49       return false;
     50     }
     51     return true;
     52   }
     53 
     54   // Reads a single 8-bit byte, reporting an error if out of bounds.
     55   inline uint8_t checked_read_u8(const byte* base, unsigned offset,
     56                                  const char* msg = "expected 1 byte") {
     57     return check(base, offset, 1, msg) ? base[offset] : 0;
     58   }
     59 
     60   // Reads 16-bit word, reporting an error if out of bounds.
     61   inline uint16_t checked_read_u16(const byte* base, unsigned offset,
     62                                    const char* msg = "expected 2 bytes") {
     63     return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
     64   }
     65 
     66   // Reads 32-bit word, reporting an error if out of bounds.
     67   inline uint32_t checked_read_u32(const byte* base, unsigned offset,
     68                                    const char* msg = "expected 4 bytes") {
     69     return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
     70   }
     71 
     72   // Reads 64-bit word, reporting an error if out of bounds.
     73   inline uint64_t checked_read_u64(const byte* base, unsigned offset,
     74                                    const char* msg = "expected 8 bytes") {
     75     return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
     76   }
     77 
     78   // Reads a variable-length unsigned integer (little endian).
     79   uint32_t checked_read_u32v(const byte* base, unsigned offset,
     80                              unsigned* length,
     81                              const char* msg = "expected LEB32") {
     82     return checked_read_leb<uint32_t, false>(base, offset, length, msg);
     83   }
     84 
     85   // Reads a variable-length signed integer (little endian).
     86   int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length,
     87                             const char* msg = "expected SLEB32") {
     88     uint32_t result =
     89         checked_read_leb<uint32_t, true>(base, offset, length, msg);
     90     if (*length == 5) return bit_cast<int32_t>(result);
     91     if (*length > 0) {
     92       int shift = 32 - 7 * *length;
     93       // Perform sign extension.
     94       return bit_cast<int32_t>(result << shift) >> shift;
     95     }
     96     return 0;
     97   }
     98 
     99   // Reads a variable-length unsigned integer (little endian).
    100   uint64_t checked_read_u64v(const byte* base, unsigned offset,
    101                              unsigned* length,
    102                              const char* msg = "expected LEB64") {
    103     return checked_read_leb<uint64_t, false>(base, offset, length, msg);
    104   }
    105 
    106   // Reads a variable-length signed integer (little endian).
    107   int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length,
    108                             const char* msg = "expected SLEB64") {
    109     uint64_t result =
    110         checked_read_leb<uint64_t, true>(base, offset, length, msg);
    111     if (*length == 10) return bit_cast<int64_t>(result);
    112     if (*length > 0) {
    113       int shift = 64 - 7 * *length;
    114       // Perform sign extension.
    115       return bit_cast<int64_t>(result << shift) >> shift;
    116     }
    117     return 0;
    118   }
    119 
    120   // Reads a single 16-bit unsigned integer (little endian).
    121   inline uint16_t read_u16(const byte* ptr) {
    122     DCHECK(ptr >= start_ && (ptr + 2) <= end_);
    123     return ReadLittleEndianValue<uint16_t>(ptr);
    124   }
    125 
    126   // Reads a single 32-bit unsigned integer (little endian).
    127   inline uint32_t read_u32(const byte* ptr) {
    128     DCHECK(ptr >= start_ && (ptr + 4) <= end_);
    129     return ReadLittleEndianValue<uint32_t>(ptr);
    130   }
    131 
    132   // Reads a single 64-bit unsigned integer (little endian).
    133   inline uint64_t read_u64(const byte* ptr) {
    134     DCHECK(ptr >= start_ && (ptr + 8) <= end_);
    135     return ReadLittleEndianValue<uint64_t>(ptr);
    136   }
    137 
    138   // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
    139   uint8_t consume_u8(const char* name = nullptr) {
    140     TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
    141           name ? name : "uint8_t");
    142     if (checkAvailable(1)) {
    143       byte val = *(pc_++);
    144       TRACE("%02x = %d\n", val, val);
    145       return val;
    146     }
    147     return traceOffEnd<uint8_t>();
    148   }
    149 
    150   // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
    151   uint16_t consume_u16(const char* name = nullptr) {
    152     TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
    153           name ? name : "uint16_t");
    154     if (checkAvailable(2)) {
    155       uint16_t val = read_u16(pc_);
    156       TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
    157       pc_ += 2;
    158       return val;
    159     }
    160     return traceOffEnd<uint16_t>();
    161   }
    162 
    163   // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
    164   uint32_t consume_u32(const char* name = nullptr) {
    165     TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
    166           name ? name : "uint32_t");
    167     if (checkAvailable(4)) {
    168       uint32_t val = read_u32(pc_);
    169       TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
    170       pc_ += 4;
    171       return val;
    172     }
    173     return traceOffEnd<uint32_t>();
    174   }
    175 
    176   // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
    177   uint32_t consume_u32v(const char* name = nullptr) {
    178     return consume_leb<uint32_t, false>(name);
    179   }
    180 
    181   // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
    182   int32_t consume_i32v(const char* name = nullptr) {
    183     return consume_leb<int32_t, true>(name);
    184   }
    185 
    186   // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
    187   void consume_bytes(uint32_t size, const char* name = "skip") {
    188     TRACE("  +%d  %-20s: %d bytes\n", static_cast<int>(pc_ - start_), name,
    189           size);
    190     if (checkAvailable(size)) {
    191       pc_ += size;
    192     } else {
    193       pc_ = limit_;
    194     }
    195   }
    196 
    197   // Check that at least {size} bytes exist between {pc_} and {limit_}.
    198   bool checkAvailable(int size) {
    199     intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size;
    200     if (size < 0 || (intptr_t)pc_ > pc_overflow_value) {
    201       error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
    202       return false;
    203     } else if (pc_ < start_ || limit_ < (pc_ + size)) {
    204       error(pc_, nullptr, "expected %d bytes, fell off end", size);
    205       return false;
    206     } else {
    207       return true;
    208     }
    209   }
    210 
    211   void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
    212 
    213   void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
    214 
    215   // Sets internal error state.
    216   void PRINTF_FORMAT(4, 5)
    217       error(const byte* pc, const byte* pt, const char* format, ...) {
    218     if (ok()) {
    219 #if DEBUG
    220       if (FLAG_wasm_break_on_decoder_error) {
    221         base::OS::DebugBreak();
    222       }
    223 #endif
    224       const int kMaxErrorMsg = 256;
    225       char* buffer = new char[kMaxErrorMsg];
    226       va_list arguments;
    227       va_start(arguments, format);
    228       base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments);
    229       va_end(arguments);
    230       error_msg_.reset(buffer);
    231       error_pc_ = pc;
    232       error_pt_ = pt;
    233       onFirstError();
    234     }
    235   }
    236 
    237   // Behavior triggered on first error, overridden in subclasses.
    238   virtual void onFirstError() {}
    239 
    240   // Debugging helper to print bytes up to the end.
    241   template <typename T>
    242   T traceOffEnd() {
    243     T t = 0;
    244     for (const byte* ptr = pc_; ptr < limit_; ptr++) {
    245       TRACE("%02x ", *ptr);
    246     }
    247     TRACE("<end>\n");
    248     pc_ = limit_;
    249     return t;
    250   }
    251 
    252   // Converts the given value to a {Result}, copying the error if necessary.
    253   template <typename T>
    254   Result<T> toResult(T val) {
    255     Result<T> result;
    256     if (failed()) {
    257       TRACE("Result error: %s\n", error_msg_.get());
    258       result.error_code = kError;
    259       result.start = start_;
    260       result.error_pc = error_pc_;
    261       result.error_pt = error_pt_;
    262       // transfer ownership of the error to the result.
    263       result.error_msg.reset(error_msg_.release());
    264     } else {
    265       result.error_code = kSuccess;
    266     }
    267     result.val = std::move(val);
    268     return result;
    269   }
    270 
    271   // Resets the boundaries of this decoder.
    272   void Reset(const byte* start, const byte* end) {
    273     start_ = start;
    274     pc_ = start;
    275     limit_ = end;
    276     end_ = end;
    277     error_pc_ = nullptr;
    278     error_pt_ = nullptr;
    279     error_msg_.reset();
    280   }
    281 
    282   bool ok() const { return error_msg_ == nullptr; }
    283   bool failed() const { return !ok(); }
    284   bool more() const { return pc_ < limit_; }
    285 
    286   const byte* start() { return start_; }
    287   const byte* pc() { return pc_; }
    288   uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); }
    289 
    290  protected:
    291   const byte* start_;
    292   const byte* pc_;
    293   const byte* limit_;
    294   const byte* end_;
    295   const byte* error_pc_;
    296   const byte* error_pt_;
    297   std::unique_ptr<char[]> error_msg_;
    298 
    299  private:
    300   template <typename IntType, bool is_signed>
    301   IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length,
    302                            const char* msg) {
    303     if (!check(base, offset, 1, msg)) {
    304       *length = 0;
    305       return 0;
    306     }
    307 
    308     const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
    309     const byte* ptr = base + offset;
    310     const byte* end = ptr + kMaxLength;
    311     if (end > limit_) end = limit_;
    312     int shift = 0;
    313     byte b = 0;
    314     IntType result = 0;
    315     while (ptr < end) {
    316       b = *ptr++;
    317       result = result | (static_cast<IntType>(b & 0x7F) << shift);
    318       if ((b & 0x80) == 0) break;
    319       shift += 7;
    320     }
    321     DCHECK_LE(ptr - (base + offset), kMaxLength);
    322     *length = static_cast<unsigned>(ptr - (base + offset));
    323     if (ptr == end) {
    324       // Check there are no bits set beyond the bitwidth of {IntType}.
    325       const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
    326       const byte kExtraBitsMask =
    327           static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
    328       int extra_bits_value;
    329       if (is_signed) {
    330         // A signed-LEB128 must sign-extend the final byte, excluding its
    331         // most-signifcant bit. e.g. for a 32-bit LEB128:
    332         //   kExtraBits = 4
    333         //   kExtraBitsMask = 0xf0
    334         // If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
    335         // If b is 0x03, the value is positive, so extra_bits_value is 0x00.
    336         extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
    337                            kExtraBitsMask & ~0x80;
    338       } else {
    339         extra_bits_value = 0;
    340       }
    341       if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
    342         error(base, ptr, "extra bits in varint");
    343         return 0;
    344       }
    345       if ((b & 0x80) != 0) {
    346         error(base, ptr, "%s", msg);
    347         return 0;
    348       }
    349     }
    350     return result;
    351   }
    352 
    353   template <typename IntType, bool is_signed>
    354   IntType consume_leb(const char* name = nullptr) {
    355     TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
    356           name ? name : "varint");
    357     if (checkAvailable(1)) {
    358       const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
    359       const byte* pos = pc_;
    360       const byte* end = pc_ + kMaxLength;
    361       if (end > limit_) end = limit_;
    362 
    363       IntType result = 0;
    364       int shift = 0;
    365       byte b = 0;
    366       while (pc_ < end) {
    367         b = *pc_++;
    368         TRACE("%02x ", b);
    369         result = result | (static_cast<IntType>(b & 0x7F) << shift);
    370         shift += 7;
    371         if ((b & 0x80) == 0) break;
    372       }
    373 
    374       int length = static_cast<int>(pc_ - pos);
    375       if (pc_ == end && (b & 0x80)) {
    376         error(pc_ - 1, "varint too large");
    377       } else if (length == 0) {
    378         error(pc_, "varint of length 0");
    379       } else if (is_signed) {
    380         if (length < kMaxLength) {
    381           int sign_ext_shift = 8 * sizeof(IntType) - shift;
    382           // Perform sign extension.
    383           result = (result << sign_ext_shift) >> sign_ext_shift;
    384         }
    385         TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
    386       } else {
    387         TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
    388       }
    389       return result;
    390     }
    391     return traceOffEnd<uint32_t>();
    392   }
    393 };
    394 
    395 #undef TRACE
    396 }  // namespace wasm
    397 }  // namespace internal
    398 }  // namespace v8
    399 
    400 #endif  // V8_WASM_DECODER_H_
    401