Home | History | Annotate | Download | only in wasm
      1 // Copyright 2015 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_WASM_DECODER_H_
      6 #define V8_WASM_DECODER_H_
      7 
#include <cstdarg>
#include <memory>
#include <type_traits>

#include "src/base/compiler-specific.h"
#include "src/flags.h"
#include "src/signature.h"
#include "src/v8memory.h"
#include "src/wasm/wasm-result.h"
#include "src/zone/zone-containers.h"
     17 
     18 namespace v8 {
     19 namespace internal {
     20 namespace wasm {
     21 
     22 #define TRACE(...)                                    \
     23   do {                                                \
     24     if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
     25   } while (false)
     26 #define TRACE_IF(cond, ...)                                     \
     27   do {                                                          \
     28     if (FLAG_trace_wasm_decoder && (cond)) PrintF(__VA_ARGS__); \
     29   } while (false)
     30 
// A {DecodeResult} only stores the failure / success status, but no data.
// Thus we use {std::nullptr_t} as the data type, such that the only valid data
// stored in this type is a nullptr.
// Storing {void} would require template specialization.
using DecodeResult = Result<std::nullptr_t>;
     36 
     37 // A helper utility to decode bytes, integers, fields, varints, etc, from
     38 // a buffer of bytes.
     39 class Decoder {
     40  public:
     41   enum ValidateFlag : bool { kValidate = true, kNoValidate = false };
     42 
     43   enum AdvancePCFlag : bool { kAdvancePc = true, kNoAdvancePc = false };
     44 
     45   enum TraceFlag : bool { kTrace = true, kNoTrace = false };
     46 
     47   Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0)
     48       : Decoder(start, start, end, buffer_offset) {}
     49   explicit Decoder(const Vector<const byte> bytes, uint32_t buffer_offset = 0)
     50       : Decoder(bytes.start(), bytes.start() + bytes.length(), buffer_offset) {}
     51   Decoder(const byte* start, const byte* pc, const byte* end,
     52           uint32_t buffer_offset = 0)
     53       : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) {
     54     DCHECK_LE(start, pc);
     55     DCHECK_LE(pc, end);
     56     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
     57   }
     58 
     59   virtual ~Decoder() {}
     60 
     61   inline bool validate_size(const byte* pc, uint32_t length, const char* msg) {
     62     DCHECK_LE(start_, pc);
     63     DCHECK_LE(pc, end_);
     64     if (V8_UNLIKELY(length > static_cast<uint32_t>(end_ - pc))) {
     65       error(pc, msg);
     66       return false;
     67     }
     68     return true;
     69   }
     70 
     71   // Reads an 8-bit unsigned integer.
     72   template <ValidateFlag validate>
     73   inline uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") {
     74     return read_little_endian<uint8_t, validate>(pc, msg);
     75   }
     76 
     77   // Reads a 16-bit unsigned integer (little endian).
     78   template <ValidateFlag validate>
     79   inline uint16_t read_u16(const byte* pc,
     80                            const char* msg = "expected 2 bytes") {
     81     return read_little_endian<uint16_t, validate>(pc, msg);
     82   }
     83 
     84   // Reads a 32-bit unsigned integer (little endian).
     85   template <ValidateFlag validate>
     86   inline uint32_t read_u32(const byte* pc,
     87                            const char* msg = "expected 4 bytes") {
     88     return read_little_endian<uint32_t, validate>(pc, msg);
     89   }
     90 
     91   // Reads a 64-bit unsigned integer (little endian).
     92   template <ValidateFlag validate>
     93   inline uint64_t read_u64(const byte* pc,
     94                            const char* msg = "expected 8 bytes") {
     95     return read_little_endian<uint64_t, validate>(pc, msg);
     96   }
     97 
     98   // Reads a variable-length unsigned integer (little endian).
     99   template <ValidateFlag validate>
    100   uint32_t read_u32v(const byte* pc, uint32_t* length,
    101                      const char* name = "LEB32") {
    102     return read_leb<uint32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
    103                                                                 name);
    104   }
    105 
    106   // Reads a variable-length signed integer (little endian).
    107   template <ValidateFlag validate>
    108   int32_t read_i32v(const byte* pc, uint32_t* length,
    109                     const char* name = "signed LEB32") {
    110     return read_leb<int32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
    111                                                                name);
    112   }
    113 
    114   // Reads a variable-length unsigned integer (little endian).
    115   template <ValidateFlag validate>
    116   uint64_t read_u64v(const byte* pc, uint32_t* length,
    117                      const char* name = "LEB64") {
    118     return read_leb<uint64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
    119                                                                 name);
    120   }
    121 
    122   // Reads a variable-length signed integer (little endian).
    123   template <ValidateFlag validate>
    124   int64_t read_i64v(const byte* pc, uint32_t* length,
    125                     const char* name = "signed LEB64") {
    126     return read_leb<int64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
    127                                                                name);
    128   }
    129 
    130   // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
    131   uint8_t consume_u8(const char* name = "uint8_t") {
    132     return consume_little_endian<uint8_t>(name);
    133   }
    134 
    135   // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
    136   uint16_t consume_u16(const char* name = "uint16_t") {
    137     return consume_little_endian<uint16_t>(name);
    138   }
    139 
    140   // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
    141   uint32_t consume_u32(const char* name = "uint32_t") {
    142     return consume_little_endian<uint32_t>(name);
    143   }
    144 
    145   // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
    146   uint32_t consume_u32v(const char* name = nullptr) {
    147     uint32_t length = 0;
    148     return read_leb<uint32_t, kValidate, kAdvancePc, kTrace>(pc_, &length,
    149                                                              name);
    150   }
    151 
    152   // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
    153   int32_t consume_i32v(const char* name = nullptr) {
    154     uint32_t length = 0;
    155     return read_leb<int32_t, kValidate, kAdvancePc, kTrace>(pc_, &length, name);
    156   }
    157 
    158   // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
    159   void consume_bytes(uint32_t size, const char* name = "skip") {
    160     // Only trace if the name is not null.
    161     TRACE_IF(name, "  +%u  %-20s: %u bytes\n", pc_offset(), name, size);
    162     if (checkAvailable(size)) {
    163       pc_ += size;
    164     } else {
    165       pc_ = end_;
    166     }
    167   }
    168 
    169   // Check that at least {size} bytes exist between {pc_} and {end_}.
    170   bool checkAvailable(uint32_t size) {
    171     DCHECK_LE(pc_, end_);
    172     if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) {
    173       errorf(pc_, "expected %u bytes, fell off end", size);
    174       return false;
    175     }
    176     return true;
    177   }
    178 
    179   void error(const char* msg) { errorf(pc_, "%s", msg); }
    180 
    181   void error(const byte* pc, const char* msg) { errorf(pc, "%s", msg); }
    182 
    183   // Sets internal error state.
    184   void PRINTF_FORMAT(3, 4) errorf(const byte* pc, const char* format, ...) {
    185     // Only report the first error.
    186     if (!ok()) return;
    187 #if DEBUG
    188     if (FLAG_wasm_break_on_decoder_error) {
    189       base::OS::DebugBreak();
    190     }
    191 #endif
    192     constexpr int kMaxErrorMsg = 256;
    193     EmbeddedVector<char, kMaxErrorMsg> buffer;
    194     va_list arguments;
    195     va_start(arguments, format);
    196     int len = VSNPrintF(buffer, format, arguments);
    197     CHECK_LT(0, len);
    198     va_end(arguments);
    199     error_msg_.assign(buffer.start(), len);
    200     DCHECK_GE(pc, start_);
    201     error_offset_ = static_cast<uint32_t>(pc - start_) + buffer_offset_;
    202     onFirstError();
    203   }
    204 
    205   // Behavior triggered on first error, overridden in subclasses.
    206   virtual void onFirstError() {}
    207 
    208   // Debugging helper to print a bytes range as hex bytes.
    209   void traceByteRange(const byte* start, const byte* end) {
    210     DCHECK_LE(start, end);
    211     for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p);
    212   }
    213 
    214   // Debugging helper to print bytes up to the end.
    215   void traceOffEnd() {
    216     traceByteRange(pc_, end_);
    217     TRACE("<end>\n");
    218   }
    219 
    220   // Converts the given value to a {Result}, copying the error if necessary.
    221   template <typename T, typename U = typename std::remove_reference<T>::type>
    222   Result<U> toResult(T&& val) {
    223     Result<U> result(std::forward<T>(val));
    224     if (failed()) {
    225       TRACE("Result error: %s\n", error_msg_.c_str());
    226       result.error(error_offset_, std::move(error_msg_));
    227     }
    228     return result;
    229   }
    230 
    231   // Resets the boundaries of this decoder.
    232   void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) {
    233     DCHECK_LE(start, end);
    234     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
    235     start_ = start;
    236     pc_ = start;
    237     end_ = end;
    238     buffer_offset_ = buffer_offset;
    239     error_offset_ = 0;
    240     error_msg_.clear();
    241   }
    242 
    243   void Reset(Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) {
    244     Reset(bytes.begin(), bytes.end(), buffer_offset);
    245   }
    246 
    247   bool ok() const { return error_msg_.empty(); }
    248   bool failed() const { return !ok(); }
    249   bool more() const { return pc_ < end_; }
    250 
    251   const byte* start() const { return start_; }
    252   const byte* pc() const { return pc_; }
    253   uint32_t position() const { return static_cast<uint32_t>(pc_ - start_); }
    254   uint32_t pc_offset() const {
    255     return static_cast<uint32_t>(pc_ - start_) + buffer_offset_;
    256   }
    257   uint32_t buffer_offset() const { return buffer_offset_; }
    258   // Takes an offset relative to the module start and returns an offset relative
    259   // to the current buffer of the decoder.
    260   uint32_t GetBufferRelativeOffset(uint32_t offset) const {
    261     DCHECK_LE(buffer_offset_, offset);
    262     return offset - buffer_offset_;
    263   }
    264   const byte* end() const { return end_; }
    265 
    266  protected:
    267   const byte* start_;
    268   const byte* pc_;
    269   const byte* end_;
    270   // The offset of the current buffer in the module. Needed for streaming.
    271   uint32_t buffer_offset_;
    272   uint32_t error_offset_ = 0;
    273   std::string error_msg_;
    274 
    275  private:
    276   template <typename IntType, bool validate>
    277   inline IntType read_little_endian(const byte* pc, const char* msg) {
    278     if (!validate) {
    279       DCHECK(validate_size(pc, sizeof(IntType), msg));
    280     } else if (!validate_size(pc, sizeof(IntType), msg)) {
    281       return IntType{0};
    282     }
    283     return ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc));
    284   }
    285 
    286   template <typename IntType>
    287   inline IntType consume_little_endian(const char* name) {
    288     TRACE("  +%u  %-20s: ", pc_offset(), name);
    289     if (!checkAvailable(sizeof(IntType))) {
    290       traceOffEnd();
    291       pc_ = end_;
    292       return IntType{0};
    293     }
    294     IntType val = read_little_endian<IntType, false>(pc_, name);
    295     traceByteRange(pc_, pc_ + sizeof(IntType));
    296     TRACE("= %d\n", val);
    297     pc_ += sizeof(IntType);
    298     return val;
    299   }
    300 
    301   template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
    302             TraceFlag trace>
    303   inline IntType read_leb(const byte* pc, uint32_t* length,
    304                           const char* name = "varint") {
    305     DCHECK_IMPLIES(advance_pc, pc == pc_);
    306     TRACE_IF(trace, "  +%u  %-20s: ", pc_offset(), name);
    307     return read_leb_tail<IntType, validate, advance_pc, trace, 0>(pc, length,
    308                                                                   name, 0);
    309   }
    310 
    311   template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
    312             TraceFlag trace, int byte_index>
    313   IntType read_leb_tail(const byte* pc, uint32_t* length, const char* name,
    314                         IntType result) {
    315     constexpr bool is_signed = std::is_signed<IntType>::value;
    316     constexpr int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
    317     static_assert(byte_index < kMaxLength, "invalid template instantiation");
    318     constexpr int shift = byte_index * 7;
    319     constexpr bool is_last_byte = byte_index == kMaxLength - 1;
    320     DCHECK_LE(pc, end_);
    321     const bool at_end = validate && pc == end_;
    322     byte b = 0;
    323     if (!at_end) {
    324       DCHECK_LT(pc, end_);
    325       b = *pc;
    326       TRACE_IF(trace, "%02x ", b);
    327       result = result | ((static_cast<IntType>(b) & 0x7f) << shift);
    328     }
    329     if (!is_last_byte && (b & 0x80)) {
    330       // Make sure that we only instantiate the template for valid byte indexes.
    331       // Compilers are not smart enough to figure out statically that the
    332       // following call is unreachable if is_last_byte is false.
    333       constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1);
    334       return read_leb_tail<IntType, validate, advance_pc, trace,
    335                            next_byte_index>(pc + 1, length, name, result);
    336     }
    337     if (advance_pc) pc_ = pc + (at_end ? 0 : 1);
    338     *length = byte_index + (at_end ? 0 : 1);
    339     if (validate && (at_end || (b & 0x80))) {
    340       TRACE_IF(trace, at_end ? "<end> " : "<length overflow> ");
    341       errorf(pc, "expected %s", name);
    342       result = 0;
    343     }
    344     if (is_last_byte) {
    345       // A signed-LEB128 must sign-extend the final byte, excluding its
    346       // most-significant bit; e.g. for a 32-bit LEB128:
    347       //   kExtraBits = 4  (== 32 - (5-1) * 7)
    348       // For unsigned values, the extra bits must be all zero.
    349       // For signed values, the extra bits *plus* the most significant bit must
    350       // either be 0, or all ones.
    351       constexpr int kExtraBits = (sizeof(IntType) * 8) - ((kMaxLength - 1) * 7);
    352       constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0);
    353       const byte checked_bits = b & (0xFF << kSignExtBits);
    354       constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits);
    355       bool valid_extra_bits =
    356           checked_bits == 0 ||
    357           (is_signed && checked_bits == kSignExtendedExtraBits);
    358       if (!validate) {
    359         DCHECK(valid_extra_bits);
    360       } else if (!valid_extra_bits) {
    361         error(pc, "extra bits in varint");
    362         result = 0;
    363       }
    364     }
    365     constexpr int sign_ext_shift =
    366         is_signed ? Max(0, int{8 * sizeof(IntType)} - shift - 7) : 0;
    367     // Perform sign extension.
    368     result = (result << sign_ext_shift) >> sign_ext_shift;
    369     if (trace && is_signed) {
    370       TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
    371     } else if (trace) {
    372       TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
    373     }
    374     return result;
    375   }
    376 };
    377 
    378 // Reference to a string in the wire bytes.
    379 class WireBytesRef {
    380  public:
    381   WireBytesRef() : WireBytesRef(0, 0) {}
    382   WireBytesRef(uint32_t offset, uint32_t length)
    383       : offset_(offset), length_(length) {
    384     DCHECK_IMPLIES(offset_ == 0, length_ == 0);
    385     DCHECK_LE(offset_, offset_ + length_);  // no uint32_t overflow.
    386   }
    387 
    388   uint32_t offset() const { return offset_; }
    389   uint32_t length() const { return length_; }
    390   uint32_t end_offset() const { return offset_ + length_; }
    391   bool is_empty() const { return length_ == 0; }
    392   bool is_set() const { return offset_ != 0; }
    393 
    394  private:
    395   uint32_t offset_;
    396   uint32_t length_;
    397 };
    398 
// Undefine TRACE again so that it does not leak out of this header.
// NOTE(review): TRACE_IF is not undefined here and thus stays visible to
// includers - confirm whether that is intentional.
#undef TRACE
    400 }  // namespace wasm
    401 }  // namespace internal
    402 }  // namespace v8
    403 
    404 #endif  // V8_WASM_DECODER_H_
    405