Home | History | Annotate | Download | only in source
      1 // Copyright (c) 2015-2016 The Khronos Group Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "source/binary.h"
     16 
     17 #include <algorithm>
     18 #include <cassert>
     19 #include <cstring>
     20 #include <iterator>
     21 #include <limits>
     22 #include <string>
     23 #include <unordered_map>
     24 #include <vector>
     25 
     26 #include "source/assembly_grammar.h"
     27 #include "source/diagnostic.h"
     28 #include "source/ext_inst.h"
     29 #include "source/latest_version_spirv_header.h"
     30 #include "source/opcode.h"
     31 #include "source/operand.h"
     32 #include "source/spirv_constant.h"
     33 #include "source/spirv_endian.h"
     34 
     35 spv_result_t spvBinaryHeaderGet(const spv_const_binary binary,
     36                                 const spv_endianness_t endian,
     37                                 spv_header_t* pHeader) {
     38   if (!binary->code) return SPV_ERROR_INVALID_BINARY;
     39   if (binary->wordCount < SPV_INDEX_INSTRUCTION)
     40     return SPV_ERROR_INVALID_BINARY;
     41   if (!pHeader) return SPV_ERROR_INVALID_POINTER;
     42 
     43   // TODO: Validation checking?
     44   pHeader->magic = spvFixWord(binary->code[SPV_INDEX_MAGIC_NUMBER], endian);
     45   pHeader->version = spvFixWord(binary->code[SPV_INDEX_VERSION_NUMBER], endian);
     46   pHeader->generator =
     47       spvFixWord(binary->code[SPV_INDEX_GENERATOR_NUMBER], endian);
     48   pHeader->bound = spvFixWord(binary->code[SPV_INDEX_BOUND], endian);
     49   pHeader->schema = spvFixWord(binary->code[SPV_INDEX_SCHEMA], endian);
     50   pHeader->instructions = &binary->code[SPV_INDEX_INSTRUCTION];
     51 
     52   return SPV_SUCCESS;
     53 }
     54 
     55 namespace {
     56 
     57 // A SPIR-V binary parser.  A parser instance communicates detailed parse
     58 // results via callbacks.
     59 class Parser {
     60  public:
     61   // The user_data value is provided to the callbacks as context.
     62   Parser(const spv_const_context context, void* user_data,
     63          spv_parsed_header_fn_t parsed_header_fn,
     64          spv_parsed_instruction_fn_t parsed_instruction_fn)
     65       : grammar_(context),
     66         consumer_(context->consumer),
     67         user_data_(user_data),
     68         parsed_header_fn_(parsed_header_fn),
     69         parsed_instruction_fn_(parsed_instruction_fn) {}
     70 
     71   // Parses the specified binary SPIR-V module, issuing callbacks on a parsed
     72   // header and for each parsed instruction.  Returns SPV_SUCCESS on success.
     73   // Otherwise returns an error code and issues a diagnostic.
     74   spv_result_t parse(const uint32_t* words, size_t num_words,
     75                      spv_diagnostic* diagnostic);
     76 
     77  private:
     78   // All remaining methods work on the current module parse state.
     79 
     80   // Like the parse method, but works on the current module parse state.
     81   spv_result_t parseModule();
     82 
     83   // Parses an instruction at the current position of the binary.  Assumes
     84   // the header has been parsed, the endian has been set, and the word index is
     85   // still in range.  Advances the parsing position past the instruction, and
     86   // updates other parsing state for the current module.
     87   // On success, returns SPV_SUCCESS and issues the parsed-instruction callback.
     88   // On failure, returns an error code and issues a diagnostic.
     89   spv_result_t parseInstruction();
     90 
     91   // Parses an instruction operand with the given type, for an instruction
     92   // starting at inst_offset words into the SPIR-V binary.
     93   // If the SPIR-V binary is the same endianness as the host, then the
     94   // endian_converted_inst_words parameter is ignored.  Otherwise, this method
     95   // appends the words for this operand, converted to host native endianness,
     96   // to the end of endian_converted_inst_words.  This method also updates the
     97   // expected_operands parameter, and the scalar members of the inst parameter.
     98   // On success, returns SPV_SUCCESS, advances past the operand, and pushes a
     99   // new entry on to the operands vector.  Otherwise returns an error code and
    100   // issues a diagnostic.
    101   spv_result_t parseOperand(size_t inst_offset, spv_parsed_instruction_t* inst,
    102                             const spv_operand_type_t type,
    103                             std::vector<uint32_t>* endian_converted_inst_words,
    104                             std::vector<spv_parsed_operand_t>* operands,
    105                             spv_operand_pattern_t* expected_operands);
    106 
    107   // Records the numeric type for an operand according to the type information
    108   // associated with the given non-zero type Id.  This can fail if the type Id
    109   // is not a type Id, or if the type Id does not reference a scalar numeric
    110   // type.  On success, return SPV_SUCCESS and populates the num_words,
    111   // number_kind, and number_bit_width fields of parsed_operand.
    112   spv_result_t setNumericTypeInfoForType(spv_parsed_operand_t* parsed_operand,
    113                                          uint32_t type_id);
    114 
    115   // Records the number type for an instruction at the given offset, if that
    116   // instruction generates a type.  For types that aren't scalar numbers,
    117   // record something with number kind SPV_NUMBER_NONE.
    118   void recordNumberType(size_t inst_offset,
    119                         const spv_parsed_instruction_t* inst);
    120 
    121   // Returns a diagnostic stream object initialized with current position in
    122   // the input stream, and for the given error code. Any data written to the
    123   // returned object will be propagated to the current parse's diagnostic
    124   // object.
    125   spvtools::DiagnosticStream diagnostic(spv_result_t error) {
    126     return spvtools::DiagnosticStream({0, 0, _.word_index}, consumer_, "",
    127                                       error);
    128   }
    129 
    130   // Returns a diagnostic stream object with the default parse error code.
    131   spvtools::DiagnosticStream diagnostic() {
    132     // The default failure for parsing is invalid binary.
    133     return diagnostic(SPV_ERROR_INVALID_BINARY);
    134   }
    135 
    136   // Issues a diagnostic describing an exhaustion of input condition when
    137   // trying to decode an instruction operand, and returns
    138   // SPV_ERROR_INVALID_BINARY.
    139   spv_result_t exhaustedInputDiagnostic(size_t inst_offset, SpvOp opcode,
    140                                         spv_operand_type_t type) {
    141     return diagnostic() << "End of input reached while decoding Op"
    142                         << spvOpcodeString(opcode) << " starting at word "
    143                         << inst_offset
    144                         << ((_.word_index < _.num_words) ? ": truncated "
    145                                                          : ": missing ")
    146                         << spvOperandTypeStr(type) << " operand at word offset "
    147                         << _.word_index - inst_offset << ".";
    148   }
    149 
    150   // Returns the endian-corrected word at the current position.
    151   uint32_t peek() const { return peekAt(_.word_index); }
    152 
    153   // Returns the endian-corrected word at the given position.
    154   uint32_t peekAt(size_t index) const {
    155     assert(index < _.num_words);
    156     return spvFixWord(_.words[index], _.endian);
    157   }
    158 
    159   // Data members
    160 
    161   const spvtools::AssemblyGrammar grammar_;        // SPIR-V syntax utility.
    162   const spvtools::MessageConsumer& consumer_;      // Message consumer callback.
    163   void* const user_data_;                          // Context for the callbacks
    164   const spv_parsed_header_fn_t parsed_header_fn_;  // Parsed header callback
    165   const spv_parsed_instruction_fn_t
    166       parsed_instruction_fn_;  // Parsed instruction callback
    167 
    168   // Describes the format of a typed literal number.
    169   struct NumberType {
    170     spv_number_kind_t type;
    171     uint32_t bit_width;
    172   };
    173 
    174   // The state used to parse a single SPIR-V binary module.
    175   struct State {
    176     State(const uint32_t* words_arg, size_t num_words_arg,
    177           spv_diagnostic* diagnostic_arg)
    178         : words(words_arg),
    179           num_words(num_words_arg),
    180           diagnostic(diagnostic_arg),
    181           word_index(0),
    182           endian(),
    183           requires_endian_conversion(false) {
    184       // Temporary storage for parser state within a single instruction.
    185       // Most instructions require fewer than 25 words or operands.
    186       operands.reserve(25);
    187       endian_converted_words.reserve(25);
    188       expected_operands.reserve(25);
    189     }
    190     State() : State(0, 0, nullptr) {}
    191     const uint32_t* words;       // Words in the binary SPIR-V module.
    192     size_t num_words;            // Number of words in the module.
    193     spv_diagnostic* diagnostic;  // Where diagnostics go.
    194     size_t word_index;           // The current position in words.
    195     spv_endianness_t endian;     // The endianness of the binary.
    196     // Is the SPIR-V binary in a different endiannes from the host native
    197     // endianness?
    198     bool requires_endian_conversion;
    199 
    200     // Maps a result ID to its type ID.  By convention:
    201     //  - a result ID that is a type definition maps to itself.
    202     //  - a result ID without a type maps to 0.  (E.g. for OpLabel)
    203     std::unordered_map<uint32_t, uint32_t> id_to_type_id;
    204     // Maps a type ID to its number type description.
    205     std::unordered_map<uint32_t, NumberType> type_id_to_number_type_info;
    206     // Maps an ExtInstImport id to the extended instruction type.
    207     std::unordered_map<uint32_t, spv_ext_inst_type_t>
    208         import_id_to_ext_inst_type;
    209 
    210     // Used by parseOperand
    211     std::vector<spv_parsed_operand_t> operands;
    212     std::vector<uint32_t> endian_converted_words;
    213     spv_operand_pattern_t expected_operands;
    214   } _;
    215 };
    216 
    217 spv_result_t Parser::parse(const uint32_t* words, size_t num_words,
    218                            spv_diagnostic* diagnostic_arg) {
    219   _ = State(words, num_words, diagnostic_arg);
    220 
    221   const spv_result_t result = parseModule();
    222 
    223   // Clear the module state.  The tables might be big.
    224   _ = State();
    225 
    226   return result;
    227 }
    228 
    229 spv_result_t Parser::parseModule() {
    230   if (!_.words) return diagnostic() << "Missing module.";
    231 
    232   if (_.num_words < SPV_INDEX_INSTRUCTION)
    233     return diagnostic() << "Module has incomplete header: only " << _.num_words
    234                         << " words instead of " << SPV_INDEX_INSTRUCTION;
    235 
    236   // Check the magic number and detect the module's endianness.
    237   spv_const_binary_t binary{_.words, _.num_words};
    238   if (spvBinaryEndianness(&binary, &_.endian)) {
    239     return diagnostic() << "Invalid SPIR-V magic number '" << std::hex
    240                         << _.words[0] << "'.";
    241   }
    242   _.requires_endian_conversion = !spvIsHostEndian(_.endian);
    243 
    244   // Process the header.
    245   spv_header_t header;
    246   if (spvBinaryHeaderGet(&binary, _.endian, &header)) {
    247     // It turns out there is no way to trigger this error since the only
    248     // failure cases are already handled above, with better messages.
    249     return diagnostic(SPV_ERROR_INTERNAL)
    250            << "Internal error: unhandled header parse failure";
    251   }
    252   if (parsed_header_fn_) {
    253     if (auto error = parsed_header_fn_(user_data_, _.endian, header.magic,
    254                                        header.version, header.generator,
    255                                        header.bound, header.schema)) {
    256       return error;
    257     }
    258   }
    259 
    260   // Process the instructions.
    261   _.word_index = SPV_INDEX_INSTRUCTION;
    262   while (_.word_index < _.num_words)
    263     if (auto error = parseInstruction()) return error;
    264 
    265   // Running off the end should already have been reported earlier.
    266   assert(_.word_index == _.num_words);
    267 
    268   return SPV_SUCCESS;
    269 }
    270 
    271 spv_result_t Parser::parseInstruction() {
    272   // The zero values for all members except for opcode are the
    273   // correct initial values.
    274   spv_parsed_instruction_t inst = {};
    275 
    276   const uint32_t first_word = peek();
    277 
    278   // If the module's endianness is different from the host native endianness,
    279   // then converted_words contains the the endian-translated words in the
    280   // instruction.
    281   _.endian_converted_words.clear();
    282   _.endian_converted_words.push_back(first_word);
    283 
    284   // After a successful parse of the instruction, the inst.operands member
    285   // will point to this vector's storage.
    286   _.operands.clear();
    287 
    288   assert(_.word_index < _.num_words);
    289   // Decompose and check the first word.
    290   uint16_t inst_word_count = 0;
    291   spvOpcodeSplit(first_word, &inst_word_count, &inst.opcode);
    292   if (inst_word_count < 1) {
    293     return diagnostic() << "Invalid instruction word count: "
    294                         << inst_word_count;
    295   }
    296   spv_opcode_desc opcode_desc;
    297   if (grammar_.lookupOpcode(static_cast<SpvOp>(inst.opcode), &opcode_desc))
    298     return diagnostic() << "Invalid opcode: " << inst.opcode;
    299 
    300   // Advance past the opcode word.  But remember the of the start
    301   // of the instruction.
    302   const size_t inst_offset = _.word_index;
    303   _.word_index++;
    304 
    305   // Maintains the ordered list of expected operand types.
    306   // For many instructions we only need the {numTypes, operandTypes}
    307   // entries in opcode_desc.  However, sometimes we need to modify
    308   // the list as we parse the operands. This occurs when an operand
    309   // has its own logical operands (such as the LocalSize operand for
    310   // ExecutionMode), or for extended instructions that may have their
    311   // own operands depending on the selected extended instruction.
    312   _.expected_operands.clear();
    313   for (auto i = 0; i < opcode_desc->numTypes; i++)
    314     _.expected_operands.push_back(
    315         opcode_desc->operandTypes[opcode_desc->numTypes - i - 1]);
    316 
    317   while (_.word_index < inst_offset + inst_word_count) {
    318     const uint16_t inst_word_index = uint16_t(_.word_index - inst_offset);
    319     if (_.expected_operands.empty()) {
    320       return diagnostic() << "Invalid instruction Op" << opcode_desc->name
    321                           << " starting at word " << inst_offset
    322                           << ": expected no more operands after "
    323                           << inst_word_index
    324                           << " words, but stated word count is "
    325                           << inst_word_count << ".";
    326     }
    327 
    328     spv_operand_type_t type =
    329         spvTakeFirstMatchableOperand(&_.expected_operands);
    330 
    331     if (auto error =
    332             parseOperand(inst_offset, &inst, type, &_.endian_converted_words,
    333                          &_.operands, &_.expected_operands)) {
    334       return error;
    335     }
    336   }
    337 
    338   if (!_.expected_operands.empty() &&
    339       !spvOperandIsOptional(_.expected_operands.back())) {
    340     return diagnostic() << "End of input reached while decoding Op"
    341                         << opcode_desc->name << " starting at word "
    342                         << inst_offset << ": expected more operands after "
    343                         << inst_word_count << " words.";
    344   }
    345 
    346   if ((inst_offset + inst_word_count) != _.word_index) {
    347     return diagnostic() << "Invalid word count: Op" << opcode_desc->name
    348                         << " starting at word " << inst_offset
    349                         << " says it has " << inst_word_count
    350                         << " words, but found " << _.word_index - inst_offset
    351                         << " words instead.";
    352   }
    353 
    354   // Check the computed length of the endian-converted words vector against
    355   // the declared number of words in the instruction.  If endian conversion
    356   // is required, then they should match.  If no endian conversion was
    357   // performed, then the vector only contains the initial opcode/word-count
    358   // word.
    359   assert(!_.requires_endian_conversion ||
    360          (inst_word_count == _.endian_converted_words.size()));
    361   assert(_.requires_endian_conversion ||
    362          (_.endian_converted_words.size() == 1));
    363 
    364   recordNumberType(inst_offset, &inst);
    365 
    366   if (_.requires_endian_conversion) {
    367     // We must wait until here to set this pointer, because the vector might
    368     // have been be resized while we accumulated its elements.
    369     inst.words = _.endian_converted_words.data();
    370   } else {
    371     // If no conversion is required, then just point to the underlying binary.
    372     // This saves time and space.
    373     inst.words = _.words + inst_offset;
    374   }
    375   inst.num_words = inst_word_count;
    376 
    377   // We must wait until here to set this pointer, because the vector might
    378   // have been be resized while we accumulated its elements.
    379   inst.operands = _.operands.data();
    380   inst.num_operands = uint16_t(_.operands.size());
    381 
    382   // Issue the callback.  The callee should know that all the storage in inst
    383   // is transient, and will disappear immediately afterward.
    384   if (parsed_instruction_fn_) {
    385     if (auto error = parsed_instruction_fn_(user_data_, &inst)) return error;
    386   }
    387 
    388   return SPV_SUCCESS;
    389 }
    390 
    391 spv_result_t Parser::parseOperand(size_t inst_offset,
    392                                   spv_parsed_instruction_t* inst,
    393                                   const spv_operand_type_t type,
    394                                   std::vector<uint32_t>* words,
    395                                   std::vector<spv_parsed_operand_t>* operands,
    396                                   spv_operand_pattern_t* expected_operands) {
    397   const SpvOp opcode = static_cast<SpvOp>(inst->opcode);
    398   // We'll fill in this result as we go along.
    399   spv_parsed_operand_t parsed_operand;
    400   parsed_operand.offset = uint16_t(_.word_index - inst_offset);
    401   // Most operands occupy one word.  This might be be adjusted later.
    402   parsed_operand.num_words = 1;
    403   // The type argument is the one used by the grammar to parse the instruction.
    404   // But it can exposes internal parser details such as whether an operand is
    405   // optional or actually represents a variable-length sequence of operands.
    406   // The resulting type should be adjusted to avoid those internal details.
    407   // In most cases, the resulting operand type is the same as the grammar type.
    408   parsed_operand.type = type;
    409 
    410   // Assume non-numeric values.  This will be updated for literal numbers.
    411   parsed_operand.number_kind = SPV_NUMBER_NONE;
    412   parsed_operand.number_bit_width = 0;
    413 
    414   if (_.word_index >= _.num_words)
    415     return exhaustedInputDiagnostic(inst_offset, opcode, type);
    416 
    417   const uint32_t word = peek();
    418 
    419   // Do the words in this operand have to be converted to native endianness?
    420   // True for all but literal strings.
    421   bool convert_operand_endianness = true;
    422 
    423   switch (type) {
    424     case SPV_OPERAND_TYPE_TYPE_ID:
    425       if (!word)
    426         return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Type Id is 0";
    427       inst->type_id = word;
    428       break;
    429 
    430     case SPV_OPERAND_TYPE_RESULT_ID:
    431       if (!word)
    432         return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Result Id is 0";
    433       inst->result_id = word;
    434       // Save the result ID to type ID mapping.
    435       // In the grammar, type ID always appears before result ID.
    436       if (_.id_to_type_id.find(inst->result_id) != _.id_to_type_id.end())
    437         return diagnostic(SPV_ERROR_INVALID_ID)
    438                << "Id " << inst->result_id << " is defined more than once";
    439       // Record it.
    440       // A regular value maps to its type.  Some instructions (e.g. OpLabel)
    441       // have no type Id, and will map to 0.  The result Id for a
    442       // type-generating instruction (e.g. OpTypeInt) maps to itself.
    443       _.id_to_type_id[inst->result_id] =
    444           spvOpcodeGeneratesType(opcode) ? inst->result_id : inst->type_id;
    445       break;
    446 
    447     case SPV_OPERAND_TYPE_ID:
    448     case SPV_OPERAND_TYPE_OPTIONAL_ID:
    449       if (!word) return diagnostic(SPV_ERROR_INVALID_ID) << "Id is 0";
    450       parsed_operand.type = SPV_OPERAND_TYPE_ID;
    451 
    452       if (opcode == SpvOpExtInst && parsed_operand.offset == 3) {
    453         // The current word is the extended instruction set Id.
    454         // Set the extended instruction set type for the current instruction.
    455         auto ext_inst_type_iter = _.import_id_to_ext_inst_type.find(word);
    456         if (ext_inst_type_iter == _.import_id_to_ext_inst_type.end()) {
    457           return diagnostic(SPV_ERROR_INVALID_ID)
    458                  << "OpExtInst set Id " << word
    459                  << " does not reference an OpExtInstImport result Id";
    460         }
    461         inst->ext_inst_type = ext_inst_type_iter->second;
    462       }
    463       break;
    464 
    465     case SPV_OPERAND_TYPE_SCOPE_ID:
    466     case SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID:
    467       // Check for trivially invalid values.  The operand descriptions already
    468       // have the word "ID" in them.
    469       if (!word) return diagnostic() << spvOperandTypeStr(type) << " is 0";
    470       break;
    471 
    472     case SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER: {
    473       assert(SpvOpExtInst == opcode);
    474       assert(inst->ext_inst_type != SPV_EXT_INST_TYPE_NONE);
    475       spv_ext_inst_desc ext_inst;
    476       if (grammar_.lookupExtInst(inst->ext_inst_type, word, &ext_inst))
    477         return diagnostic() << "Invalid extended instruction number: " << word;
    478       spvPushOperandTypes(ext_inst->operandTypes, expected_operands);
    479     } break;
    480 
    481     case SPV_OPERAND_TYPE_SPEC_CONSTANT_OP_NUMBER: {
    482       assert(SpvOpSpecConstantOp == opcode);
    483       if (grammar_.lookupSpecConstantOpcode(SpvOp(word))) {
    484         return diagnostic()
    485                << "Invalid " << spvOperandTypeStr(type) << ": " << word;
    486       }
    487       spv_opcode_desc opcode_entry = nullptr;
    488       if (grammar_.lookupOpcode(SpvOp(word), &opcode_entry)) {
    489         return diagnostic(SPV_ERROR_INTERNAL)
    490                << "OpSpecConstant opcode table out of sync";
    491       }
    492       // OpSpecConstant opcodes must have a type and result. We've already
    493       // processed them, so skip them when preparing to parse the other
    494       // operants for the opcode.
    495       assert(opcode_entry->hasType);
    496       assert(opcode_entry->hasResult);
    497       assert(opcode_entry->numTypes >= 2);
    498       spvPushOperandTypes(opcode_entry->operandTypes + 2, expected_operands);
    499     } break;
    500 
    501     case SPV_OPERAND_TYPE_LITERAL_INTEGER:
    502     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER:
    503       // These are regular single-word literal integer operands.
    504       // Post-parsing validation should check the range of the parsed value.
    505       parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_INTEGER;
    506       // It turns out they are always unsigned integers!
    507       parsed_operand.number_kind = SPV_NUMBER_UNSIGNED_INT;
    508       parsed_operand.number_bit_width = 32;
    509       break;
    510 
    511     case SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER:
    512     case SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER:
    513       parsed_operand.type = SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER;
    514       if (opcode == SpvOpSwitch) {
    515         // The literal operands have the same type as the value
    516         // referenced by the selector Id.
    517         const uint32_t selector_id = peekAt(inst_offset + 1);
    518         const auto type_id_iter = _.id_to_type_id.find(selector_id);
    519         if (type_id_iter == _.id_to_type_id.end() ||
    520             type_id_iter->second == 0) {
    521           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    522                               << " has no type";
    523         }
    524         uint32_t type_id = type_id_iter->second;
    525 
    526         if (selector_id == type_id) {
    527           // Recall that by convention, a result ID that is a type definition
    528           // maps to itself.
    529           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    530                               << " is a type, not a value";
    531         }
    532         if (auto error = setNumericTypeInfoForType(&parsed_operand, type_id))
    533           return error;
    534         if (parsed_operand.number_kind != SPV_NUMBER_UNSIGNED_INT &&
    535             parsed_operand.number_kind != SPV_NUMBER_SIGNED_INT) {
    536           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    537                               << " is not a scalar integer";
    538         }
    539       } else {
    540         assert(opcode == SpvOpConstant || opcode == SpvOpSpecConstant);
    541         // The literal number type is determined by the type Id for the
    542         // constant.
    543         assert(inst->type_id);
    544         if (auto error =
    545                 setNumericTypeInfoForType(&parsed_operand, inst->type_id))
    546           return error;
    547       }
    548       break;
    549 
    550     case SPV_OPERAND_TYPE_LITERAL_STRING:
    551     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING: {
    552       convert_operand_endianness = false;
    553       const char* string =
    554           reinterpret_cast<const char*>(_.words + _.word_index);
    555       // Compute the length of the string, but make sure we don't run off the
    556       // end of the input.
    557       const size_t remaining_input_bytes =
    558           sizeof(uint32_t) * (_.num_words - _.word_index);
    559       const size_t string_num_content_bytes =
    560           spv_strnlen_s(string, remaining_input_bytes);
    561       // If there was no terminating null byte, then that's an end-of-input
    562       // error.
    563       if (string_num_content_bytes == remaining_input_bytes)
    564         return exhaustedInputDiagnostic(inst_offset, opcode, type);
    565       // Account for null in the word length, so add 1 for null, then add 3 to
    566       // make sure we round up.  The following is equivalent to:
    567       //    (string_num_content_bytes + 1 + 3) / 4
    568       const size_t string_num_words = string_num_content_bytes / 4 + 1;
    569       // Make sure we can record the word count without overflow.
    570       //
    571       // This error can't currently be triggered because of validity
    572       // checks elsewhere.
    573       if (string_num_words > std::numeric_limits<uint16_t>::max()) {
    574         return diagnostic() << "Literal string is longer than "
    575                             << std::numeric_limits<uint16_t>::max()
    576                             << " words: " << string_num_words << " words long";
    577       }
    578       parsed_operand.num_words = uint16_t(string_num_words);
    579       parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_STRING;
    580 
    581       if (SpvOpExtInstImport == opcode) {
    582         // Record the extended instruction type for the ID for this import.
    583         // There is only one string literal argument to OpExtInstImport,
    584         // so it's sufficient to guard this just on the opcode.
    585         const spv_ext_inst_type_t ext_inst_type =
    586             spvExtInstImportTypeGet(string);
    587         if (SPV_EXT_INST_TYPE_NONE == ext_inst_type) {
    588           return diagnostic()
    589                  << "Invalid extended instruction import '" << string << "'";
    590         }
    591         // We must have parsed a valid result ID.  It's a condition
    592         // of the grammar, and we only accept non-zero result Ids.
    593         assert(inst->result_id);
    594         _.import_id_to_ext_inst_type[inst->result_id] = ext_inst_type;
    595       }
    596     } break;
    597 
    598     case SPV_OPERAND_TYPE_CAPABILITY:
    599     case SPV_OPERAND_TYPE_SOURCE_LANGUAGE:
    600     case SPV_OPERAND_TYPE_EXECUTION_MODEL:
    601     case SPV_OPERAND_TYPE_ADDRESSING_MODEL:
    602     case SPV_OPERAND_TYPE_MEMORY_MODEL:
    603     case SPV_OPERAND_TYPE_EXECUTION_MODE:
    604     case SPV_OPERAND_TYPE_STORAGE_CLASS:
    605     case SPV_OPERAND_TYPE_DIMENSIONALITY:
    606     case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE:
    607     case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE:
    608     case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT:
    609     case SPV_OPERAND_TYPE_FP_ROUNDING_MODE:
    610     case SPV_OPERAND_TYPE_LINKAGE_TYPE:
    611     case SPV_OPERAND_TYPE_ACCESS_QUALIFIER:
    612     case SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER:
    613     case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE:
    614     case SPV_OPERAND_TYPE_DECORATION:
    615     case SPV_OPERAND_TYPE_BUILT_IN:
    616     case SPV_OPERAND_TYPE_GROUP_OPERATION:
    617     case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS:
    618     case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO:
    619     case SPV_OPERAND_TYPE_DEBUG_BASE_TYPE_ATTRIBUTE_ENCODING:
    620     case SPV_OPERAND_TYPE_DEBUG_COMPOSITE_TYPE:
    621     case SPV_OPERAND_TYPE_DEBUG_TYPE_QUALIFIER:
    622     case SPV_OPERAND_TYPE_DEBUG_OPERATION: {
    623       // A single word that is a plain enum value.
    624 
    625       // Map an optional operand type to its corresponding concrete type.
    626       if (type == SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER)
    627         parsed_operand.type = SPV_OPERAND_TYPE_ACCESS_QUALIFIER;
    628 
    629       spv_operand_desc entry;
    630       if (grammar_.lookupOperand(type, word, &entry)) {
    631         return diagnostic()
    632                << "Invalid " << spvOperandTypeStr(parsed_operand.type)
    633                << " operand: " << word;
    634       }
    635       // Prepare to accept operands to this operand, if needed.
    636       spvPushOperandTypes(entry->operandTypes, expected_operands);
    637     } break;
    638 
    639     case SPV_OPERAND_TYPE_FP_FAST_MATH_MODE:
    640     case SPV_OPERAND_TYPE_FUNCTION_CONTROL:
    641     case SPV_OPERAND_TYPE_LOOP_CONTROL:
    642     case SPV_OPERAND_TYPE_IMAGE:
    643     case SPV_OPERAND_TYPE_OPTIONAL_IMAGE:
    644     case SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS:
    645     case SPV_OPERAND_TYPE_SELECTION_CONTROL:
    646     case SPV_OPERAND_TYPE_DEBUG_INFO_FLAGS: {
    647       // This operand is a mask.
    648 
    649       // Map an optional operand type to its corresponding concrete type.
    650       if (type == SPV_OPERAND_TYPE_OPTIONAL_IMAGE)
    651         parsed_operand.type = SPV_OPERAND_TYPE_IMAGE;
    652       else if (type == SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS)
    653         parsed_operand.type = SPV_OPERAND_TYPE_MEMORY_ACCESS;
    654 
    655       // Check validity of set mask bits. Also prepare for operands for those
    656       // masks if they have any.  To get operand order correct, scan from
    657       // MSB to LSB since we can only prepend operands to a pattern.
    658       // The only case in the grammar where you have more than one mask bit
    659       // having an operand is for image operands.  See SPIR-V 3.14 Image
    660       // Operands.
    661       uint32_t remaining_word = word;
    662       for (uint32_t mask = (1u << 31); remaining_word; mask >>= 1) {
    663         if (remaining_word & mask) {
    664           spv_operand_desc entry;
    665           if (grammar_.lookupOperand(type, mask, &entry)) {
    666             return diagnostic()
    667                    << "Invalid " << spvOperandTypeStr(parsed_operand.type)
    668                    << " operand: " << word << " has invalid mask component "
    669                    << mask;
    670           }
    671           remaining_word ^= mask;
    672           spvPushOperandTypes(entry->operandTypes, expected_operands);
    673         }
    674       }
    675       if (word == 0) {
    676         // An all-zeroes mask *might* also be valid.
    677         spv_operand_desc entry;
    678         if (SPV_SUCCESS == grammar_.lookupOperand(type, 0, &entry)) {
    679           // Prepare for its operands, if any.
    680           spvPushOperandTypes(entry->operandTypes, expected_operands);
    681         }
    682       }
    683     } break;
    684     default:
    685       return diagnostic() << "Internal error: Unhandled operand type: " << type;
    686   }
    687 
    688   assert(spvOperandIsConcrete(parsed_operand.type));
    689 
    690   operands->push_back(parsed_operand);
    691 
    692   const size_t index_after_operand = _.word_index + parsed_operand.num_words;
    693 
    694   // Avoid buffer overrun for the cases where the operand has more than one
    695   // word, and where it isn't a string.  (Those other cases have already been
    696   // handled earlier.)  For example, this error can occur for a multi-word
    697   // argument to OpConstant, or a multi-word case literal operand for OpSwitch.
    698   if (_.num_words < index_after_operand)
    699     return exhaustedInputDiagnostic(inst_offset, opcode, type);
    700 
    701   if (_.requires_endian_conversion) {
    702     // Copy instruction words.  Translate to native endianness as needed.
    703     if (convert_operand_endianness) {
    704       const spv_endianness_t endianness = _.endian;
    705       std::transform(_.words + _.word_index, _.words + index_after_operand,
    706                      std::back_inserter(*words),
    707                      [endianness](const uint32_t raw_word) {
    708                        return spvFixWord(raw_word, endianness);
    709                      });
    710     } else {
    711       words->insert(words->end(), _.words + _.word_index,
    712                     _.words + index_after_operand);
    713     }
    714   }
    715 
    716   // Advance past the operand.
    717   _.word_index = index_after_operand;
    718 
    719   return SPV_SUCCESS;
    720 }
    721 
    722 spv_result_t Parser::setNumericTypeInfoForType(
    723     spv_parsed_operand_t* parsed_operand, uint32_t type_id) {
    724   assert(type_id != 0);
    725   auto type_info_iter = _.type_id_to_number_type_info.find(type_id);
    726   if (type_info_iter == _.type_id_to_number_type_info.end()) {
    727     return diagnostic() << "Type Id " << type_id << " is not a type";
    728   }
    729   const NumberType& info = type_info_iter->second;
    730   if (info.type == SPV_NUMBER_NONE) {
    731     // This is a valid type, but for something other than a scalar number.
    732     return diagnostic() << "Type Id " << type_id
    733                         << " is not a scalar numeric type";
    734   }
    735 
    736   parsed_operand->number_kind = info.type;
    737   parsed_operand->number_bit_width = info.bit_width;
    738   // Round up the word count.
    739   parsed_operand->num_words = static_cast<uint16_t>((info.bit_width + 31) / 32);
    740   return SPV_SUCCESS;
    741 }
    742 
    743 void Parser::recordNumberType(size_t inst_offset,
    744                               const spv_parsed_instruction_t* inst) {
    745   const SpvOp opcode = static_cast<SpvOp>(inst->opcode);
    746   if (spvOpcodeGeneratesType(opcode)) {
    747     NumberType info = {SPV_NUMBER_NONE, 0};
    748     if (SpvOpTypeInt == opcode) {
    749       const bool is_signed = peekAt(inst_offset + 3) != 0;
    750       info.type = is_signed ? SPV_NUMBER_SIGNED_INT : SPV_NUMBER_UNSIGNED_INT;
    751       info.bit_width = peekAt(inst_offset + 2);
    752     } else if (SpvOpTypeFloat == opcode) {
    753       info.type = SPV_NUMBER_FLOATING;
    754       info.bit_width = peekAt(inst_offset + 2);
    755     }
    756     // The *result* Id of a type generating instruction is the type Id.
    757     _.type_id_to_number_type_info[inst->result_id] = info;
    758   }
    759 }
    760 
    761 }  // anonymous namespace
    762 
    763 spv_result_t spvBinaryParse(const spv_const_context context, void* user_data,
    764                             const uint32_t* code, const size_t num_words,
    765                             spv_parsed_header_fn_t parsed_header,
    766                             spv_parsed_instruction_fn_t parsed_instruction,
    767                             spv_diagnostic* diagnostic) {
    768   spv_context_t hijack_context = *context;
    769   if (diagnostic) {
    770     *diagnostic = nullptr;
    771     spvtools::UseDiagnosticAsMessageConsumer(&hijack_context, diagnostic);
    772   }
    773   Parser parser(&hijack_context, user_data, parsed_header, parsed_instruction);
    774   return parser.parse(code, num_words, diagnostic);
    775 }
    776 
    777 // TODO(dneto): This probably belongs in text.cpp since that's the only place
    778 // that a spv_binary_t value is created.
    779 void spvBinaryDestroy(spv_binary binary) {
    780   if (!binary) return;
    781   delete[] binary->code;
    782   delete binary;
    783 }
    784 
    // Returns the number of characters in |str| before the first null
    // character, examining at most |strsz| characters.  Returns |strsz| when
    // no null character is found within that bound, and 0 when |str| is null.
    size_t spv_strnlen_s(const char* str, size_t strsz) {
      if (str == nullptr) return 0;

      size_t length = 0;
      // Check the bound first so no character at or past |strsz| is read.
      while (length < strsz && str[length] != '\0') {
        ++length;
      }
      return length;
    }
    792