Home | History | Annotate | Download | only in source
      1 // Copyright (c) 2015-2016 The Khronos Group Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "binary.h"
     16 
     17 #include <algorithm>
     18 #include <cassert>
     19 #include <cstring>
     20 #include <iterator>
     21 #include <limits>
     22 #include <unordered_map>
     23 #include <vector>
     24 
     25 #include "assembly_grammar.h"
     26 #include "diagnostic.h"
     27 #include "ext_inst.h"
     28 #include "opcode.h"
     29 #include "operand.h"
     30 #include "spirv/1.2/spirv.h"
     31 #include "spirv_constant.h"
     32 #include "spirv_endian.h"
     33 
     34 spv_result_t spvBinaryHeaderGet(const spv_const_binary binary,
     35                                 const spv_endianness_t endian,
     36                                 spv_header_t* pHeader) {
     37   if (!binary->code) return SPV_ERROR_INVALID_BINARY;
     38   if (binary->wordCount < SPV_INDEX_INSTRUCTION)
     39     return SPV_ERROR_INVALID_BINARY;
     40   if (!pHeader) return SPV_ERROR_INVALID_POINTER;
     41 
     42   // TODO: Validation checking?
     43   pHeader->magic = spvFixWord(binary->code[SPV_INDEX_MAGIC_NUMBER], endian);
     44   pHeader->version = spvFixWord(binary->code[SPV_INDEX_VERSION_NUMBER], endian);
     45   pHeader->generator =
     46       spvFixWord(binary->code[SPV_INDEX_GENERATOR_NUMBER], endian);
     47   pHeader->bound = spvFixWord(binary->code[SPV_INDEX_BOUND], endian);
     48   pHeader->schema = spvFixWord(binary->code[SPV_INDEX_SCHEMA], endian);
     49   pHeader->instructions = &binary->code[SPV_INDEX_INSTRUCTION];
     50 
     51   return SPV_SUCCESS;
     52 }
     53 
     54 namespace {
     55 
     56 // A SPIR-V binary parser.  A parser instance communicates detailed parse
     57 // results via callbacks.
     58 class Parser {
     59  public:
     60   // The user_data value is provided to the callbacks as context.
     61   Parser(const spv_const_context context, void* user_data,
     62          spv_parsed_header_fn_t parsed_header_fn,
     63          spv_parsed_instruction_fn_t parsed_instruction_fn)
     64       : grammar_(context),
     65         consumer_(context->consumer),
     66         user_data_(user_data),
     67         parsed_header_fn_(parsed_header_fn),
     68         parsed_instruction_fn_(parsed_instruction_fn) {}
     69 
     70   // Parses the specified binary SPIR-V module, issuing callbacks on a parsed
     71   // header and for each parsed instruction.  Returns SPV_SUCCESS on success.
     72   // Otherwise returns an error code and issues a diagnostic.
     73   spv_result_t parse(const uint32_t* words, size_t num_words,
     74                      spv_diagnostic* diagnostic);
     75 
     76  private:
     77   // All remaining methods work on the current module parse state.
     78 
     79   // Like the parse method, but works on the current module parse state.
     80   spv_result_t parseModule();
     81 
     82   // Parses an instruction at the current position of the binary.  Assumes
     83   // the header has been parsed, the endian has been set, and the word index is
     84   // still in range.  Advances the parsing position past the instruction, and
     85   // updates other parsing state for the current module.
     86   // On success, returns SPV_SUCCESS and issues the parsed-instruction callback.
     87   // On failure, returns an error code and issues a diagnostic.
     88   spv_result_t parseInstruction();
     89 
     90   // Parses an instruction operand with the given type, for an instruction
     91   // starting at inst_offset words into the SPIR-V binary.
     92   // If the SPIR-V binary is the same endianness as the host, then the
     93   // endian_converted_inst_words parameter is ignored.  Otherwise, this method
     94   // appends the words for this operand, converted to host native endianness,
     95   // to the end of endian_converted_inst_words.  This method also updates the
     96   // expected_operands parameter, and the scalar members of the inst parameter.
     97   // On success, returns SPV_SUCCESS, advances past the operand, and pushes a
     98   // new entry on to the operands vector.  Otherwise returns an error code and
     99   // issues a diagnostic.
    100   spv_result_t parseOperand(size_t inst_offset, spv_parsed_instruction_t* inst,
    101                             const spv_operand_type_t type,
    102                             std::vector<uint32_t>* endian_converted_inst_words,
    103                             std::vector<spv_parsed_operand_t>* operands,
    104                             spv_operand_pattern_t* expected_operands);
    105 
    106   // Records the numeric type for an operand according to the type information
    107   // associated with the given non-zero type Id.  This can fail if the type Id
    108   // is not a type Id, or if the type Id does not reference a scalar numeric
    109   // type.  On success, return SPV_SUCCESS and populates the num_words,
    110   // number_kind, and number_bit_width fields of parsed_operand.
    111   spv_result_t setNumericTypeInfoForType(spv_parsed_operand_t* parsed_operand,
    112                                          uint32_t type_id);
    113 
    114   // Records the number type for an instruction at the given offset, if that
    115   // instruction generates a type.  For types that aren't scalar numbers,
    116   // record something with number kind SPV_NUMBER_NONE.
    117   void recordNumberType(size_t inst_offset,
    118                         const spv_parsed_instruction_t* inst);
    119 
    120   // Returns a diagnostic stream object initialized with current position in
    121   // the input stream, and for the given error code. Any data written to the
    122   // returned object will be propagated to the current parse's diagnostic
    123   // object.
    124   libspirv::DiagnosticStream diagnostic(spv_result_t error) {
    125     return libspirv::DiagnosticStream({0, 0, _.word_index}, consumer_, error);
    126   }
    127 
    128   // Returns a diagnostic stream object with the default parse error code.
    129   libspirv::DiagnosticStream diagnostic() {
    130     // The default failure for parsing is invalid binary.
    131     return diagnostic(SPV_ERROR_INVALID_BINARY);
    132   }
    133 
    134   // Issues a diagnostic describing an exhaustion of input condition when
    135   // trying to decode an instruction operand, and returns
    136   // SPV_ERROR_INVALID_BINARY.
    137   spv_result_t exhaustedInputDiagnostic(size_t inst_offset, SpvOp opcode,
    138                                         spv_operand_type_t type) {
    139     return diagnostic() << "End of input reached while decoding Op"
    140                         << spvOpcodeString(opcode) << " starting at word "
    141                         << inst_offset
    142                         << ((_.word_index < _.num_words) ? ": truncated "
    143                                                          : ": missing ")
    144                         << spvOperandTypeStr(type) << " operand at word offset "
    145                         << _.word_index - inst_offset << ".";
    146   }
    147 
    148   // Returns the endian-corrected word at the current position.
    149   uint32_t peek() const { return peekAt(_.word_index); }
    150 
    151   // Returns the endian-corrected word at the given position.
    152   uint32_t peekAt(size_t index) const {
    153     assert(index < _.num_words);
    154     return spvFixWord(_.words[index], _.endian);
    155   }
    156 
    157   // Data members
    158 
    159   const libspirv::AssemblyGrammar grammar_;        // SPIR-V syntax utility.
    160   const spvtools::MessageConsumer& consumer_;      // Message consumer callback.
    161   void* const user_data_;                          // Context for the callbacks
    162   const spv_parsed_header_fn_t parsed_header_fn_;  // Parsed header callback
    163   const spv_parsed_instruction_fn_t
    164       parsed_instruction_fn_;  // Parsed instruction callback
    165 
    166   // Describes the format of a typed literal number.
    167   struct NumberType {
    168     spv_number_kind_t type;
    169     uint32_t bit_width;
    170   };
    171 
    172   // The state used to parse a single SPIR-V binary module.
    173   struct State {
    174     State(const uint32_t* words_arg, size_t num_words_arg,
    175           spv_diagnostic* diagnostic_arg)
    176         : words(words_arg),
    177           num_words(num_words_arg),
    178           diagnostic(diagnostic_arg),
    179           word_index(0),
    180           endian(),
    181           requires_endian_conversion(false) {
    182 
    183         // Temporary storage for parser state within a single instruction.
    184         // Most instructions require fewer than 25 words or operands.
    185         operands.reserve(25);
    186         endian_converted_words.reserve(25);
    187         expected_operands.reserve(25);
    188     }
    189     State() : State(0, 0, nullptr) {}
    190     const uint32_t* words;       // Words in the binary SPIR-V module.
    191     size_t num_words;            // Number of words in the module.
    192     spv_diagnostic* diagnostic;  // Where diagnostics go.
    193     size_t word_index;           // The current position in words.
    194     spv_endianness_t endian;     // The endianness of the binary.
    195     // Is the SPIR-V binary in a different endiannes from the host native
    196     // endianness?
    197     bool requires_endian_conversion;
    198 
    199     // Maps a result ID to its type ID.  By convention:
    200     //  - a result ID that is a type definition maps to itself.
    201     //  - a result ID without a type maps to 0.  (E.g. for OpLabel)
    202     std::unordered_map<uint32_t, uint32_t> id_to_type_id;
    203     // Maps a type ID to its number type description.
    204     std::unordered_map<uint32_t, NumberType> type_id_to_number_type_info;
    205     // Maps an ExtInstImport id to the extended instruction type.
    206     std::unordered_map<uint32_t, spv_ext_inst_type_t>
    207         import_id_to_ext_inst_type;
    208 
    209     // Used by parseOperand
    210     std::vector<spv_parsed_operand_t> operands;
    211     std::vector<uint32_t> endian_converted_words;
    212     spv_operand_pattern_t expected_operands;
    213   } _;
    214 };
    215 
    216 spv_result_t Parser::parse(const uint32_t* words, size_t num_words,
    217                            spv_diagnostic* diagnostic_arg) {
    218   _ = State(words, num_words, diagnostic_arg);
    219 
    220   const spv_result_t result = parseModule();
    221 
    222   // Clear the module state.  The tables might be big.
    223   _ = State();
    224 
    225   return result;
    226 }
    227 
    228 spv_result_t Parser::parseModule() {
    229   if (!_.words) return diagnostic() << "Missing module.";
    230 
    231   if (_.num_words < SPV_INDEX_INSTRUCTION)
    232     return diagnostic() << "Module has incomplete header: only " << _.num_words
    233                         << " words instead of " << SPV_INDEX_INSTRUCTION;
    234 
    235   // Check the magic number and detect the module's endianness.
    236   spv_const_binary_t binary{_.words, _.num_words};
    237   if (spvBinaryEndianness(&binary, &_.endian)) {
    238     return diagnostic() << "Invalid SPIR-V magic number '" << std::hex
    239                         << _.words[0] << "'.";
    240   }
    241   _.requires_endian_conversion = !spvIsHostEndian(_.endian);
    242 
    243   // Process the header.
    244   spv_header_t header;
    245   if (spvBinaryHeaderGet(&binary, _.endian, &header)) {
    246     // It turns out there is no way to trigger this error since the only
    247     // failure cases are already handled above, with better messages.
    248     return diagnostic(SPV_ERROR_INTERNAL)
    249            << "Internal error: unhandled header parse failure";
    250   }
    251   if (parsed_header_fn_) {
    252     if (auto error = parsed_header_fn_(user_data_, _.endian, header.magic,
    253                                        header.version, header.generator,
    254                                        header.bound, header.schema)) {
    255       return error;
    256     }
    257   }
    258 
    259   // Process the instructions.
    260   _.word_index = SPV_INDEX_INSTRUCTION;
    261   while (_.word_index < _.num_words)
    262     if (auto error = parseInstruction()) return error;
    263 
    264   // Running off the end should already have been reported earlier.
    265   assert(_.word_index == _.num_words);
    266 
    267   return SPV_SUCCESS;
    268 }
    269 
    270 spv_result_t Parser::parseInstruction() {
    271   // The zero values for all members except for opcode are the
    272   // correct initial values.
    273   spv_parsed_instruction_t inst = {};
    274 
    275   const uint32_t first_word = peek();
    276 
    277   // If the module's endianness is different from the host native endianness,
    278   // then converted_words contains the the endian-translated words in the
    279   // instruction.
    280   _.endian_converted_words.clear();
    281   _.endian_converted_words.push_back(first_word);
    282 
    283   // After a successful parse of the instruction, the inst.operands member
    284   // will point to this vector's storage.
    285   _.operands.clear();
    286 
    287   assert(_.word_index < _.num_words);
    288   // Decompose and check the first word.
    289   uint16_t inst_word_count = 0;
    290   spvOpcodeSplit(first_word, &inst_word_count, &inst.opcode);
    291   if (inst_word_count < 1) {
    292     return diagnostic() << "Invalid instruction word count: "
    293                         << inst_word_count;
    294   }
    295   spv_opcode_desc opcode_desc;
    296   if (grammar_.lookupOpcode(static_cast<SpvOp>(inst.opcode), &opcode_desc))
    297     return diagnostic() << "Invalid opcode: " << inst.opcode;
    298 
    299   // Advance past the opcode word.  But remember the of the start
    300   // of the instruction.
    301   const size_t inst_offset = _.word_index;
    302   _.word_index++;
    303 
    304   // Maintains the ordered list of expected operand types.
    305   // For many instructions we only need the {numTypes, operandTypes}
    306   // entries in opcode_desc.  However, sometimes we need to modify
    307   // the list as we parse the operands. This occurs when an operand
    308   // has its own logical operands (such as the LocalSize operand for
    309   // ExecutionMode), or for extended instructions that may have their
    310   // own operands depending on the selected extended instruction.
    311   _.expected_operands.clear();
    312   for (auto i = 0; i < opcode_desc->numTypes; i++)
    313       _.expected_operands.push_back(opcode_desc->operandTypes[opcode_desc->numTypes - i - 1]);
    314 
    315   while (_.word_index < inst_offset + inst_word_count) {
    316     const uint16_t inst_word_index = uint16_t(_.word_index - inst_offset);
    317     if (_.expected_operands.empty()) {
    318       return diagnostic() << "Invalid instruction Op" << opcode_desc->name
    319                           << " starting at word " << inst_offset
    320                           << ": expected no more operands after "
    321                           << inst_word_index
    322                           << " words, but stated word count is "
    323                           << inst_word_count << ".";
    324     }
    325 
    326     spv_operand_type_t type = spvTakeFirstMatchableOperand(&_.expected_operands);
    327 
    328     if (auto error =
    329             parseOperand(inst_offset, &inst, type, &_.endian_converted_words,
    330                          &_.operands, &_.expected_operands)) {
    331       return error;
    332     }
    333   }
    334 
    335   if (!_.expected_operands.empty() &&
    336       !spvOperandIsOptional(_.expected_operands.back())) {
    337     return diagnostic() << "End of input reached while decoding Op"
    338                         << opcode_desc->name << " starting at word "
    339                         << inst_offset << ": expected more operands after "
    340                         << inst_word_count << " words.";
    341   }
    342 
    343   if ((inst_offset + inst_word_count) != _.word_index) {
    344     return diagnostic() << "Invalid word count: Op" << opcode_desc->name
    345                         << " starting at word " << inst_offset
    346                         << " says it has " << inst_word_count
    347                         << " words, but found " << _.word_index - inst_offset
    348                         << " words instead.";
    349   }
    350 
    351   // Check the computed length of the endian-converted words vector against
    352   // the declared number of words in the instruction.  If endian conversion
    353   // is required, then they should match.  If no endian conversion was
    354   // performed, then the vector only contains the initial opcode/word-count
    355   // word.
    356   assert(!_.requires_endian_conversion ||
    357          (inst_word_count == _.endian_converted_words.size()));
    358   assert(_.requires_endian_conversion || (_.endian_converted_words.size() == 1));
    359 
    360   recordNumberType(inst_offset, &inst);
    361 
    362   if (_.requires_endian_conversion) {
    363     // We must wait until here to set this pointer, because the vector might
    364     // have been be resized while we accumulated its elements.
    365     inst.words = _.endian_converted_words.data();
    366   } else {
    367     // If no conversion is required, then just point to the underlying binary.
    368     // This saves time and space.
    369     inst.words = _.words + inst_offset;
    370   }
    371   inst.num_words = inst_word_count;
    372 
    373   // We must wait until here to set this pointer, because the vector might
    374   // have been be resized while we accumulated its elements.
    375   inst.operands = _.operands.data();
    376   inst.num_operands = uint16_t(_.operands.size());
    377 
    378   // Issue the callback.  The callee should know that all the storage in inst
    379   // is transient, and will disappear immediately afterward.
    380   if (parsed_instruction_fn_) {
    381     if (auto error = parsed_instruction_fn_(user_data_, &inst)) return error;
    382   }
    383 
    384   return SPV_SUCCESS;
    385 }
    386 
    387 spv_result_t Parser::parseOperand(size_t inst_offset,
    388                                   spv_parsed_instruction_t* inst,
    389                                   const spv_operand_type_t type,
    390                                   std::vector<uint32_t>* words,
    391                                   std::vector<spv_parsed_operand_t>* operands,
    392                                   spv_operand_pattern_t* expected_operands) {
    393   const SpvOp opcode = static_cast<SpvOp>(inst->opcode);
    394   // We'll fill in this result as we go along.
    395   spv_parsed_operand_t parsed_operand;
    396   parsed_operand.offset = uint16_t(_.word_index - inst_offset);
    397   // Most operands occupy one word.  This might be be adjusted later.
    398   parsed_operand.num_words = 1;
    399   // The type argument is the one used by the grammar to parse the instruction.
    400   // But it can exposes internal parser details such as whether an operand is
    401   // optional or actually represents a variable-length sequence of operands.
    402   // The resulting type should be adjusted to avoid those internal details.
    403   // In most cases, the resulting operand type is the same as the grammar type.
    404   parsed_operand.type = type;
    405 
    406   // Assume non-numeric values.  This will be updated for literal numbers.
    407   parsed_operand.number_kind = SPV_NUMBER_NONE;
    408   parsed_operand.number_bit_width = 0;
    409 
    410   if (_.word_index >= _.num_words)
    411     return exhaustedInputDiagnostic(inst_offset, opcode, type);
    412 
    413   const uint32_t word = peek();
    414 
    415   // Do the words in this operand have to be converted to native endianness?
    416   // True for all but literal strings.
    417   bool convert_operand_endianness = true;
    418 
    419   switch (type) {
    420     case SPV_OPERAND_TYPE_TYPE_ID:
    421       if (!word)
    422         return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Type Id is 0";
    423       inst->type_id = word;
    424       break;
    425 
    426     case SPV_OPERAND_TYPE_RESULT_ID:
    427       if (!word)
    428         return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Result Id is 0";
    429       inst->result_id = word;
    430       // Save the result ID to type ID mapping.
    431       // In the grammar, type ID always appears before result ID.
    432       if (_.id_to_type_id.find(inst->result_id) != _.id_to_type_id.end())
    433         return diagnostic(SPV_ERROR_INVALID_ID) << "Id " << inst->result_id
    434                                                 << " is defined more than once";
    435       // Record it.
    436       // A regular value maps to its type.  Some instructions (e.g. OpLabel)
    437       // have no type Id, and will map to 0.  The result Id for a
    438       // type-generating instruction (e.g. OpTypeInt) maps to itself.
    439       _.id_to_type_id[inst->result_id] =
    440           spvOpcodeGeneratesType(opcode) ? inst->result_id : inst->type_id;
    441       break;
    442 
    443     case SPV_OPERAND_TYPE_ID:
    444     case SPV_OPERAND_TYPE_OPTIONAL_ID:
    445       if (!word) return diagnostic(SPV_ERROR_INVALID_ID) << "Id is 0";
    446       parsed_operand.type = SPV_OPERAND_TYPE_ID;
    447 
    448       if (opcode == SpvOpExtInst && parsed_operand.offset == 3) {
    449         // The current word is the extended instruction set Id.
    450         // Set the extended instruction set type for the current instruction.
    451         auto ext_inst_type_iter = _.import_id_to_ext_inst_type.find(word);
    452         if (ext_inst_type_iter == _.import_id_to_ext_inst_type.end()) {
    453           return diagnostic(SPV_ERROR_INVALID_ID)
    454                  << "OpExtInst set Id " << word
    455                  << " does not reference an OpExtInstImport result Id";
    456         }
    457         inst->ext_inst_type = ext_inst_type_iter->second;
    458       }
    459       break;
    460 
    461     case SPV_OPERAND_TYPE_SCOPE_ID:
    462     case SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID:
    463       // Check for trivially invalid values.  The operand descriptions already
    464       // have the word "ID" in them.
    465       if (!word) return diagnostic() << spvOperandTypeStr(type) << " is 0";
    466       break;
    467 
    468     case SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER: {
    469       assert(SpvOpExtInst == opcode);
    470       assert(inst->ext_inst_type != SPV_EXT_INST_TYPE_NONE);
    471       spv_ext_inst_desc ext_inst;
    472       if (grammar_.lookupExtInst(inst->ext_inst_type, word, &ext_inst))
    473         return diagnostic() << "Invalid extended instruction number: " << word;
    474       spvPushOperandTypes(ext_inst->operandTypes, expected_operands);
    475     } break;
    476 
    477     case SPV_OPERAND_TYPE_SPEC_CONSTANT_OP_NUMBER: {
    478       assert(SpvOpSpecConstantOp == opcode);
    479       if (grammar_.lookupSpecConstantOpcode(SpvOp(word))) {
    480         return diagnostic() << "Invalid " << spvOperandTypeStr(type) << ": "
    481                             << word;
    482       }
    483       spv_opcode_desc opcode_entry = nullptr;
    484       if (grammar_.lookupOpcode(SpvOp(word), &opcode_entry)) {
    485         return diagnostic(SPV_ERROR_INTERNAL)
    486                << "OpSpecConstant opcode table out of sync";
    487       }
    488       // OpSpecConstant opcodes must have a type and result. We've already
    489       // processed them, so skip them when preparing to parse the other
    490       // operants for the opcode.
    491       assert(opcode_entry->hasType);
    492       assert(opcode_entry->hasResult);
    493       assert(opcode_entry->numTypes >= 2);
    494       spvPushOperandTypes(opcode_entry->operandTypes + 2, expected_operands);
    495     } break;
    496 
    497     case SPV_OPERAND_TYPE_LITERAL_INTEGER:
    498     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER:
    499       // These are regular single-word literal integer operands.
    500       // Post-parsing validation should check the range of the parsed value.
    501       parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_INTEGER;
    502       // It turns out they are always unsigned integers!
    503       parsed_operand.number_kind = SPV_NUMBER_UNSIGNED_INT;
    504       parsed_operand.number_bit_width = 32;
    505       break;
    506 
    507     case SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER:
    508     case SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER:
    509       parsed_operand.type = SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER;
    510       if (opcode == SpvOpSwitch) {
    511         // The literal operands have the same type as the value
    512         // referenced by the selector Id.
    513         const uint32_t selector_id = peekAt(inst_offset + 1);
    514         const auto type_id_iter = _.id_to_type_id.find(selector_id);
    515         if (type_id_iter == _.id_to_type_id.end() ||
    516             type_id_iter->second == 0) {
    517           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    518                               << " has no type";
    519         }
    520         uint32_t type_id = type_id_iter->second;
    521 
    522         if (selector_id == type_id) {
    523           // Recall that by convention, a result ID that is a type definition
    524           // maps to itself.
    525           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    526                               << " is a type, not a value";
    527         }
    528         if (auto error = setNumericTypeInfoForType(&parsed_operand, type_id))
    529           return error;
    530         if (parsed_operand.number_kind != SPV_NUMBER_UNSIGNED_INT &&
    531             parsed_operand.number_kind != SPV_NUMBER_SIGNED_INT) {
    532           return diagnostic() << "Invalid OpSwitch: selector id " << selector_id
    533                               << " is not a scalar integer";
    534         }
    535       } else {
    536         assert(opcode == SpvOpConstant || opcode == SpvOpSpecConstant);
    537         // The literal number type is determined by the type Id for the
    538         // constant.
    539         assert(inst->type_id);
    540         if (auto error =
    541                 setNumericTypeInfoForType(&parsed_operand, inst->type_id))
    542           return error;
    543       }
    544       break;
    545 
    546     case SPV_OPERAND_TYPE_LITERAL_STRING:
    547     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING: {
    548       convert_operand_endianness = false;
    549       const char* string =
    550           reinterpret_cast<const char*>(_.words + _.word_index);
    551       // Compute the length of the string, but make sure we don't run off the
    552       // end of the input.
    553       const size_t remaining_input_bytes =
    554           sizeof(uint32_t) * (_.num_words - _.word_index);
    555       const size_t string_num_content_bytes =
    556           spv_strnlen_s(string, remaining_input_bytes);
    557       // If there was no terminating null byte, then that's an end-of-input
    558       // error.
    559       if (string_num_content_bytes == remaining_input_bytes)
    560         return exhaustedInputDiagnostic(inst_offset, opcode, type);
    561       // Account for null in the word length, so add 1 for null, then add 3 to
    562       // make sure we round up.  The following is equivalent to:
    563       //    (string_num_content_bytes + 1 + 3) / 4
    564       const size_t string_num_words = string_num_content_bytes / 4 + 1;
    565       // Make sure we can record the word count without overflow.
    566       //
    567       // This error can't currently be triggered because of validity
    568       // checks elsewhere.
    569       if (string_num_words > std::numeric_limits<uint16_t>::max()) {
    570         return diagnostic() << "Literal string is longer than "
    571                             << std::numeric_limits<uint16_t>::max()
    572                             << " words: " << string_num_words << " words long";
    573       }
    574       parsed_operand.num_words = uint16_t(string_num_words);
    575       parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_STRING;
    576 
    577       if (SpvOpExtInstImport == opcode) {
    578         // Record the extended instruction type for the ID for this import.
    579         // There is only one string literal argument to OpExtInstImport,
    580         // so it's sufficient to guard this just on the opcode.
    581         const spv_ext_inst_type_t ext_inst_type =
    582             spvExtInstImportTypeGet(string);
    583         if (SPV_EXT_INST_TYPE_NONE == ext_inst_type) {
    584           return diagnostic() << "Invalid extended instruction import '"
    585                               << string << "'";
    586         }
    587         // We must have parsed a valid result ID.  It's a condition
    588         // of the grammar, and we only accept non-zero result Ids.
    589         assert(inst->result_id);
    590         _.import_id_to_ext_inst_type[inst->result_id] = ext_inst_type;
    591       }
    592     } break;
    593 
    594     case SPV_OPERAND_TYPE_CAPABILITY:
    595     case SPV_OPERAND_TYPE_SOURCE_LANGUAGE:
    596     case SPV_OPERAND_TYPE_EXECUTION_MODEL:
    597     case SPV_OPERAND_TYPE_ADDRESSING_MODEL:
    598     case SPV_OPERAND_TYPE_MEMORY_MODEL:
    599     case SPV_OPERAND_TYPE_EXECUTION_MODE:
    600     case SPV_OPERAND_TYPE_STORAGE_CLASS:
    601     case SPV_OPERAND_TYPE_DIMENSIONALITY:
    602     case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE:
    603     case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE:
    604     case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT:
    605     case SPV_OPERAND_TYPE_FP_ROUNDING_MODE:
    606     case SPV_OPERAND_TYPE_LINKAGE_TYPE:
    607     case SPV_OPERAND_TYPE_ACCESS_QUALIFIER:
    608     case SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER:
    609     case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE:
    610     case SPV_OPERAND_TYPE_DECORATION:
    611     case SPV_OPERAND_TYPE_BUILT_IN:
    612     case SPV_OPERAND_TYPE_GROUP_OPERATION:
    613     case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS:
    614     case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO: {
    615       // A single word that is a plain enum value.
    616 
    617       // Map an optional operand type to its corresponding concrete type.
    618       if (type == SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER)
    619         parsed_operand.type = SPV_OPERAND_TYPE_ACCESS_QUALIFIER;
    620 
    621       spv_operand_desc entry;
    622       if (grammar_.lookupOperand(type, word, &entry)) {
    623         return diagnostic() << "Invalid "
    624                             << spvOperandTypeStr(parsed_operand.type)
    625                             << " operand: " << word;
    626       }
    627       // Prepare to accept operands to this operand, if needed.
    628       spvPushOperandTypes(entry->operandTypes, expected_operands);
    629     } break;
    630 
    631     case SPV_OPERAND_TYPE_FP_FAST_MATH_MODE:
    632     case SPV_OPERAND_TYPE_FUNCTION_CONTROL:
    633     case SPV_OPERAND_TYPE_LOOP_CONTROL:
    634     case SPV_OPERAND_TYPE_IMAGE:
    635     case SPV_OPERAND_TYPE_OPTIONAL_IMAGE:
    636     case SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS:
    637     case SPV_OPERAND_TYPE_SELECTION_CONTROL: {
    638       // This operand is a mask.
    639 
    640       // Map an optional operand type to its corresponding concrete type.
    641       if (type == SPV_OPERAND_TYPE_OPTIONAL_IMAGE)
    642         parsed_operand.type = SPV_OPERAND_TYPE_IMAGE;
    643       else if (type == SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS)
    644         parsed_operand.type = SPV_OPERAND_TYPE_MEMORY_ACCESS;
    645 
    646       // Check validity of set mask bits. Also prepare for operands for those
    647       // masks if they have any.  To get operand order correct, scan from
    648       // MSB to LSB since we can only prepend operands to a pattern.
    649       // The only case in the grammar where you have more than one mask bit
    650       // having an operand is for image operands.  See SPIR-V 3.14 Image
    651       // Operands.
    652       uint32_t remaining_word = word;
    653       for (uint32_t mask = (1u << 31); remaining_word; mask >>= 1) {
    654         if (remaining_word & mask) {
    655           spv_operand_desc entry;
    656           if (grammar_.lookupOperand(type, mask, &entry)) {
    657             return diagnostic()
    658                    << "Invalid " << spvOperandTypeStr(parsed_operand.type)
    659                    << " operand: " << word << " has invalid mask component "
    660                    << mask;
    661           }
    662           remaining_word ^= mask;
    663           spvPushOperandTypes(entry->operandTypes, expected_operands);
    664         }
    665       }
    666       if (word == 0) {
    667         // An all-zeroes mask *might* also be valid.
    668         spv_operand_desc entry;
    669         if (SPV_SUCCESS == grammar_.lookupOperand(type, 0, &entry)) {
    670           // Prepare for its operands, if any.
    671           spvPushOperandTypes(entry->operandTypes, expected_operands);
    672         }
    673       }
    674     } break;
    675     default:
    676       return diagnostic() << "Internal error: Unhandled operand type: " << type;
    677   }
    678 
    679   assert(int(SPV_OPERAND_TYPE_FIRST_CONCRETE_TYPE) <= int(parsed_operand.type));
    680   assert(int(SPV_OPERAND_TYPE_LAST_CONCRETE_TYPE) >= int(parsed_operand.type));
    681 
    682   operands->push_back(parsed_operand);
    683 
    684   const size_t index_after_operand = _.word_index + parsed_operand.num_words;
    685 
    686   // Avoid buffer overrun for the cases where the operand has more than one
    687   // word, and where it isn't a string.  (Those other cases have already been
    688   // handled earlier.)  For example, this error can occur for a multi-word
    689   // argument to OpConstant, or a multi-word case literal operand for OpSwitch.
    690   if (_.num_words < index_after_operand)
    691     return exhaustedInputDiagnostic(inst_offset, opcode, type);
    692 
    693   if (_.requires_endian_conversion) {
    694     // Copy instruction words.  Translate to native endianness as needed.
    695     if (convert_operand_endianness) {
    696       const spv_endianness_t endianness = _.endian;
    697       std::transform(_.words + _.word_index, _.words + index_after_operand,
    698                      std::back_inserter(*words),
    699                      [endianness](const uint32_t raw_word) {
    700                        return spvFixWord(raw_word, endianness);
    701                      });
    702     } else {
    703       words->insert(words->end(), _.words + _.word_index,
    704                     _.words + index_after_operand);
    705     }
    706   }
    707 
    708   // Advance past the operand.
    709   _.word_index = index_after_operand;
    710 
    711   return SPV_SUCCESS;
    712 }
    713 
    714 spv_result_t Parser::setNumericTypeInfoForType(
    715     spv_parsed_operand_t* parsed_operand, uint32_t type_id) {
    716   assert(type_id != 0);
    717   auto type_info_iter = _.type_id_to_number_type_info.find(type_id);
    718   if (type_info_iter == _.type_id_to_number_type_info.end()) {
    719     return diagnostic() << "Type Id " << type_id << " is not a type";
    720   }
    721   const NumberType& info = type_info_iter->second;
    722   if (info.type == SPV_NUMBER_NONE) {
    723     // This is a valid type, but for something other than a scalar number.
    724     return diagnostic() << "Type Id " << type_id
    725                         << " is not a scalar numeric type";
    726   }
    727 
    728   parsed_operand->number_kind = info.type;
    729   parsed_operand->number_bit_width = info.bit_width;
    730   // Round up the word count.
    731   parsed_operand->num_words = static_cast<uint16_t>((info.bit_width + 31) / 32);
    732   return SPV_SUCCESS;
    733 }
    734 
    735 void Parser::recordNumberType(size_t inst_offset,
    736                               const spv_parsed_instruction_t* inst) {
    737   const SpvOp opcode = static_cast<SpvOp>(inst->opcode);
    738   if (spvOpcodeGeneratesType(opcode)) {
    739     NumberType info = {SPV_NUMBER_NONE, 0};
    740     if (SpvOpTypeInt == opcode) {
    741       const bool is_signed = peekAt(inst_offset + 3) != 0;
    742       info.type = is_signed ? SPV_NUMBER_SIGNED_INT : SPV_NUMBER_UNSIGNED_INT;
    743       info.bit_width = peekAt(inst_offset + 2);
    744     } else if (SpvOpTypeFloat == opcode) {
    745       info.type = SPV_NUMBER_FLOATING;
    746       info.bit_width = peekAt(inst_offset + 2);
    747     }
    748     // The *result* Id of a type generating instruction is the type Id.
    749     _.type_id_to_number_type_info[inst->result_id] = info;
    750   }
    751 }
    752 
    753 }  // anonymous namespace
    754 
    755 spv_result_t spvBinaryParse(const spv_const_context context, void* user_data,
    756                             const uint32_t* code, const size_t num_words,
    757                             spv_parsed_header_fn_t parsed_header,
    758                             spv_parsed_instruction_fn_t parsed_instruction,
    759                             spv_diagnostic* diagnostic) {
    760   spv_context_t hijack_context = *context;
    761   if (diagnostic) {
    762     *diagnostic = nullptr;
    763     libspirv::UseDiagnosticAsMessageConsumer(&hijack_context, diagnostic);
    764   }
    765   Parser parser(&hijack_context, user_data, parsed_header, parsed_instruction);
    766   return parser.parse(code, num_words, diagnostic);
    767 }
    768 
    769 // TODO(dneto): This probably belongs in text.cpp since that's the only place
    770 // that a spv_binary_t value is created.
    771 void spvBinaryDestroy(spv_binary binary) {
    772   if (!binary) return;
    773   delete[] binary->code;
    774   delete binary;
    775 }
    776 
    777 size_t spv_strnlen_s(const char* str, size_t strsz) {
    778   if (!str) return 0;
    779   for (size_t i = 0; i < strsz; i++) {
    780     if (!str[i]) return i;
    781   }
    782   return strsz;
    783 }
    784