1 // Copyright (c) 2015-2016 The Khronos Group Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a 4 // copy of this software and/or associated documentation files (the 5 // "Materials"), to deal in the Materials without restriction, including 6 // without limitation the rights to use, copy, modify, merge, publish, 7 // distribute, sublicense, and/or sell copies of the Materials, and to 8 // permit persons to whom the Materials are furnished to do so, subject to 9 // the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included 12 // in all copies or substantial portions of the Materials. 13 // 14 // MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 15 // KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 16 // SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 17 // https://www.khronos.org/registry/ 18 // 19 // THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 // MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 26 27 #include "binary.h" 28 29 #include <algorithm> 30 #include <cassert> 31 #include <cstring> 32 #include <iterator> 33 #include <limits> 34 #include <unordered_map> 35 36 #include "assembly_grammar.h" 37 #include "diagnostic.h" 38 #include "ext_inst.h" 39 #include "opcode.h" 40 #include "operand.h" 41 #include "spirv-tools/libspirv.h" 42 #include "spirv_constant.h" 43 #include "spirv_endian.h" 44 45 spv_result_t spvBinaryHeaderGet(const spv_const_binary binary, 46 const spv_endianness_t endian, 47 spv_header_t* pHeader) { 48 if (!binary->code) return SPV_ERROR_INVALID_BINARY; 49 if (binary->wordCount < SPV_INDEX_INSTRUCTION) 50 return SPV_ERROR_INVALID_BINARY; 51 if (!pHeader) return SPV_ERROR_INVALID_POINTER; 52 53 // TODO: Validation checking? 54 pHeader->magic = spvFixWord(binary->code[SPV_INDEX_MAGIC_NUMBER], endian); 55 pHeader->version = spvFixWord(binary->code[SPV_INDEX_VERSION_NUMBER], endian); 56 pHeader->generator = 57 spvFixWord(binary->code[SPV_INDEX_GENERATOR_NUMBER], endian); 58 pHeader->bound = spvFixWord(binary->code[SPV_INDEX_BOUND], endian); 59 pHeader->schema = spvFixWord(binary->code[SPV_INDEX_SCHEMA], endian); 60 pHeader->instructions = &binary->code[SPV_INDEX_INSTRUCTION]; 61 62 return SPV_SUCCESS; 63 } 64 65 namespace { 66 67 // A SPIR-V binary parser. A parser instance communicates detailed parse 68 // results via callbacks. 69 class Parser { 70 public: 71 // The user_data value is provided to the callbacks as context. 72 Parser(const spv_const_context context, void* user_data, 73 spv_parsed_header_fn_t parsed_header_fn, 74 spv_parsed_instruction_fn_t parsed_instruction_fn) 75 : grammar_(context), 76 user_data_(user_data), 77 parsed_header_fn_(parsed_header_fn), 78 parsed_instruction_fn_(parsed_instruction_fn) {} 79 80 // Parses the specified binary SPIR-V module, issuing callbacks on a parsed 81 // header and for each parsed instruction. Returns SPV_SUCCESS on success. 82 // Otherwise returns an error code and issues a diagnostic. 83 spv_result_t parse(const uint32_t* words, size_t num_words, 84 spv_diagnostic* diagnostic); 85 86 private: 87 // All remaining methods work on the current module parse state. 88 89 // Like the parse method, but works on the current module parse state. 90 spv_result_t parseModule(); 91 92 // Parses an instruction at the current position of the binary. Assumes 93 // the header has been parsed, the endian has been set, and the word index is 94 // still in range. Advances the parsing position past the instruction, and 95 // updates other parsing state for the current module. 96 // On success, returns SPV_SUCCESS and issues the parsed-instruction callback. 97 // On failure, returns an error code and issues a diagnostic. 98 spv_result_t parseInstruction(); 99 100 // Parses an instruction operand with the given type, for an instruction 101 // starting at inst_offset words into the SPIR-V binary. 102 // If the SPIR-V binary is the same endianness as the host, then the 103 // endian_converted_inst_words parameter is ignored. Otherwise, this method 104 // appends the words for this operand, converted to host native endianness, 105 // to the end of endian_converted_inst_words. This method also updates the 106 // expected_operands parameter, and the scalar members of the inst parameter. 107 // On success, returns SPV_SUCCESS, advances past the operand, and pushes a 108 // new entry on to the operands vector. Otherwise returns an error code and 109 // issues a diagnostic. 110 spv_result_t parseOperand(size_t inst_offset, spv_parsed_instruction_t* inst, 111 const spv_operand_type_t type, 112 std::vector<uint32_t>* endian_converted_inst_words, 113 std::vector<spv_parsed_operand_t>* operands, 114 spv_operand_pattern_t* expected_operands); 115 116 // Records the numeric type for an operand according to the type information 117 // associated with the given non-zero type Id. This can fail if the type Id 118 // is not a type Id, or if the type Id does not reference a scalar numeric 119 // type. On success, return SPV_SUCCESS and populates the num_words, 120 // number_kind, and number_bit_width fields of parsed_operand. 121 spv_result_t setNumericTypeInfoForType(spv_parsed_operand_t* parsed_operand, 122 uint32_t type_id); 123 124 // Records the number type for an instruction at the given offset, if that 125 // instruction generates a type. For types that aren't scalar numbers, 126 // record something with number kind SPV_NUMBER_NONE. 127 void recordNumberType(size_t inst_offset, 128 const spv_parsed_instruction_t* inst); 129 130 // Returns a diagnostic stream object initialized with current position in 131 // the input stream, and for the given error code. Any data written to the 132 // returned object will be propagated to the current parse's diagnostic 133 // object. 134 libspirv::DiagnosticStream diagnostic(spv_result_t error) { 135 return libspirv::DiagnosticStream({0, 0, _.word_index}, _.diagnostic, 136 error); 137 } 138 139 // Returns a diagnostic stream object with the default parse error code. 140 libspirv::DiagnosticStream diagnostic() { 141 // The default failure for parsing is invalid binary. 142 return diagnostic(SPV_ERROR_INVALID_BINARY); 143 } 144 145 // Issues a diagnostic describing an exhaustion of input condition when 146 // trying to decode an instruction operand, and returns 147 // SPV_ERROR_INVALID_BINARY. 148 spv_result_t exhaustedInputDiagnostic(size_t inst_offset, SpvOp opcode, 149 spv_operand_type_t type) { 150 return diagnostic() << "End of input reached while decoding Op" 151 << spvOpcodeString(opcode) << " starting at word " 152 << inst_offset 153 << ((_.word_index < _.num_words) ? ": truncated " 154 : ": missing ") 155 << spvOperandTypeStr(type) << " operand at word offset " 156 << _.word_index - inst_offset << "."; 157 } 158 159 // Returns the endian-corrected word at the current position. 160 uint32_t peek() const { return peekAt(_.word_index); } 161 162 // Returns the endian-corrected word at the given position. 163 uint32_t peekAt(size_t index) const { 164 assert(index < _.num_words); 165 return spvFixWord(_.words[index], _.endian); 166 } 167 168 // Data members 169 170 const libspirv::AssemblyGrammar grammar_; // SPIR-V syntax utility. 171 void* const user_data_; // Context for the callbacks 172 const spv_parsed_header_fn_t parsed_header_fn_; // Parsed header callback 173 const spv_parsed_instruction_fn_t 174 parsed_instruction_fn_; // Parsed instruction callback 175 176 // Describes the format of a typed literal number. 177 struct NumberType { 178 spv_number_kind_t type; 179 uint32_t bit_width; 180 }; 181 182 // The state used to parse a single SPIR-V binary module. 183 struct State { 184 State(const uint32_t* words_arg, size_t num_words_arg, 185 spv_diagnostic* diagnostic_arg) 186 : words(words_arg), 187 num_words(num_words_arg), 188 diagnostic(diagnostic_arg), 189 word_index(0), 190 endian(), 191 requires_endian_conversion(false) {} 192 State() : State(0, 0, nullptr) {} 193 const uint32_t* words; // Words in the binary SPIR-V module. 194 size_t num_words; // Number of words in the module. 195 spv_diagnostic* diagnostic; // Where diagnostics go. 196 size_t word_index; // The current position in words. 197 spv_endianness_t endian; // The endianness of the binary. 198 // Is the SPIR-V binary in a different endiannes from the host native 199 // endianness? 200 bool requires_endian_conversion; 201 202 // Maps a result ID to its type ID. By convention: 203 // - a result ID that is a type definition maps to itself. 204 // - a result ID without a type maps to 0. (E.g. for OpLabel) 205 std::unordered_map<uint32_t, uint32_t> id_to_type_id; 206 // Maps a type ID to its number type description. 207 std::unordered_map<uint32_t, NumberType> type_id_to_number_type_info; 208 // Maps an ExtInstImport id to the extended instruction type. 209 std::unordered_map<uint32_t, spv_ext_inst_type_t> 210 import_id_to_ext_inst_type; 211 } _; 212 }; 213 214 spv_result_t Parser::parse(const uint32_t* words, size_t num_words, 215 spv_diagnostic* diagnostic_arg) { 216 _ = State(words, num_words, diagnostic_arg); 217 218 const spv_result_t result = parseModule(); 219 220 // Clear the module state. The tables might be big. 221 _ = State(); 222 223 return result; 224 } 225 226 spv_result_t Parser::parseModule() { 227 if (!_.words) return diagnostic() << "Missing module."; 228 229 if (_.num_words < SPV_INDEX_INSTRUCTION) 230 return diagnostic() << "Module has incomplete header: only " << _.num_words 231 << " words instead of " << SPV_INDEX_INSTRUCTION; 232 233 // Check the magic number and detect the module's endianness. 234 spv_const_binary_t binary{_.words, _.num_words}; 235 if (spvBinaryEndianness(&binary, &_.endian)) { 236 return diagnostic() << "Invalid SPIR-V magic number '" << std::hex 237 << _.words[0] << "'."; 238 } 239 _.requires_endian_conversion = !spvIsHostEndian(_.endian); 240 241 // Process the header. 242 spv_header_t header; 243 if (spvBinaryHeaderGet(&binary, _.endian, &header)) { 244 // It turns out there is no way to trigger this error since the only 245 // failure cases are already handled above, with better messages. 246 return diagnostic(SPV_ERROR_INTERNAL) 247 << "Internal error: unhandled header parse failure"; 248 } 249 if (parsed_header_fn_) { 250 if (auto error = parsed_header_fn_(user_data_, _.endian, header.magic, 251 header.version, header.generator, 252 header.bound, header.schema)) { 253 return error; 254 } 255 } 256 257 // Process the instructions. 258 _.word_index = SPV_INDEX_INSTRUCTION; 259 while (_.word_index < _.num_words) 260 if (auto error = parseInstruction()) return error; 261 262 // Running off the end should already have been reported earlier. 263 assert(_.word_index == _.num_words); 264 265 return SPV_SUCCESS; 266 } 267 268 spv_result_t Parser::parseInstruction() { 269 // The zero values for all members except for opcode are the 270 // correct initial values. 271 spv_parsed_instruction_t inst = {}; 272 273 const uint32_t first_word = peek(); 274 275 // TODO(dneto): If it's too expensive to construct the following "words" 276 // and "operands" vectors for each instruction, each instruction, then make 277 // them class data members instead, and clear them here. 278 279 // If the module's endianness is different from the host native endianness, 280 // then converted_words contains the the endian-translated words in the 281 // instruction. 282 std::vector<uint32_t> endian_converted_words = {first_word}; 283 if (_.requires_endian_conversion) { 284 // Most instructions have fewer than 25 words. 285 endian_converted_words.reserve(25); 286 } 287 288 // After a successful parse of the instruction, the inst.operands member 289 // will point to this vector's storage. 290 std::vector<spv_parsed_operand_t> operands; 291 // Most instructions have fewer than 25 logical operands. 292 operands.reserve(25); 293 294 assert(_.word_index < _.num_words); 295 // Decompose and check the first word. 296 uint16_t inst_word_count = 0; 297 spvOpcodeSplit(first_word, &inst_word_count, &inst.opcode); 298 if (inst_word_count < 1) { 299 return diagnostic() << "Invalid instruction word count: " 300 << inst_word_count; 301 } 302 spv_opcode_desc opcode_desc; 303 if (grammar_.lookupOpcode(static_cast<SpvOp>(inst.opcode), &opcode_desc)) 304 return diagnostic() << "Invalid opcode: " << inst.opcode; 305 306 // Advance past the opcode word. But remember the of the start 307 // of the instruction. 308 const size_t inst_offset = _.word_index; 309 _.word_index++; 310 311 // Maintains the ordered list of expected operand types. 312 // For many instructions we only need the {numTypes, operandTypes} 313 // entries in opcode_desc. However, sometimes we need to modify 314 // the list as we parse the operands. This occurs when an operand 315 // has its own logical operands (such as the LocalSize operand for 316 // ExecutionMode), or for extended instructions that may have their 317 // own operands depending on the selected extended instruction. 318 spv_operand_pattern_t expected_operands( 319 opcode_desc->operandTypes, 320 opcode_desc->operandTypes + opcode_desc->numTypes); 321 322 while (_.word_index < inst_offset + inst_word_count) { 323 const uint16_t inst_word_index = uint16_t(_.word_index - inst_offset); 324 if (expected_operands.empty()) { 325 return diagnostic() << "Invalid instruction Op" << opcode_desc->name 326 << " starting at word " << inst_offset 327 << ": expected no more operands after " 328 << inst_word_index 329 << " words, but stated word count is " 330 << inst_word_count << "."; 331 } 332 333 spv_operand_type_t type = spvTakeFirstMatchableOperand(&expected_operands); 334 335 if (auto error = 336 parseOperand(inst_offset, &inst, type, &endian_converted_words, 337 &operands, &expected_operands)) { 338 return error; 339 } 340 } 341 342 if (!expected_operands.empty() && 343 !spvOperandIsOptional(expected_operands.front())) { 344 return diagnostic() << "End of input reached while decoding Op" 345 << opcode_desc->name << " starting at word " 346 << inst_offset << ": expected more operands after " 347 << inst_word_count << " words."; 348 } 349 350 if ((inst_offset + inst_word_count) != _.word_index) { 351 return diagnostic() << "Invalid word count: Op" << opcode_desc->name 352 << " starting at word " << inst_offset 353 << " says it has " << inst_word_count 354 << " words, but found " << _.word_index - inst_offset 355 << " words instead."; 356 } 357 358 // Check the computed length of the endian-converted words vector against 359 // the declared number of words in the instruction. If endian conversion 360 // is required, then they should match. If no endian conversion was 361 // performed, then the vector only contains the initial opcode/word-count 362 // word. 363 assert(!_.requires_endian_conversion || 364 (inst_word_count == endian_converted_words.size())); 365 assert(_.requires_endian_conversion || (endian_converted_words.size() == 1)); 366 367 recordNumberType(inst_offset, &inst); 368 369 if (_.requires_endian_conversion) { 370 // We must wait until here to set this pointer, because the vector might 371 // have been be resized while we accumulated its elements. 372 inst.words = endian_converted_words.data(); 373 } else { 374 // If no conversion is required, then just point to the underlying binary. 375 // This saves time and space. 376 inst.words = _.words + inst_offset; 377 } 378 inst.num_words = inst_word_count; 379 380 // We must wait until here to set this pointer, because the vector might 381 // have been be resized while we accumulated its elements. 382 inst.operands = operands.data(); 383 inst.num_operands = uint16_t(operands.size()); 384 385 // Issue the callback. The callee should know that all the storage in inst 386 // is transient, and will disappear immediately afterward. 387 if (parsed_instruction_fn_) { 388 if (auto error = parsed_instruction_fn_(user_data_, &inst)) return error; 389 } 390 391 return SPV_SUCCESS; 392 } 393 394 spv_result_t Parser::parseOperand(size_t inst_offset, 395 spv_parsed_instruction_t* inst, 396 const spv_operand_type_t type, 397 std::vector<uint32_t>* words, 398 std::vector<spv_parsed_operand_t>* operands, 399 spv_operand_pattern_t* expected_operands) { 400 const SpvOp opcode = static_cast<SpvOp>(inst->opcode); 401 // We'll fill in this result as we go along. 402 spv_parsed_operand_t parsed_operand; 403 parsed_operand.offset = uint16_t(_.word_index - inst_offset); 404 // Most operands occupy one word. This might be be adjusted later. 405 parsed_operand.num_words = 1; 406 // The type argument is the one used by the grammar to parse the instruction. 407 // But it can exposes internal parser details such as whether an operand is 408 // optional or actually represents a variable-length sequence of operands. 409 // The resulting type should be adjusted to avoid those internal details. 410 // In most cases, the resulting operand type is the same as the grammar type. 411 parsed_operand.type = type; 412 413 // Assume non-numeric values. This will be updated for literal numbers. 414 parsed_operand.number_kind = SPV_NUMBER_NONE; 415 parsed_operand.number_bit_width = 0; 416 417 if (_.word_index >= _.num_words) 418 return exhaustedInputDiagnostic(inst_offset, opcode, type); 419 420 const uint32_t word = peek(); 421 422 // Do the words in this operand have to be converted to native endianness? 423 // True for all but literal strings. 424 bool convert_operand_endianness = true; 425 426 switch (type) { 427 case SPV_OPERAND_TYPE_TYPE_ID: 428 if (!word) 429 return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Type Id is 0"; 430 inst->type_id = word; 431 break; 432 433 case SPV_OPERAND_TYPE_RESULT_ID: 434 if (!word) 435 return diagnostic(SPV_ERROR_INVALID_ID) << "Error: Result Id is 0"; 436 inst->result_id = word; 437 // Save the result ID to type ID mapping. 438 // In the grammar, type ID always appears before result ID. 439 if (_.id_to_type_id.find(inst->result_id) != _.id_to_type_id.end()) 440 return diagnostic(SPV_ERROR_INVALID_ID) << "Id " << inst->result_id 441 << " is defined more than once"; 442 // Record it. 443 // A regular value maps to its type. Some instructions (e.g. OpLabel) 444 // have no type Id, and will map to 0. The result Id for a 445 // type-generating instruction (e.g. OpTypeInt) maps to itself. 446 _.id_to_type_id[inst->result_id] = 447 spvOpcodeGeneratesType(opcode) ? inst->result_id : inst->type_id; 448 break; 449 450 case SPV_OPERAND_TYPE_ID: 451 case SPV_OPERAND_TYPE_OPTIONAL_ID: 452 if (!word) return diagnostic(SPV_ERROR_INVALID_ID) << "Id is 0"; 453 parsed_operand.type = SPV_OPERAND_TYPE_ID; 454 455 if (opcode == SpvOpExtInst && parsed_operand.offset == 3) { 456 // The current word is the extended instruction set Id. 457 // Set the extended instruction set type for the current instruction. 458 auto ext_inst_type_iter = _.import_id_to_ext_inst_type.find(word); 459 if (ext_inst_type_iter == _.import_id_to_ext_inst_type.end()) { 460 return diagnostic(SPV_ERROR_INVALID_ID) 461 << "OpExtInst set Id " << word 462 << " does not reference an OpExtInstImport result Id"; 463 } 464 inst->ext_inst_type = ext_inst_type_iter->second; 465 } 466 break; 467 468 case SPV_OPERAND_TYPE_SCOPE_ID: 469 case SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID: 470 // Check for trivially invalid values. The operand descriptions already 471 // have the word "ID" in them. 472 if (!word) return diagnostic() << spvOperandTypeStr(type) << " is 0"; 473 break; 474 475 case SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER: { 476 assert(SpvOpExtInst == opcode); 477 assert(inst->ext_inst_type != SPV_EXT_INST_TYPE_NONE); 478 spv_ext_inst_desc ext_inst; 479 if (grammar_.lookupExtInst(inst->ext_inst_type, word, &ext_inst)) 480 return diagnostic() << "Invalid extended instruction number: " << word; 481 spvPrependOperandTypes(ext_inst->operandTypes, expected_operands); 482 } break; 483 484 case SPV_OPERAND_TYPE_SPEC_CONSTANT_OP_NUMBER: { 485 assert(SpvOpSpecConstantOp == opcode); 486 if (grammar_.lookupSpecConstantOpcode(SpvOp(word))) { 487 return diagnostic() << "Invalid " << spvOperandTypeStr(type) << ": " 488 << word; 489 } 490 spv_opcode_desc opcode_entry = nullptr; 491 if (grammar_.lookupOpcode(SpvOp(word), &opcode_entry)) { 492 return diagnostic(SPV_ERROR_INTERNAL) 493 << "OpSpecConstant opcode table out of sync"; 494 } 495 // OpSpecConstant opcodes must have a type and result. We've already 496 // processed them, so skip them when preparing to parse the other 497 // operants for the opcode. 498 assert(opcode_entry->hasType); 499 assert(opcode_entry->hasResult); 500 assert(opcode_entry->numTypes >= 2); 501 spvPrependOperandTypes(opcode_entry->operandTypes + 2, expected_operands); 502 } break; 503 504 case SPV_OPERAND_TYPE_LITERAL_INTEGER: 505 case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER: 506 // These are regular single-word literal integer operands. 507 // Post-parsing validation should check the range of the parsed value. 508 parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_INTEGER; 509 // It turns out they are always unsigned integers! 510 parsed_operand.number_kind = SPV_NUMBER_UNSIGNED_INT; 511 parsed_operand.number_bit_width = 32; 512 break; 513 514 case SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER: 515 case SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER: 516 parsed_operand.type = SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER; 517 if (opcode == SpvOpSwitch) { 518 // The literal operands have the same type as the value 519 // referenced by the selector Id. 520 const uint32_t selector_id = peekAt(inst_offset + 1); 521 const auto type_id_iter = _.id_to_type_id.find(selector_id); 522 if (type_id_iter == _.id_to_type_id.end() || 523 type_id_iter->second == 0) { 524 return diagnostic() << "Invalid OpSwitch: selector id " << selector_id 525 << " has no type"; 526 } 527 uint32_t type_id = type_id_iter->second; 528 529 if (selector_id == type_id) { 530 // Recall that by convention, a result ID that is a type definition 531 // maps to itself. 532 return diagnostic() << "Invalid OpSwitch: selector id " << selector_id 533 << " is a type, not a value"; 534 } 535 if (auto error = setNumericTypeInfoForType(&parsed_operand, type_id)) 536 return error; 537 if (parsed_operand.number_kind != SPV_NUMBER_UNSIGNED_INT && 538 parsed_operand.number_kind != SPV_NUMBER_SIGNED_INT) { 539 return diagnostic() << "Invalid OpSwitch: selector id " << selector_id 540 << " is not a scalar integer"; 541 } 542 } else { 543 assert(opcode == SpvOpConstant || opcode == SpvOpSpecConstant); 544 // The literal number type is determined by the type Id for the 545 // constant. 546 assert(inst->type_id); 547 if (auto error = 548 setNumericTypeInfoForType(&parsed_operand, inst->type_id)) 549 return error; 550 } 551 break; 552 553 case SPV_OPERAND_TYPE_LITERAL_STRING: 554 case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING: { 555 convert_operand_endianness = false; 556 const char* string = 557 reinterpret_cast<const char*>(_.words + _.word_index); 558 // Compute the length of the string, but make sure we don't run off the 559 // end of the input. 560 const size_t remaining_input_bytes = 561 sizeof(uint32_t) * (_.num_words - _.word_index); 562 const size_t string_num_content_bytes = 563 strnlen(string, remaining_input_bytes); 564 // If there was no terminating null byte, then that's an end-of-input 565 // error. 566 if (string_num_content_bytes == remaining_input_bytes) 567 return exhaustedInputDiagnostic(inst_offset, opcode, type); 568 // Account for null in the word length, so add 1 for null, then add 3 to 569 // make sure we round up. The following is equivalent to: 570 // (string_num_content_bytes + 1 + 3) / 4 571 const size_t string_num_words = string_num_content_bytes / 4 + 1; 572 // Make sure we can record the word count without overflow. 573 // 574 // This error can't currently be triggered because of validity 575 // checks elsewhere. 576 if (string_num_words > std::numeric_limits<uint16_t>::max()) { 577 return diagnostic() << "Literal string is longer than " 578 << std::numeric_limits<uint16_t>::max() 579 << " words: " << string_num_words << " words long"; 580 } 581 parsed_operand.num_words = uint16_t(string_num_words); 582 parsed_operand.type = SPV_OPERAND_TYPE_LITERAL_STRING; 583 584 if (SpvOpExtInstImport == opcode) { 585 // Record the extended instruction type for the ID for this import. 586 // There is only one string literal argument to OpExtInstImport, 587 // so it's sufficient to guard this just on the opcode. 588 const spv_ext_inst_type_t ext_inst_type = 589 spvExtInstImportTypeGet(string); 590 if (SPV_EXT_INST_TYPE_NONE == ext_inst_type) { 591 return diagnostic() << "Invalid extended instruction import '" 592 << string << "'"; 593 } 594 // We must have parsed a valid result ID. It's a condition 595 // of the grammar, and we only accept non-zero result Ids. 596 assert(inst->result_id); 597 _.import_id_to_ext_inst_type[inst->result_id] = ext_inst_type; 598 } 599 } break; 600 601 case SPV_OPERAND_TYPE_CAPABILITY: 602 case SPV_OPERAND_TYPE_SOURCE_LANGUAGE: 603 case SPV_OPERAND_TYPE_EXECUTION_MODEL: 604 case SPV_OPERAND_TYPE_ADDRESSING_MODEL: 605 case SPV_OPERAND_TYPE_MEMORY_MODEL: 606 case SPV_OPERAND_TYPE_EXECUTION_MODE: 607 case SPV_OPERAND_TYPE_STORAGE_CLASS: 608 case SPV_OPERAND_TYPE_DIMENSIONALITY: 609 case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE: 610 case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE: 611 case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT: 612 case SPV_OPERAND_TYPE_FP_ROUNDING_MODE: 613 case SPV_OPERAND_TYPE_LINKAGE_TYPE: 614 case SPV_OPERAND_TYPE_ACCESS_QUALIFIER: 615 case SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER: 616 case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE: 617 case SPV_OPERAND_TYPE_DECORATION: 618 case SPV_OPERAND_TYPE_BUILT_IN: 619 case SPV_OPERAND_TYPE_GROUP_OPERATION: 620 case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS: 621 case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO: { 622 // A single word that is a plain enum value. 623 624 // Map an optional operand type to its corresponding concrete type. 625 if (type == SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER) 626 parsed_operand.type = SPV_OPERAND_TYPE_ACCESS_QUALIFIER; 627 628 spv_operand_desc entry; 629 if (grammar_.lookupOperand(type, word, &entry)) { 630 return diagnostic() << "Invalid " 631 << spvOperandTypeStr(parsed_operand.type) 632 << " operand: " << word; 633 } 634 // Prepare to accept operands to this operand, if needed. 635 spvPrependOperandTypes(entry->operandTypes, expected_operands); 636 } break; 637 638 case SPV_OPERAND_TYPE_FP_FAST_MATH_MODE: 639 case SPV_OPERAND_TYPE_FUNCTION_CONTROL: 640 case SPV_OPERAND_TYPE_LOOP_CONTROL: 641 case SPV_OPERAND_TYPE_IMAGE: 642 case SPV_OPERAND_TYPE_OPTIONAL_IMAGE: 643 case SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS: 644 case SPV_OPERAND_TYPE_SELECTION_CONTROL: { 645 // This operand is a mask. 646 647 // Map an optional operand type to its corresponding concrete type. 648 if (type == SPV_OPERAND_TYPE_OPTIONAL_IMAGE) 649 parsed_operand.type = SPV_OPERAND_TYPE_IMAGE; 650 else if (type == SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS) 651 parsed_operand.type = SPV_OPERAND_TYPE_MEMORY_ACCESS; 652 653 // Check validity of set mask bits. Also prepare for operands for those 654 // masks if they have any. To get operand order correct, scan from 655 // MSB to LSB since we can only prepend operands to a pattern. 656 // The only case in the grammar where you have more than one mask bit 657 // having an operand is for image operands. See SPIR-V 3.14 Image 658 // Operands. 659 uint32_t remaining_word = word; 660 for (uint32_t mask = (1u << 31); remaining_word; mask >>= 1) { 661 if (remaining_word & mask) { 662 spv_operand_desc entry; 663 if (grammar_.lookupOperand(type, mask, &entry)) { 664 return diagnostic() 665 << "Invalid " << spvOperandTypeStr(parsed_operand.type) 666 << " operand: " << word << " has invalid mask component " 667 << mask; 668 } 669 remaining_word ^= mask; 670 spvPrependOperandTypes(entry->operandTypes, expected_operands); 671 } 672 } 673 if (word == 0) { 674 // An all-zeroes mask *might* also be valid. 675 spv_operand_desc entry; 676 if (SPV_SUCCESS == grammar_.lookupOperand(type, 0, &entry)) { 677 // Prepare for its operands, if any. 678 spvPrependOperandTypes(entry->operandTypes, expected_operands); 679 } 680 } 681 } break; 682 default: 683 return diagnostic() << "Internal error: Unhandled operand type: " << type; 684 } 685 686 assert(int(SPV_OPERAND_TYPE_FIRST_CONCRETE_TYPE) <= int(parsed_operand.type)); 687 assert(int(SPV_OPERAND_TYPE_LAST_CONCRETE_TYPE) >= int(parsed_operand.type)); 688 689 operands->push_back(parsed_operand); 690 691 const size_t index_after_operand = _.word_index + parsed_operand.num_words; 692 693 // Avoid buffer overrun for the cases where the operand has more than one 694 // word, and where it isn't a string. (Those other cases have already been 695 // handled earlier.) For example, this error can occur for a multi-word 696 // argument to OpConstant, or a multi-word case literal operand for OpSwitch. 697 if (_.num_words < index_after_operand) 698 return exhaustedInputDiagnostic(inst_offset, opcode, type); 699 700 if (_.requires_endian_conversion) { 701 // Copy instruction words. Translate to native endianness as needed. 702 if (convert_operand_endianness) { 703 const spv_endianness_t endianness = _.endian; 704 std::transform(_.words + _.word_index, _.words + index_after_operand, 705 std::back_inserter(*words), 706 [endianness](const uint32_t raw_word) { 707 return spvFixWord(raw_word, endianness); 708 }); 709 } else { 710 words->insert(words->end(), _.words + _.word_index, 711 _.words + index_after_operand); 712 } 713 } 714 715 // Advance past the operand. 716 _.word_index = index_after_operand; 717 718 return SPV_SUCCESS; 719 } 720 721 spv_result_t Parser::setNumericTypeInfoForType( 722 spv_parsed_operand_t* parsed_operand, uint32_t type_id) { 723 assert(type_id != 0); 724 auto type_info_iter = _.type_id_to_number_type_info.find(type_id); 725 if (type_info_iter == _.type_id_to_number_type_info.end()) { 726 return diagnostic() << "Type Id " << type_id << " is not a type"; 727 } 728 const NumberType& info = type_info_iter->second; 729 if (info.type == SPV_NUMBER_NONE) { 730 // This is a valid type, but for something other than a scalar number. 731 return diagnostic() << "Type Id " << type_id 732 << " is not a scalar numeric type"; 733 } 734 735 parsed_operand->number_kind = info.type; 736 parsed_operand->number_bit_width = info.bit_width; 737 // Round up the word count. 738 parsed_operand->num_words = static_cast<uint16_t>((info.bit_width + 31) / 32); 739 return SPV_SUCCESS; 740 } 741 742 void Parser::recordNumberType(size_t inst_offset, 743 const spv_parsed_instruction_t* inst) { 744 const SpvOp opcode = static_cast<SpvOp>(inst->opcode); 745 if (spvOpcodeGeneratesType(opcode)) { 746 NumberType info = {SPV_NUMBER_NONE, 0}; 747 if (SpvOpTypeInt == opcode) { 748 const bool is_signed = peekAt(inst_offset + 3) != 0; 749 info.type = is_signed ? SPV_NUMBER_SIGNED_INT : SPV_NUMBER_UNSIGNED_INT; 750 info.bit_width = peekAt(inst_offset + 2); 751 } else if (SpvOpTypeFloat == opcode) { 752 info.type = SPV_NUMBER_FLOATING; 753 info.bit_width = peekAt(inst_offset + 2); 754 } 755 // The *result* Id of a type generating instruction is the type Id. 756 _.type_id_to_number_type_info[inst->result_id] = info; 757 } 758 } 759 760 } // anonymous namespace 761 762 spv_result_t spvBinaryParse(const spv_const_context context, void* user_data, 763 const uint32_t* code, const size_t num_words, 764 spv_parsed_header_fn_t parsed_header, 765 spv_parsed_instruction_fn_t parsed_instruction, 766 spv_diagnostic* diagnostic) { 767 Parser parser(context, user_data, parsed_header, parsed_instruction); 768 return parser.parse(code, num_words, diagnostic); 769 } 770 771 // TODO(dneto): This probably belongs in text.cpp since that's the only place 772 // that a spv_binary_t value is created. 773 void spvBinaryDestroy(spv_binary binary) { 774 if (!binary) return; 775 delete[] binary->code; 776 delete binary; 777 } 778