// Copyright (c) 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "source/opt/log.h"
#include "source/spirv_target_env.h"
#include "source/util/string_utils.h"
#include "spirv-tools/libspirv.hpp"
#include "spirv-tools/optimizer.hpp"
#include "tools/io.h"
#include "tools/util/cli_consumer.h"

namespace {

// Status and actions to perform after parsing command-line arguments.
//
// OPT_CONTINUE: flag parsing finished and the tool should go on to optimize.
// OPT_STOP: the tool should exit right away (e.g. after --help, --version or
// a flag error) using the accompanying status code.
enum OptActions { OPT_CONTINUE, OPT_STOP };

// Result of parsing the command line.
struct OptStatus {
  // What the caller should do next.
  OptActions action;
  // Value main() returns when |action| is OPT_STOP: 0 for informational
  // flags such as --help, 1 for errors.
  int code;
};

// Message consumer for this tool. Used to emit diagnostics during
// initialization and setup. Note that |source| and |position| are irrelevant
// here because we are still not processing a SPIR-V input file.
46 void opt_diagnostic(spv_message_level_t level, const char* /*source*/, 47 const spv_position_t& /*positon*/, const char* message) { 48 if (level == SPV_MSG_ERROR) { 49 fprintf(stderr, "error: "); 50 } 51 fprintf(stderr, "%s\n", message); 52 } 53 54 std::string GetListOfPassesAsString(const spvtools::Optimizer& optimizer) { 55 std::stringstream ss; 56 for (const auto& name : optimizer.GetPassNames()) { 57 ss << "\n\t\t" << name; 58 } 59 return ss.str(); 60 } 61 62 const auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_3; 63 64 std::string GetLegalizationPasses() { 65 spvtools::Optimizer optimizer(kDefaultEnvironment); 66 optimizer.RegisterLegalizationPasses(); 67 return GetListOfPassesAsString(optimizer); 68 } 69 70 std::string GetOptimizationPasses() { 71 spvtools::Optimizer optimizer(kDefaultEnvironment); 72 optimizer.RegisterPerformancePasses(); 73 return GetListOfPassesAsString(optimizer); 74 } 75 76 std::string GetSizePasses() { 77 spvtools::Optimizer optimizer(kDefaultEnvironment); 78 optimizer.RegisterSizePasses(); 79 return GetListOfPassesAsString(optimizer); 80 } 81 82 std::string GetWebGPUPasses() { 83 spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0); 84 optimizer.RegisterWebGPUPasses(); 85 return GetListOfPassesAsString(optimizer); 86 } 87 88 void PrintUsage(const char* program) { 89 // NOTE: Please maintain flags in lexicographical order. 90 printf( 91 R"(%s - Optimize a SPIR-V binary file. 92 93 USAGE: %s [options] [<input>] -o <output> 94 95 The SPIR-V binary is read from <input>. If no file is specified, 96 or if <input> is "-", then the binary is read from standard input. 97 if <output> is "-", then the optimized output is written to 98 standard output. 99 100 NOTE: The optimizer is a work in progress. 101 102 Options (in lexicographical order): 103 --ccp 104 Apply the conditional constant propagation transform. 
This will 105 propagate constant values throughout the program, and simplify 106 expressions and conditional jumps with known predicate 107 values. Performed on entry point call tree functions and 108 exported functions. 109 --cfg-cleanup 110 Cleanup the control flow graph. This will remove any unnecessary 111 code from the CFG like unreachable code. Performed on entry 112 point call tree functions and exported functions. 113 --combine-access-chains 114 Combines chained access chains to produce a single instruction 115 where possible. 116 --compact-ids 117 Remap result ids to a compact range starting from %%1 and without 118 any gaps. 119 --convert-local-access-chains 120 Convert constant index access chain loads/stores into 121 equivalent load/stores with inserts and extracts. Performed 122 on function scope variables referenced only with load, store, 123 and constant index access chains in entry point call tree 124 functions. 125 --copy-propagate-arrays 126 Does propagation of memory references when an array is a copy of 127 another. It will only propagate an array if the source is never 128 written to, and the only store to the target is the copy. 129 --eliminate-common-uniform 130 Perform load/load elimination for duplicate uniform values. 131 Converts any constant index access chain uniform loads into 132 its equivalent load and extract. Some loads will be moved 133 to facilitate sharing. Performed only on entry point 134 call tree functions. 135 --eliminate-dead-branches 136 Convert conditional branches with constant condition to the 137 indicated unconditional brranch. Delete all resulting dead 138 code. Performed only on entry point call tree functions. 139 --eliminate-dead-code-aggressive 140 Delete instructions which do not contribute to a function's 141 output. Performed only on entry point call tree functions. 142 --eliminate-dead-const 143 Eliminate dead constants. 
144 --eliminate-dead-functions 145 Deletes functions that cannot be reached from entry points or 146 exported functions. 147 --eliminate-dead-inserts 148 Deletes unreferenced inserts into composites, most notably 149 unused stores to vector components, that are not removed by 150 aggressive dead code elimination. 151 --eliminate-dead-variables 152 Deletes module scope variables that are not referenced. 153 --eliminate-insert-extract 154 DEPRECATED. This pass has been replaced by the simplification 155 pass, and that pass will be run instead. 156 See --simplify-instructions. 157 --eliminate-local-multi-store 158 Replace stores and loads of function scope variables that are 159 stored multiple times. Performed on variables referenceed only 160 with loads and stores. Performed only on entry point call tree 161 functions. 162 --eliminate-local-single-block 163 Perform single-block store/load and load/load elimination. 164 Performed only on function scope variables in entry point 165 call tree functions. 166 --eliminate-local-single-store 167 Replace stores and loads of function scope variables that are 168 only stored once. Performed on variables referenceed only with 169 loads and stores. Performed only on entry point call tree 170 functions. 171 --flatten-decorations 172 Replace decoration groups with repeated OpDecorate and 173 OpMemberDecorate instructions. 174 --fold-spec-const-op-composite 175 Fold the spec constants defined by OpSpecConstantOp or 176 OpSpecConstantComposite instructions to front-end constants 177 when possible. 178 --freeze-spec-const 179 Freeze the values of specialization constants to their default 180 values. 181 --if-conversion 182 Convert if-then-else like assignments into OpSelect. 183 --inline-entry-points-exhaustive 184 Exhaustively inline all function calls in entry point call tree 185 functions. Currently does not inline calls to functions with 186 early return in a loop. 
187 --legalize-hlsl 188 Runs a series of optimizations that attempts to take SPIR-V 189 generated by an HLSL front-end and generates legal Vulkan SPIR-V. 190 The optimizations are: 191 %s 192 193 Note this does not guarantee legal code. This option passes the 194 option --relax-logical-pointer to the validator. 195 --local-redundancy-elimination 196 Looks for instructions in the same basic block that compute the 197 same value, and deletes the redundant ones. 198 --loop-fission 199 Splits any top level loops in which the register pressure has 200 exceeded a given threshold. The threshold must follow the use of 201 this flag and must be a positive integer value. 202 --loop-fusion 203 Identifies adjacent loops with the same lower and upper bound. 204 If this is legal, then merge the loops into a single loop. 205 Includes heuristics to ensure it does not increase number of 206 registers too much, while reducing the number of loads from 207 memory. Takes an additional positive integer argument to set 208 the maximum number of registers. 209 --loop-invariant-code-motion 210 Identifies code in loops that has the same value for every 211 iteration of the loop, and move it to the loop pre-header. 212 --loop-unroll 213 Fully unrolls loops marked with the Unroll flag 214 --loop-unroll-partial 215 Partially unrolls loops marked with the Unroll flag. Takes an 216 additional non-0 integer argument to set the unroll factor, or 217 how many times a loop body should be duplicated 218 --loop-peeling 219 Execute few first (respectively last) iterations before 220 (respectively after) the loop if it can elide some branches. 221 --loop-peeling-threshold 222 Takes a non-0 integer argument to set the loop peeling code size 223 growth threshold. The threshold prevents the loop peeling 224 from happening if the code size increase created by 225 the optimization is above the threshold. 226 --max-id-bound=<n> 227 Sets the maximum value for the id bound for the moudle. 
The 228 default is the minimum value for this limit, 0x3FFFFF. See 229 section 2.17 of the Spir-V specification. 230 --merge-blocks 231 Join two blocks into a single block if the second has the 232 first as its only predecessor. Performed only on entry point 233 call tree functions. 234 --merge-return 235 Changes functions that have multiple return statements so they 236 have a single return statement. 237 238 For structured control flow it is assumed that the only 239 unreachable blocks in the function are trivial merge and continue 240 blocks. 241 242 A trivial merge block contains the label and an OpUnreachable 243 instructions, nothing else. A trivial continue block contain a 244 label and an OpBranch to the header, nothing else. 245 246 These conditions are guaranteed to be met after running 247 dead-branch elimination. 248 --loop-unswitch 249 Hoists loop-invariant conditionals out of loops by duplicating 250 the loop on each branch of the conditional and adjusting each 251 copy of the loop. 252 -O 253 Optimize for performance. Apply a sequence of transformations 254 in an attempt to improve the performance of the generated 255 code. For this version of the optimizer, this flag is equivalent 256 to specifying the following optimization code names: 257 %s 258 -Os 259 Optimize for size. Apply a sequence of transformations in an 260 attempt to minimize the size of the generated code. For this 261 version of the optimizer, this flag is equivalent to specifying 262 the following optimization code names: 263 %s 264 265 NOTE: The specific transformations done by -O and -Os change 266 from release to release. 267 -Oconfig=<file> 268 Apply the sequence of transformations indicated in <file>. 269 This file contains a sequence of strings separated by whitespace 270 (tabs, newlines or blanks). Each string is one of the flags 271 accepted by spirv-opt. Optimizations will be applied in the 272 sequence they appear in the file. 
This is equivalent to 273 specifying all the flags on the command line. For example, 274 given the file opts.cfg with the content: 275 276 --inline-entry-points-exhaustive 277 --eliminate-dead-code-aggressive 278 279 The following two invocations to spirv-opt are equivalent: 280 281 $ spirv-opt -Oconfig=opts.cfg program.spv 282 283 $ spirv-opt --inline-entry-points-exhaustive \ 284 --eliminate-dead-code-aggressive program.spv 285 286 Lines starting with the character '#' in the configuration 287 file indicate a comment and will be ignored. 288 289 The -O, -Os, and -Oconfig flags act as macros. Using one of them 290 is equivalent to explicitly inserting the underlying flags at 291 that position in the command line. For example, the invocation 292 'spirv-opt --merge-blocks -O ...' applies the transformation 293 --merge-blocks followed by all the transformations implied by 294 -O. 295 --print-all 296 Print SPIR-V assembly to standard error output before each pass 297 and after the last pass. 298 --private-to-local 299 Change the scope of private variables that are used in a single 300 function to that function. 301 --reduce-load-size 302 Replaces loads of composite objects where not every component is 303 used by loads of just the elements that are used. 304 --redundancy-elimination 305 Looks for instructions in the same function that compute the 306 same value, and deletes the redundant ones. 307 --relax-struct-store 308 Allow store from one struct type to a different type with 309 compatible layout and members. This option is forwarded to the 310 validator. 311 --remove-duplicates 312 Removes duplicate types, decorations, capabilities and extension 313 instructions. 314 --replace-invalid-opcode 315 Replaces instructions whose opcode is valid for shader modules, 316 but not for the current shader stage. To have an effect, all 317 entry points must have the same execution model. 
318 --ssa-rewrite 319 Replace loads and stores to function local variables with 320 operations on SSA IDs. 321 --scalar-replacement[=<n>] 322 Replace aggregate function scope variables that are only accessed 323 via their elements with new function variables representing each 324 element. <n> is a limit on the size of the aggragates that will 325 be replaced. 0 means there is no limit. The default value is 326 100. 327 --set-spec-const-default-value "<spec id>:<default value> ..." 328 Set the default values of the specialization constants with 329 <spec id>:<default value> pairs specified in a double-quoted 330 string. <spec id>:<default value> pairs must be separated by 331 blank spaces, and in each pair, spec id and default value must 332 be separated with colon ':' without any blank spaces in between. 333 e.g.: --set-spec-const-default-value "1:100 2:400" 334 --simplify-instructions 335 Will simplify all instructions in the function as much as 336 possible. 337 --skip-validation 338 Will not validate the SPIR-V before optimizing. If the SPIR-V 339 is invalid, the optimizer may fail or generate incorrect code. 340 This options should be used rarely, and with caution. 341 --strength-reduction 342 Replaces instructions with equivalent and less expensive ones. 343 --strip-debug 344 Remove all debug instructions. 345 --strip-reflect 346 Remove all reflection information. For now, this covers 347 reflection information defined by SPV_GOOGLE_hlsl_functionality1. 348 --target-env=<env> 349 Set the target environment. Without this flag the target 350 enviroment defaults to spv1.3. 351 <env> must be one of vulkan1.0, vulkan1.1, opencl2.2, spv1.0, 352 spv1.1, spv1.2, spv1.3, or webgpu0. 353 --time-report 354 Print the resource utilization of each pass (e.g., CPU time, 355 RSS) to standard error output. Currently it supports only Unix 356 systems. This option is the same as -ftime-report in GCC. 
It 357 prints CPU/WALL/USR/SYS time (and RSS if possible), but note that 358 USR/SYS time are returned by getrusage() and can have a small 359 error. 360 --upgrade-memory-model 361 Upgrades the Logical GLSL450 memory model to Logical VulkanKHR. 362 Transforms memory, image, atomic and barrier operations to conform 363 to that model's requirements. 364 --vector-dce 365 This pass looks for components of vectors that are unused, and 366 removes them from the vector. Note this would still leave around 367 lots of dead code that a pass of ADCE will be able to remove. 368 --webgpu-mode 369 Turns on the prescribed passes for WebGPU and sets the target 370 environmet to webgpu0. Other passes may be turned on via 371 additional flags, but such combinations are not tested. 372 Using --target-env with this flag is not allowed. 373 374 This flag is the equivalent of passing in --target-env=webgpu0 375 and specifying the following optimization code names: 376 %s 377 378 NOTE: This flag is a WIP and its behaviour is subject to change. 379 --workaround-1209 380 Rewrites instructions for which there are known driver bugs to 381 avoid triggering those bugs. 382 Current workarounds: Avoid OpUnreachable in loops. 383 --unify-const 384 Remove the duplicated constants. 385 -h, --help 386 Print this help. 387 --version 388 Display optimizer version information. 389 )", 390 program, program, GetLegalizationPasses().c_str(), 391 GetOptimizationPasses().c_str(), GetSizePasses().c_str(), 392 GetWebGPUPasses().c_str()); 393 } 394 395 // Reads command-line flags the file specified in |oconfig_flag|. This string 396 // is assumed to have the form "-Oconfig=FILENAME". This function parses the 397 // string and extracts the file name after the '=' sign. 398 // 399 // Flags found in |FILENAME| are pushed at the end of the vector |file_flags|. 400 // 401 // This function returns true on success, false on failure. 
402 bool ReadFlagsFromFile(const char* oconfig_flag, 403 std::vector<std::string>* file_flags) { 404 const char* fname = strchr(oconfig_flag, '='); 405 if (fname == nullptr || fname[0] != '=') { 406 spvtools::Errorf(opt_diagnostic, nullptr, {}, "Invalid -Oconfig flag %s", 407 oconfig_flag); 408 return false; 409 } 410 fname++; 411 412 std::ifstream input_file; 413 input_file.open(fname); 414 if (input_file.fail()) { 415 spvtools::Errorf(opt_diagnostic, nullptr, {}, "Could not open file '%s'", 416 fname); 417 return false; 418 } 419 420 std::string line; 421 while (std::getline(input_file, line)) { 422 // Ignore empty lines and lines starting with the comment marker '#'. 423 if (line.length() == 0 || line[0] == '#') { 424 continue; 425 } 426 427 // Tokenize the line. Add all found tokens to the list of found flags. This 428 // mimics the way the shell will parse whitespace on the command line. NOTE: 429 // This does not support quoting and it is not intended to. 430 std::istringstream iss(line); 431 while (!iss.eof()) { 432 std::string flag; 433 iss >> flag; 434 file_flags->push_back(flag); 435 } 436 } 437 438 return true; 439 } 440 441 OptStatus ParseFlags(int argc, const char** argv, 442 spvtools::Optimizer* optimizer, const char** in_file, 443 const char** out_file, 444 spvtools::ValidatorOptions* validator_options, 445 spvtools::OptimizerOptions* optimizer_options); 446 447 // Parses and handles the -Oconfig flag. |prog_name| contains the name of 448 // the spirv-opt binary (used to build a new argv vector for the recursive 449 // invocation to ParseFlags). |opt_flag| contains the -Oconfig=FILENAME flag. 450 // |optimizer|, |in_file|, |out_file|, |validator_options|, and 451 // |optimizer_options| are as in ParseFlags. 452 // 453 // This returns the same OptStatus instance returned by ParseFlags. 
454 OptStatus ParseOconfigFlag(const char* prog_name, const char* opt_flag, 455 spvtools::Optimizer* optimizer, const char** in_file, 456 const char** out_file, 457 spvtools::ValidatorOptions* validator_options, 458 spvtools::OptimizerOptions* optimizer_options) { 459 std::vector<std::string> flags; 460 flags.push_back(prog_name); 461 462 std::vector<std::string> file_flags; 463 if (!ReadFlagsFromFile(opt_flag, &file_flags)) { 464 spvtools::Error(opt_diagnostic, nullptr, {}, 465 "Could not read optimizer flags from configuration file"); 466 return {OPT_STOP, 1}; 467 } 468 flags.insert(flags.end(), file_flags.begin(), file_flags.end()); 469 470 const char** new_argv = new const char*[flags.size()]; 471 for (size_t i = 0; i < flags.size(); i++) { 472 if (flags[i].find("-Oconfig=") != std::string::npos) { 473 spvtools::Error( 474 opt_diagnostic, nullptr, {}, 475 "Flag -Oconfig= may not be used inside the configuration file"); 476 return {OPT_STOP, 1}; 477 } 478 new_argv[i] = flags[i].c_str(); 479 } 480 481 auto ret_val = 482 ParseFlags(static_cast<int>(flags.size()), new_argv, optimizer, in_file, 483 out_file, validator_options, optimizer_options); 484 delete[] new_argv; 485 return ret_val; 486 } 487 488 // Canonicalize the flag in |argv[argi]| of the form '--pass arg' into 489 // '--pass=arg'. The optimizer only accepts arguments to pass names that use the 490 // form '--pass_name=arg'. Since spirv-opt also accepts the other form, this 491 // function makes the necessary conversion. 492 // 493 // Pass flags that require additional arguments should be handled here. Note 494 // that additional arguments should be given as a single string. If the flag 495 // requires more than one argument, the pass creator in 496 // Optimizer::GetPassFromFlag() should parse it accordingly (e.g., see the 497 // handler for --set-spec-const-default-value). 
//
// If the argument requests one of the passes that need an additional argument,
// |argi| is modified to point past the current argument, and the string
// "argv[argi]=argv[argi + 1]" is returned. Otherwise, |argi| is unmodified and
// the string "|argv[argi]|" is returned.
std::string CanonicalizeFlag(const char** argv, int argc, int* argi) {
  // NOTE: DO NOT ADD NEW FLAGS HERE.
  //
  // These flags are supported for backwards compatibility. When adding new
  // passes that need extra arguments in its command-line flag, please make them
  // use the syntax "--pass_name[=pass_arg].
  static const char* const kLegacyFlags[] = {
      "--set-spec-const-default-value", "--loop-fission", "--loop-fusion",
      "--loop-unroll-partial", "--loop-peeling-threshold"};

  const std::string flag = argv[*argi];

  bool takes_separate_arg = false;
  for (const char* legacy : kLegacyFlags) {
    if (flag == legacy) {
      takes_separate_arg = true;
      break;
    }
  }
  if (!takes_separate_arg) {
    return flag;
  }

  // A legacy flag at the very end of the command line (or followed by a null
  // entry) is passed through untouched.
  const int next = *argi + 1;
  if (next >= argc || argv[next] == nullptr) {
    return flag;
  }

  // Consume the following word as this flag's argument.
  *argi = next;
  return flag + "=" + argv[next];
}

// Parses command-line flags. |argc| contains the number of command-line flags.
// |argv| points to an array of strings holding the flags. |optimizer| is the
// Optimizer instance used to optimize the program.
//
// On return, this function stores the name of the input program in |in_file|.
// The name of the output file in |out_file|. The return value indicates whether
// optimization should continue and a status code indicating an error or
// success.
536 OptStatus ParseFlags(int argc, const char** argv, 537 spvtools::Optimizer* optimizer, const char** in_file, 538 const char** out_file, 539 spvtools::ValidatorOptions* validator_options, 540 spvtools::OptimizerOptions* optimizer_options) { 541 std::vector<std::string> pass_flags; 542 bool target_env_set = false; 543 bool webgpu_mode_set = false; 544 for (int argi = 1; argi < argc; ++argi) { 545 const char* cur_arg = argv[argi]; 546 if ('-' == cur_arg[0]) { 547 if (0 == strcmp(cur_arg, "--version")) { 548 spvtools::Logf(opt_diagnostic, SPV_MSG_INFO, nullptr, {}, "%s\n", 549 spvSoftwareVersionDetailsString()); 550 return {OPT_STOP, 0}; 551 } else if (0 == strcmp(cur_arg, "--help") || 0 == strcmp(cur_arg, "-h")) { 552 PrintUsage(argv[0]); 553 return {OPT_STOP, 0}; 554 } else if (0 == strcmp(cur_arg, "-o")) { 555 if (!*out_file && argi + 1 < argc) { 556 *out_file = argv[++argi]; 557 } else { 558 PrintUsage(argv[0]); 559 return {OPT_STOP, 1}; 560 } 561 } else if ('\0' == cur_arg[1]) { 562 // Setting a filename of "-" to indicate stdin. 
563 if (!*in_file) { 564 *in_file = cur_arg; 565 } else { 566 spvtools::Error(opt_diagnostic, nullptr, {}, 567 "More than one input file specified"); 568 return {OPT_STOP, 1}; 569 } 570 } else if (0 == strncmp(cur_arg, "-Oconfig=", sizeof("-Oconfig=") - 1)) { 571 OptStatus status = 572 ParseOconfigFlag(argv[0], cur_arg, optimizer, in_file, out_file, 573 validator_options, optimizer_options); 574 if (status.action != OPT_CONTINUE) { 575 return status; 576 } 577 } else if (0 == strcmp(cur_arg, "--skip-validation")) { 578 optimizer_options->set_run_validator(false); 579 } else if (0 == strcmp(cur_arg, "--print-all")) { 580 optimizer->SetPrintAll(&std::cerr); 581 } else if (0 == strcmp(cur_arg, "--time-report")) { 582 optimizer->SetTimeReport(&std::cerr); 583 } else if (0 == strcmp(cur_arg, "--relax-struct-store")) { 584 validator_options->SetRelaxStructStore(true); 585 } else if (0 == strncmp(cur_arg, "--max-id-bound=", 586 sizeof("--max-id-bound=") - 1)) { 587 auto split_flag = spvtools::utils::SplitFlagArgs(cur_arg); 588 // Will not allow values in the range [2^31,2^32). 589 uint32_t max_id_bound = 590 static_cast<uint32_t>(atoi(split_flag.second.c_str())); 591 592 // That SPIR-V mandates the minimum value for max id bound but 593 // implementations may allow higher minimum bounds. 
594 if (max_id_bound < kDefaultMaxIdBound) { 595 spvtools::Error(opt_diagnostic, nullptr, {}, 596 "The max id bound must be at least 0x3FFFFF"); 597 return {OPT_STOP, 1}; 598 } 599 optimizer_options->set_max_id_bound(max_id_bound); 600 validator_options->SetUniversalLimit(spv_validator_limit_max_id_bound, 601 max_id_bound); 602 } else if (0 == strncmp(cur_arg, 603 "--target-env=", sizeof("--target-env=") - 1)) { 604 if (webgpu_mode_set) { 605 spvtools::Error(opt_diagnostic, nullptr, {}, 606 "Cannot use both --webgpu-mode and --target-env at " 607 "the same time"); 608 return {OPT_STOP, 1}; 609 } 610 const auto split_flag = spvtools::utils::SplitFlagArgs(cur_arg); 611 const auto target_env_str = split_flag.second.c_str(); 612 spv_target_env target_env; 613 if (!spvParseTargetEnv(target_env_str, &target_env)) { 614 spvtools::Error(opt_diagnostic, nullptr, {}, 615 "Invalid value passed to --target-env"); 616 return {OPT_STOP, 1}; 617 } 618 optimizer->SetTargetEnv(target_env); 619 } else if (0 == strcmp(cur_arg, "--webgpu-mode")) { 620 if (target_env_set) { 621 spvtools::Error(opt_diagnostic, nullptr, {}, 622 "Cannot use both --webgpu-mode and --target-env at " 623 "the same time"); 624 return {OPT_STOP, 1}; 625 } 626 627 optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0); 628 optimizer->RegisterWebGPUPasses(); 629 } else { 630 // Some passes used to accept the form '--pass arg', canonicalize them 631 // to '--pass=arg'. 632 pass_flags.push_back(CanonicalizeFlag(argv, argc, &argi)); 633 634 // If we were requested to legalize SPIR-V generated from the HLSL 635 // front-end, skip validation. 
636 if (0 == strcmp(cur_arg, "--legalize-hlsl")) { 637 validator_options->SetRelaxLogicalPointer(true); 638 } 639 } 640 } else { 641 if (!*in_file) { 642 *in_file = cur_arg; 643 } else { 644 spvtools::Error(opt_diagnostic, nullptr, {}, 645 "More than one input file specified"); 646 return {OPT_STOP, 1}; 647 } 648 } 649 } 650 651 if (!optimizer->RegisterPassesFromFlags(pass_flags)) { 652 return {OPT_STOP, 1}; 653 } 654 655 return {OPT_CONTINUE, 0}; 656 } 657 658 } // namespace 659 660 int main(int argc, const char** argv) { 661 const char* in_file = nullptr; 662 const char* out_file = nullptr; 663 664 spv_target_env target_env = kDefaultEnvironment; 665 666 spvtools::Optimizer optimizer(target_env); 667 optimizer.SetMessageConsumer(spvtools::utils::CLIMessageConsumer); 668 669 spvtools::ValidatorOptions validator_options; 670 spvtools::OptimizerOptions optimizer_options; 671 OptStatus status = ParseFlags(argc, argv, &optimizer, &in_file, &out_file, 672 &validator_options, &optimizer_options); 673 optimizer_options.set_validator_options(validator_options); 674 675 if (status.action == OPT_STOP) { 676 return status.code; 677 } 678 679 if (out_file == nullptr) { 680 spvtools::Error(opt_diagnostic, nullptr, {}, "-o required"); 681 return 1; 682 } 683 684 std::vector<uint32_t> binary; 685 if (!ReadFile<uint32_t>(in_file, "rb", &binary)) { 686 return 1; 687 } 688 689 // By using the same vector as input and output, we save time in the case 690 // that there was no change. 691 bool ok = 692 optimizer.Run(binary.data(), binary.size(), &binary, optimizer_options); 693 694 if (!WriteFile<uint32_t>(out_file, "wb", binary.data(), binary.size())) { 695 return 1; 696 } 697 698 return ok ? 0 : 1; 699 } 700