Home | History | Annotate | Download | only in compiler
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // Implements the Protocol Compiler front-end such that it may be reused by
     36 // custom compilers written to support other languages.
     37 
     38 #ifndef GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
     39 #define GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
     40 
     41 #include <google/protobuf/stubs/common.h>
     42 #include <google/protobuf/stubs/hash.h>
     43 #include <string>
     44 #include <vector>
     45 #include <map>
     46 #include <set>
     47 #include <utility>
     48 
     49 namespace google {
     50 namespace protobuf {
     51 
     52 class Descriptor;            // descriptor.h
     53 class DescriptorPool;        // descriptor.h
     54 class FileDescriptor;        // descriptor.h
     55 class FileDescriptorProto;   // descriptor.pb.h
     56 template<typename T> class RepeatedPtrField;  // repeated_field.h
     57 
     58 namespace compiler {
     59 
     60 class CodeGenerator;        // code_generator.h
     61 class GeneratorContext;      // code_generator.h
     62 class DiskSourceTree;       // importer.h
     63 
     64 // This class implements the command-line interface to the protocol compiler.
     65 // It is designed to make it very easy to create a custom protocol compiler
     66 // supporting the languages of your choice.  For example, if you wanted to
     67 // create a custom protocol compiler binary which includes both the regular
     68 // C++ support plus support for your own custom output "Foo", you would
     69 // write a class "FooGenerator" which implements the CodeGenerator interface,
     70 // then write a main() procedure like this:
     71 //
     72 //   int main(int argc, char* argv[]) {
     73 //     google::protobuf::compiler::CommandLineInterface cli;
     74 //
     75 //     // Support generation of C++ source and headers.
     76 //     google::protobuf::compiler::cpp::CppGenerator cpp_generator;
     77 //     cli.RegisterGenerator("--cpp_out", &cpp_generator,
     78 //       "Generate C++ source and header.");
     79 //
     80 //     // Support generation of Foo code.
     81 //     FooGenerator foo_generator;
     82 //     cli.RegisterGenerator("--foo_out", &foo_generator,
     83 //       "Generate Foo file.");
     84 //
     85 //     return cli.Run(argc, argv);
     86 //   }
     87 //
     88 // The compiler is invoked with syntax like:
     89 //   protoc --cpp_out=outdir --foo_out=outdir --proto_path=src src/foo.proto
     90 //
     91 // For a full description of the command-line syntax, invoke it with --help.
     92 class LIBPROTOC_EXPORT CommandLineInterface {
     93  public:
     94   CommandLineInterface();
     95   ~CommandLineInterface();
     96 
     97   // Register a code generator for a language.
     98   //
     99   // Parameters:
    100   // * flag_name: The command-line flag used to specify an output file of
    101   //   this type.  The name must start with a '-'.  If the name is longer
    102   //   than one letter, it must start with two '-'s.
    103   // * generator: The CodeGenerator which will be called to generate files
    104   //   of this type.
    105   // * help_text: Text describing this flag in the --help output.
    106   //
    107   // Some generators accept extra parameters.  You can specify this parameter
    108   // on the command-line by placing it before the output directory, separated
    109   // by a colon:
    110   //   protoc --foo_out=enable_bar:outdir
    111   // The text before the colon is passed to CodeGenerator::Generate() as the
    112   // "parameter".
    113   void RegisterGenerator(const string& flag_name,
    114                          CodeGenerator* generator,
    115                          const string& help_text);
    116 
    117   // Register a code generator for a language.
    118   // Besides flag_name you can specify another option_flag_name that could be
    119   // used to pass extra parameters to the registered code generator.
    120   // Suppose you have registered a generator by calling:
    121   //   command_line_interface.RegisterGenerator("--foo_out", "--foo_opt", ...)
    122   // Then you could invoke the compiler with a command like:
    123   //   protoc --foo_out=enable_bar:outdir --foo_opt=enable_baz
    124   // This will pass "enable_bar,enable_baz" as the parameter to the generator.
    125   void RegisterGenerator(const string& flag_name,
    126                          const string& option_flag_name,
    127                          CodeGenerator* generator,
    128                          const string& help_text);
    129 
    130   // Enables "plugins".  In this mode, if a command-line flag ends with "_out"
    131   // but does not match any registered generator, the compiler will attempt to
    132   // find a "plugin" to implement the generator.  Plugins are just executables.
    133   // They should live somewhere in the PATH.
    134   //
    135   // The compiler determines the executable name to search for by concatenating
    136   // exe_name_prefix with the unrecognized flag name, removing "_out".  So, for
    137   // example, if exe_name_prefix is "protoc-" and you pass the flag --foo_out,
    138   // the compiler will try to run the program "protoc-foo".
    139   //
    140   // The plugin program should implement the following usage:
    141   //   plugin [--out=OUTDIR] [--parameter=PARAMETER] PROTO_FILES < DESCRIPTORS
    142   // --out indicates the output directory (as passed to the --foo_out
    143   // parameter); if omitted, the current directory should be used.  --parameter
    144   // gives the generator parameter, if any was provided.  PROTO_FILES lists
    145   // the .proto files which were given on the compiler command-line; these are
    146   // the files for which the plugin is expected to generate output code.
    147   // Finally, DESCRIPTORS is an encoded FileDescriptorSet (as defined in
    148   // descriptor.proto).  This is piped to the plugin's stdin.  The set will
    149   // include descriptors for all the files listed in PROTO_FILES as well as
    150   // all files that they import.  The plugin MUST NOT attempt to read the
    151   // PROTO_FILES directly -- it must use the FileDescriptorSet.
    152   //
    153   // The plugin should generate whatever files are necessary, as code generators
    154   // normally do.  It should write the names of all files it generates to
    155   // stdout.  The names should be relative to the output directory, NOT absolute
    156   // names or relative to the current directory.  If any errors occur, error
    157   // messages should be written to stderr.  If an error is fatal, the plugin
    158   // should exit with a non-zero exit code.
    159   void AllowPlugins(const string& exe_name_prefix);
    160 
    161   // Run the Protocol Compiler with the given command-line parameters.
    162   // Returns the error code which should be returned by main().
    163   //
    164   // It may not be safe to call Run() in a multi-threaded environment because
    165   // it calls strerror().  I'm not sure why you'd want to do this anyway.
    166   int Run(int argc, const char* const argv[]);
    167 
    168   // Call SetInputsAreProtoPathRelative(true) if the input files given on the
    169   // command line should be interpreted relative to the proto import path
    170   // specified using --proto_path or -I flags.  Otherwise, input file names
    171   // will be interpreted relative to the current working directory (or as
    172   // absolute paths if they start with '/'), though they must still reside
    173   // inside a directory given by --proto_path or the compiler will fail.  The
    174   // latter mode is generally more intuitive and easier to use, especially e.g.
    175   // when defining implicit rules in Makefiles.
    176   void SetInputsAreProtoPathRelative(bool enable) {
    177     inputs_are_proto_path_relative_ = enable;
    178   }
    179 
    180   // Provides some text which will be printed when the --version flag is
    181   // used.  The version of libprotoc will also be printed on the next line
    182   // after this text.
    183   void SetVersionInfo(const string& text) {
    184     version_info_ = text;
    185   }
    186 
    187 
    188  private:
    189   // -----------------------------------------------------------------
    190 
          // Nested helper types.  They are only forward-declared in this header.
    191   class ErrorPrinter;
    192   class GeneratorContextImpl;
    193   class MemoryOutputStream;
          // Maps a string key to a GeneratorContextImpl*.  This is the type of the
          // output_directories argument of GenerateDependencyManifestFile().
    194   typedef hash_map<string, GeneratorContextImpl*> GeneratorContextMap;
    195 
    196   // Clear state from previous Run().
    197   void Clear();
    198 
    199   // Remaps each file in input_files_ so that it is relative to one of the
    200   // directories in proto_path_.  Returns false if an error occurred.  This
    201   // is only used if inputs_are_proto_path_relative_ is false.
    202   bool MakeInputsBeProtoPathRelative(
    203     DiskSourceTree* source_tree);
    204 
    205   // Return status for ParseArguments() and InterpretArgument().
    206   enum ParseArgumentStatus {
    207     PARSE_ARGUMENT_DONE_AND_CONTINUE,
    208     PARSE_ARGUMENT_DONE_AND_EXIT,
    209     PARSE_ARGUMENT_FAIL
    210   };
    211 
    212   // Parse all command-line arguments.
    213   ParseArgumentStatus ParseArguments(int argc, const char* const argv[]);
    214 
    215 
    216   // Parses a command-line argument into a name/value pair.  Returns
    217   // true if the next argument in the argv should be used as the value,
    218   // false otherwise.
    219   //
    220   // Examples:
    221   //   "-Isrc/protos" ->
    222   //     name = "-I", value = "src/protos"
    223   //   "--cpp_out=src/foo.pb2.cc" ->
    224   //     name = "--cpp_out", value = "src/foo.pb2.cc"
    225   //   "foo.proto" ->
    226   //     name = "", value = "foo.proto"
    227   bool ParseArgument(const char* arg, string* name, string* value);
    228 
    229   // Interprets arguments parsed with ParseArgument.
    230   ParseArgumentStatus InterpretArgument(const string& name,
    231                                         const string& value);
    232 
    233   // Print the --help text to stderr.
    234   void PrintHelpText();
    235 
    236   // Generate the given output file from the given input.
    237   struct OutputDirective;  // see below
    238   bool GenerateOutput(const vector<const FileDescriptor*>& parsed_files,
    239                       const OutputDirective& output_directive,
    240                       GeneratorContext* generator_context);
    241   bool GeneratePluginOutput(const vector<const FileDescriptor*>& parsed_files,
    242                             const string& plugin_name,
    243                             const string& parameter,
    244                             GeneratorContext* generator_context,
    245                             string* error);
    246 
    247   // Implements --encode and --decode.
    248   bool EncodeOrDecode(const DescriptorPool* pool);
    249 
    250   // Implements the --descriptor_set_out option.
          // NOTE(review): parsed_files is taken by value here, unlike the
          // const-reference parameters of GenerateOutput() and
          // GeneratePluginOutput() -- confirm whether the copy is intended.
    251   bool WriteDescriptorSet(const vector<const FileDescriptor*> parsed_files);
    252 
    253   // Implements the --dependency_out option.
    254   bool GenerateDependencyManifestFile(
    255       const vector<const FileDescriptor*>& parsed_files,
    256       const GeneratorContextMap& output_directories,
    257       DiskSourceTree* source_tree);
    258 
    259   // Get all transitive dependencies of the given file (including the file
    260   // itself), adding them to the given list of FileDescriptorProtos.  The
    261   // protos will be ordered such that every file is listed before any file that
    262   // depends on it, so that you can call DescriptorPool::BuildFile() on them
    263   // in order.  Any files in *already_seen will not be added, and each file
    264   // added will be inserted into *already_seen.  If include_source_code_info is
    265   // true then include the source code information in the FileDescriptorProtos.
    266   // If include_json_name is true, populate the json_name field of
    267   // FieldDescriptorProto for all fields.
    268   static void GetTransitiveDependencies(
    269       const FileDescriptor* file,
    270       bool include_json_name,
    271       bool include_source_code_info,
    272       set<const FileDescriptor*>* already_seen,
    273       RepeatedPtrField<FileDescriptorProto>* output);
    274 
    275   // Implements the --print_free_field_numbers option. This function prints free
    276   // field numbers into stdout for the message and its nested message types in
    277   // post-order, i.e. nested types first. Printed ranges are left-right
    278   // inclusive, i.e. [a, b].
    279   //
    280   // Groups:
    281   // For historical reasons, groups are considered to share the same
    282   // field number space with the parent message, thus it will not print free
    283   // field numbers for groups. The field numbers used in the groups are
    284   // excluded in the free field numbers of the parent message.
    285   //
    286   // Extension Ranges:
    287   // Extension ranges are considered occupied field numbers and they will not be
    288   // listed as free numbers in the output.
    289   void PrintFreeFieldNumbers(const Descriptor* descriptor);
    290 
    291   // -----------------------------------------------------------------
    292 
    293   // The name of the executable as invoked (i.e. argv[0]).
    294   string executable_name_;
    295 
    296   // Version info set with SetVersionInfo().
    297   string version_info_;
    298 
    299   // Registered generators.
    300   struct GeneratorInfo {
    301     string flag_name;
    302     string option_flag_name;
    303     CodeGenerator* generator;
    304     string help_text;
    305   };
    306   typedef map<string, GeneratorInfo> GeneratorMap;
    307   GeneratorMap generators_by_flag_name_;
    308   GeneratorMap generators_by_option_name_;
    309   // A map from generator names to the parameters specified using the option
    310   // flag. For example, if the user invokes the compiler with:
    311   //   protoc --foo_out=outputdir --foo_opt=enable_bar ...
    312   // Then there will be an entry ("--foo_out", "enable_bar") in this map.
    313   map<string, string> generator_parameters_;
    314 
    315   // See AllowPlugins().  If this is empty, plugins aren't allowed.
    316   string plugin_prefix_;
    317 
    318   // Maps specific plugin names to files.  When executing a plugin, this map
    319   // is searched first to find the plugin executable.  If not found here, the
    320   // PATH (or other OS-specific search strategy) is searched.
    321   map<string, string> plugins_;
    322 
    323   // Stuff parsed from command line.
    324   enum Mode {
    325     MODE_COMPILE,  // Normal mode:  parse .proto files and compile them.
    326     MODE_ENCODE,   // --encode:  read text from stdin, write binary to stdout.
    327     MODE_DECODE,   // --decode:  read binary from stdin, write text to stdout.
    328     MODE_PRINT,    // Print mode: print info of the given .proto files and exit.
    329   };
    330 
          // The operating mode; one of the Mode values above.
    331   Mode mode_;
    332 
    333   enum PrintMode {
    334     PRINT_NONE,               // Not in MODE_PRINT
    335     PRINT_FREE_FIELDS,        // --print_free_field_numbers
    336   };
    337 
          // What to print when in MODE_PRINT; PRINT_NONE otherwise.
    338   PrintMode print_mode_;
    339 
    340   enum ErrorFormat {
    341     ERROR_FORMAT_GCC,   // GCC error output format (default).
    342     ERROR_FORMAT_MSVS   // Visual Studio output (--error_format=msvs).
    343   };
    344 
          // The error output format; one of the ErrorFormat values above.
    345   ErrorFormat error_format_;
    346 
    347   vector<pair<string, string> > proto_path_;  // Search path for proto files.
    348   vector<string> input_files_;                // Names of the input proto files.
    349 
    350   // output_directives_ lists all the files we are supposed to output and what
    351   // generator to use for each.
    352   struct OutputDirective {
    353     string name;                // E.g. "--foo_out"
    354     CodeGenerator* generator;   // NULL for plugins
    355     string parameter;
    356     string output_location;
    357   };
    358   vector<OutputDirective> output_directives_;
    359 
    360   // When using --encode or --decode, this names the type we are encoding or
    361   // decoding.  (Empty string indicates --decode_raw.)
    362   string codec_type_;
    363 
    364   // If --descriptor_set_out was given, this is the filename to which the
    365   // FileDescriptorSet should be written.  Otherwise, empty.
    366   string descriptor_set_name_;
    367 
    368   // If --dependency_out was given, this is the path to the file where the
    369   // dependency file will be written. Otherwise, empty.
    370   string dependency_out_name_;
    371 
    372   // True if --include_imports was given, meaning that we should
    373   // write all transitive dependencies to the DescriptorSet.  Otherwise, only
    374   // the .proto files listed on the command-line are added.
    375   bool imports_in_descriptor_set_;
    376 
    377   // True if --include_source_info was given, meaning that we should not strip
    378   // SourceCodeInfo from the DescriptorSet.
    379   bool source_info_in_descriptor_set_;
    380 
    381   // Was the --disallow_services flag used?
    382   bool disallow_services_;
    383 
    384   // See SetInputsAreProtoPathRelative().
    385   bool inputs_are_proto_path_relative_;
    386 
    387   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CommandLineInterface);
    388 };
    389 
    390 }  // namespace compiler
    391 }  // namespace protobuf
    392 
    393 }  // namespace google
    394 #endif  // GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
    395