Home | History | Annotate | Download | only in python
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: robinson (at) google.com (Will Robinson)
     32 //
     33 // This module outputs pure-Python protocol message classes that will
     34 // largely be constructed at runtime via the metaclass in reflection.py.
     35 // In other words, our job is basically to output a Python equivalent
     36 // of the C++ *Descriptor objects, and fix up all circular references
     37 // within these objects.
     38 //
     39 // Note that the runtime performance of protocol message classes created in
     40 // this way is expected to be lousy.  The plan is to create an alternate
     41 // generator that outputs a Python/C extension module that lets
     42 // performance-minded Python code leverage the fast C++ implementation
     43 // directly.
     44 
     45 #include <limits>
     46 #include <map>
     47 #include <utility>
     48 #include <string>
     49 #include <vector>
     50 
     51 #include <google/protobuf/compiler/python/python_generator.h>
     52 #include <google/protobuf/descriptor.pb.h>
     53 
     54 #include <google/protobuf/stubs/common.h>
     55 #include <google/protobuf/io/printer.h>
     56 #include <google/protobuf/descriptor.h>
     57 #include <google/protobuf/io/zero_copy_stream.h>
     58 #include <google/protobuf/stubs/strutil.h>
     59 #include <google/protobuf/stubs/substitute.h>
     60 
     61 namespace google {
     62 namespace protobuf {
     63 namespace compiler {
     64 namespace python {
     65 
     66 namespace {
     67 
     68 // Returns a copy of |filename| with any trailing ".protodevel" or ".proto
     69 // suffix stripped.
     70 // TODO(robinson): Unify with copy in compiler/cpp/internal/helpers.cc.
     71 string StripProto(const string& filename) {
     72   const char* suffix = HasSuffixString(filename, ".protodevel")
     73       ? ".protodevel" : ".proto";
     74   return StripSuffixString(filename, suffix);
     75 }
     76 
     77 
     78 // Returns the Python module name expected for a given .proto filename.
     79 string ModuleName(const string& filename) {
     80   string basename = StripProto(filename);
     81   StripString(&basename, "-", '_');
     82   StripString(&basename, "/", '.');
     83   return basename + "_pb2";
     84 }
     85 
     86 
     87 // Returns the name of all containing types for descriptor,
     88 // in order from outermost to innermost, followed by descriptor's
     89 // own name.  Each name is separated by |separator|.
     90 template <typename DescriptorT>
     91 string NamePrefixedWithNestedTypes(const DescriptorT& descriptor,
     92                                    const string& separator) {
     93   string name = descriptor.name();
     94   for (const Descriptor* current = descriptor.containing_type();
     95        current != NULL; current = current->containing_type()) {
     96     name = current->name() + separator + name;
     97   }
     98   return name;
     99 }
    100 
    101 
// Name of the class attribute where we store the Python
// descriptor.Descriptor instance for the generated class.
// Must stay consistent with the _DESCRIPTOR_KEY constant
// in proto2/public/reflection.py.
// NOTE(review): the generated code imports the runtime from
// google.protobuf, so the path above is presumably an older location of
// reflection.py -- confirm.
//
// Within this file the same string is also used as the module-level variable
// name of the generated FileDescriptor and as the value of the file=
// argument of the generated enum, message and service descriptors.
const char kDescriptorKey[] = "DESCRIPTOR";
    107 
    108 
    109 // Should we generate generic services for this file?
    110 inline bool HasGenericServices(const FileDescriptor *file) {
    111   return file->service_count() > 0 &&
    112          file->options().py_generic_services();
    113 }
    114 
    115 
    116 // Prints the common boilerplate needed at the top of every .py
    117 // file output by this generator.
    118 void PrintTopBoilerplate(
    119     io::Printer* printer, const FileDescriptor* file, bool descriptor_proto) {
    120   // TODO(robinson): Allow parameterization of Python version?
    121   printer->Print(
    122       "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
    123       "\n"
    124       "from google.protobuf import descriptor\n"
    125       "from google.protobuf import message\n"
    126       "from google.protobuf import reflection\n");
    127   if (HasGenericServices(file)) {
    128     printer->Print(
    129         "from google.protobuf import service\n"
    130         "from google.protobuf import service_reflection\n");
    131   }
    132 
    133   // Avoid circular imports if this module is descriptor_pb2.
    134   if (!descriptor_proto) {
    135     printer->Print(
    136         "from google.protobuf import descriptor_pb2\n");
    137   }
    138   printer->Print(
    139     "# @@protoc_insertion_point(imports)\n");
    140   printer->Print("\n\n");
    141 }
    142 
    143 
    144 // Returns a Python literal giving the default value for a field.
    145 // If the field specifies no explicit default value, we'll return
    146 // the default default value for the field type (zero for numbers,
    147 // empty string for strings, empty list for repeated fields, and
    148 // None for non-repeated, composite fields).
    149 //
    150 // TODO(robinson): Unify with code from
    151 // //compiler/cpp/internal/primitive_field.cc
    152 // //compiler/cpp/internal/enum_field.cc
    153 // //compiler/cpp/internal/string_field.cc
    154 string StringifyDefaultValue(const FieldDescriptor& field) {
    155   if (field.is_repeated()) {
    156     return "[]";
    157   }
    158 
    159   switch (field.cpp_type()) {
    160     case FieldDescriptor::CPPTYPE_INT32:
    161       return SimpleItoa(field.default_value_int32());
    162     case FieldDescriptor::CPPTYPE_UINT32:
    163       return SimpleItoa(field.default_value_uint32());
    164     case FieldDescriptor::CPPTYPE_INT64:
    165       return SimpleItoa(field.default_value_int64());
    166     case FieldDescriptor::CPPTYPE_UINT64:
    167       return SimpleItoa(field.default_value_uint64());
    168     case FieldDescriptor::CPPTYPE_DOUBLE: {
    169       double value = field.default_value_double();
    170       if (value == numeric_limits<double>::infinity()) {
    171         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
    172         // a numeric literal that is too big for a double will become infinity.
    173         return "1e10000";
    174       } else if (value == -numeric_limits<double>::infinity()) {
    175         // See above.
    176         return "-1e10000";
    177       } else if (value != value) {
    178         // infinity * 0 = nan
    179         return "(1e10000 * 0)";
    180       } else {
    181         return SimpleDtoa(value);
    182       }
    183     }
    184     case FieldDescriptor::CPPTYPE_FLOAT: {
    185       float value = field.default_value_float();
    186       if (value == numeric_limits<float>::infinity()) {
    187         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
    188         // a numeric literal that is too big for a double will become infinity.
    189         return "1e10000";
    190       } else if (value == -numeric_limits<float>::infinity()) {
    191         // See above.
    192         return "-1e10000";
    193       } else if (value != value) {
    194         // infinity - infinity = nan
    195         return "(1e10000 * 0)";
    196       } else {
    197         return SimpleFtoa(value);
    198       }
    199     }
    200     case FieldDescriptor::CPPTYPE_BOOL:
    201       return field.default_value_bool() ? "True" : "False";
    202     case FieldDescriptor::CPPTYPE_ENUM:
    203       return SimpleItoa(field.default_value_enum()->number());
    204     case FieldDescriptor::CPPTYPE_STRING:
    205       if (field.type() == FieldDescriptor::TYPE_STRING) {
    206         return "unicode(\"" + CEscape(field.default_value_string()) +
    207             "\", \"utf-8\")";
    208       } else {
    209         return "\"" + CEscape(field.default_value_string()) + "\"";
    210       }
    211     case FieldDescriptor::CPPTYPE_MESSAGE:
    212       return "None";
    213   }
    214   // (We could add a default case above but then we wouldn't get the nice
    215   // compiler warning when a new type is added.)
    216   GOOGLE_LOG(FATAL) << "Not reached.";
    217   return "";
    218 }
    219 
    220 
    221 
    222 }  // namespace
    223 
    224 
    225 Generator::Generator() : file_(NULL) {
    226 }
    227 
    228 Generator::~Generator() {
    229 }
    230 
    231 bool Generator::Generate(const FileDescriptor* file,
    232                          const string& parameter,
    233                          OutputDirectory* output_directory,
    234                          string* error) const {
    235 
    236   // Completely serialize all Generate() calls on this instance.  The
    237   // thread-safety constraints of the CodeGenerator interface aren't clear so
    238   // just be as conservative as possible.  It's easier to relax this later if
    239   // we need to, but I doubt it will be an issue.
    240   // TODO(kenton):  The proper thing to do would be to allocate any state on
    241   //   the stack and use that, so that the Generator class itself does not need
    242   //   to have any mutable members.  Then it is implicitly thread-safe.
    243   MutexLock lock(&mutex_);
    244   file_ = file;
    245   string module_name = ModuleName(file->name());
    246   string filename = module_name;
    247   StripString(&filename, ".", '/');
    248   filename += ".py";
    249 
    250   FileDescriptorProto fdp;
    251   file_->CopyTo(&fdp);
    252   fdp.SerializeToString(&file_descriptor_serialized_);
    253 
    254 
    255   scoped_ptr<io::ZeroCopyOutputStream> output(output_directory->Open(filename));
    256   GOOGLE_CHECK(output.get());
    257   io::Printer printer(output.get(), '$');
    258   printer_ = &printer;
    259 
    260   PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
    261   PrintFileDescriptor();
    262   PrintTopLevelEnums();
    263   PrintTopLevelExtensions();
    264   PrintAllNestedEnumsInFile();
    265   PrintMessageDescriptors();
    266   // We have to print the imports after the descriptors, so that mutually
    267   // recursive protos in separate files can successfully reference each other.
    268   PrintImports();
    269   FixForeignFieldsInDescriptors();
    270   PrintMessages();
    271   // We have to fix up the extensions after the message classes themselves,
    272   // since they need to call static RegisterExtension() methods on these
    273   // classes.
    274   FixForeignFieldsInExtensions();
    275   if (HasGenericServices(file)) {
    276     PrintServices();
    277   }
    278 
    279   printer.Print(
    280     "# @@protoc_insertion_point(module_scope)\n");
    281 
    282   return !printer.failed();
    283 }
    284 
    285 // Prints Python imports for all modules imported by |file|.
    286 void Generator::PrintImports() const {
    287   for (int i = 0; i < file_->dependency_count(); ++i) {
    288     string module_name = ModuleName(file_->dependency(i)->name());
    289     printer_->Print("import $module$\n", "module",
    290                     module_name);
    291   }
    292   printer_->Print("\n");
    293 }
    294 
    295 // Prints the single file descriptor for this file.
    296 void Generator::PrintFileDescriptor() const {
    297   map<string, string> m;
    298   m["descriptor_name"] = kDescriptorKey;
    299   m["name"] = file_->name();
    300   m["package"] = file_->package();
    301   const char file_descriptor_template[] =
    302       "$descriptor_name$ = descriptor.FileDescriptor(\n"
    303       "  name='$name$',\n"
    304       "  package='$package$',\n";
    305   printer_->Print(m, file_descriptor_template);
    306   printer_->Indent();
    307   printer_->Print(
    308       "serialized_pb='$value$'",
    309       "value", strings::CHexEscape(file_descriptor_serialized_));
    310 
    311   // TODO(falk): Also print options and fix the message_type, enum_type,
    312   //             service and extension later in the generation.
    313 
    314   printer_->Outdent();
    315   printer_->Print(")\n");
    316   printer_->Print("\n");
    317 }
    318 
    319 // Prints descriptors and module-level constants for all top-level
    320 // enums defined in |file|.
    321 void Generator::PrintTopLevelEnums() const {
    322   vector<pair<string, int> > top_level_enum_values;
    323   for (int i = 0; i < file_->enum_type_count(); ++i) {
    324     const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
    325     PrintEnum(enum_descriptor);
    326     printer_->Print("\n");
    327 
    328     for (int j = 0; j < enum_descriptor.value_count(); ++j) {
    329       const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(j);
    330       top_level_enum_values.push_back(
    331           make_pair(value_descriptor.name(), value_descriptor.number()));
    332     }
    333   }
    334 
    335   for (int i = 0; i < top_level_enum_values.size(); ++i) {
    336     printer_->Print("$name$ = $value$\n",
    337                     "name", top_level_enum_values[i].first,
    338                     "value", SimpleItoa(top_level_enum_values[i].second));
    339   }
    340   printer_->Print("\n");
    341 }
    342 
    343 // Prints all enums contained in all message types in |file|.
    344 void Generator::PrintAllNestedEnumsInFile() const {
    345   for (int i = 0; i < file_->message_type_count(); ++i) {
    346     PrintNestedEnums(*file_->message_type(i));
    347   }
    348 }
    349 
    350 // Prints a Python statement assigning the appropriate module-level
    351 // enum name to a Python EnumDescriptor object equivalent to
    352 // enum_descriptor.
    353 void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
    354   map<string, string> m;
    355   m["descriptor_name"] = ModuleLevelDescriptorName(enum_descriptor);
    356   m["name"] = enum_descriptor.name();
    357   m["full_name"] = enum_descriptor.full_name();
    358   m["file"] = kDescriptorKey;
    359   const char enum_descriptor_template[] =
    360       "$descriptor_name$ = descriptor.EnumDescriptor(\n"
    361       "  name='$name$',\n"
    362       "  full_name='$full_name$',\n"
    363       "  filename=None,\n"
    364       "  file=$file$,\n"
    365       "  values=[\n";
    366   string options_string;
    367   enum_descriptor.options().SerializeToString(&options_string);
    368   printer_->Print(m, enum_descriptor_template);
    369   printer_->Indent();
    370   printer_->Indent();
    371   for (int i = 0; i < enum_descriptor.value_count(); ++i) {
    372     PrintEnumValueDescriptor(*enum_descriptor.value(i));
    373     printer_->Print(",\n");
    374   }
    375   printer_->Outdent();
    376   printer_->Print("],\n");
    377   printer_->Print("containing_type=None,\n");
    378   printer_->Print("options=$options_value$,\n",
    379                   "options_value",
    380                   OptionsValue("EnumOptions", CEscape(options_string)));
    381   EnumDescriptorProto edp;
    382   PrintSerializedPbInterval(enum_descriptor, edp);
    383   printer_->Outdent();
    384   printer_->Print(")\n");
    385   printer_->Print("\n");
    386 }
    387 
    388 // Recursively prints enums in nested types within descriptor, then
    389 // prints enums contained at the top level in descriptor.
    390 void Generator::PrintNestedEnums(const Descriptor& descriptor) const {
    391   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
    392     PrintNestedEnums(*descriptor.nested_type(i));
    393   }
    394 
    395   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
    396     PrintEnum(*descriptor.enum_type(i));
    397   }
    398 }
    399 
    400 void Generator::PrintTopLevelExtensions() const {
    401   const bool is_extension = true;
    402   for (int i = 0; i < file_->extension_count(); ++i) {
    403     const FieldDescriptor& extension_field = *file_->extension(i);
    404     string constant_name = extension_field.name() + "_FIELD_NUMBER";
    405     UpperString(&constant_name);
    406     printer_->Print("$constant_name$ = $number$\n",
    407       "constant_name", constant_name,
    408       "number", SimpleItoa(extension_field.number()));
    409     printer_->Print("$name$ = ", "name", extension_field.name());
    410     PrintFieldDescriptor(extension_field, is_extension);
    411     printer_->Print("\n");
    412   }
    413   printer_->Print("\n");
    414 }
    415 
    416 // Prints Python equivalents of all Descriptors in |file|.
    417 void Generator::PrintMessageDescriptors() const {
    418   for (int i = 0; i < file_->message_type_count(); ++i) {
    419     PrintDescriptor(*file_->message_type(i));
    420     printer_->Print("\n");
    421   }
    422 }
    423 
    424 void Generator::PrintServices() const {
    425   for (int i = 0; i < file_->service_count(); ++i) {
    426     PrintServiceDescriptor(*file_->service(i));
    427     PrintServiceClass(*file_->service(i));
    428     PrintServiceStub(*file_->service(i));
    429     printer_->Print("\n");
    430   }
    431 }
    432 
    433 void Generator::PrintServiceDescriptor(
    434     const ServiceDescriptor& descriptor) const {
    435   printer_->Print("\n");
    436   string service_name = ModuleLevelServiceDescriptorName(descriptor);
    437   string options_string;
    438   descriptor.options().SerializeToString(&options_string);
    439 
    440   printer_->Print(
    441       "$service_name$ = descriptor.ServiceDescriptor(\n",
    442       "service_name", service_name);
    443   printer_->Indent();
    444   map<string, string> m;
    445   m["name"] = descriptor.name();
    446   m["full_name"] = descriptor.full_name();
    447   m["file"] = kDescriptorKey;
    448   m["index"] = SimpleItoa(descriptor.index());
    449   m["options_value"] = OptionsValue("ServiceOptions", options_string);
    450   const char required_function_arguments[] =
    451       "name='$name$',\n"
    452       "full_name='$full_name$',\n"
    453       "file=$file$,\n"
    454       "index=$index$,\n"
    455       "options=$options_value$,\n";
    456   printer_->Print(m, required_function_arguments);
    457 
    458   ServiceDescriptorProto sdp;
    459   PrintSerializedPbInterval(descriptor, sdp);
    460 
    461   printer_->Print("methods=[\n");
    462   for (int i = 0; i < descriptor.method_count(); ++i) {
    463     const MethodDescriptor* method = descriptor.method(i);
    464     string options_string;
    465     method->options().SerializeToString(&options_string);
    466 
    467     m.clear();
    468     m["name"] = method->name();
    469     m["full_name"] = method->full_name();
    470     m["index"] = SimpleItoa(method->index());
    471     m["serialized_options"] = CEscape(options_string);
    472     m["input_type"] = ModuleLevelDescriptorName(*(method->input_type()));
    473     m["output_type"] = ModuleLevelDescriptorName(*(method->output_type()));
    474     m["options_value"] = OptionsValue("MethodOptions", options_string);
    475     printer_->Print("descriptor.MethodDescriptor(\n");
    476     printer_->Indent();
    477     printer_->Print(
    478         m,
    479         "name='$name$',\n"
    480         "full_name='$full_name$',\n"
    481         "index=$index$,\n"
    482         "containing_service=None,\n"
    483         "input_type=$input_type$,\n"
    484         "output_type=$output_type$,\n"
    485         "options=$options_value$,\n");
    486     printer_->Outdent();
    487     printer_->Print("),\n");
    488   }
    489 
    490   printer_->Outdent();
    491   printer_->Print("])\n\n");
    492 }
    493 
    494 void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
    495   // Print the service.
    496   printer_->Print("class $class_name$(service.Service):\n",
    497                   "class_name", descriptor.name());
    498   printer_->Indent();
    499   printer_->Print(
    500       "__metaclass__ = service_reflection.GeneratedServiceType\n"
    501       "$descriptor_key$ = $descriptor_name$\n",
    502       "descriptor_key", kDescriptorKey,
    503       "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
    504   printer_->Outdent();
    505 }
    506 
    507 void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
    508   // Print the service stub.
    509   printer_->Print("class $class_name$_Stub($class_name$):\n",
    510                   "class_name", descriptor.name());
    511   printer_->Indent();
    512   printer_->Print(
    513       "__metaclass__ = service_reflection.GeneratedServiceStubType\n"
    514       "$descriptor_key$ = $descriptor_name$\n",
    515       "descriptor_key", kDescriptorKey,
    516       "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
    517   printer_->Outdent();
    518 }
    519 
    520 // Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
    521 // to a Python Descriptor object for message_descriptor.
    522 //
    523 // Mutually recursive with PrintNestedDescriptors().
    524 void Generator::PrintDescriptor(const Descriptor& message_descriptor) const {
    525   PrintNestedDescriptors(message_descriptor);
    526 
    527   printer_->Print("\n");
    528   printer_->Print("$descriptor_name$ = descriptor.Descriptor(\n",
    529                   "descriptor_name",
    530                   ModuleLevelDescriptorName(message_descriptor));
    531   printer_->Indent();
    532   map<string, string> m;
    533   m["name"] = message_descriptor.name();
    534   m["full_name"] = message_descriptor.full_name();
    535   m["file"] = kDescriptorKey;
    536   const char required_function_arguments[] =
    537       "name='$name$',\n"
    538       "full_name='$full_name$',\n"
    539       "filename=None,\n"
    540       "file=$file$,\n"
    541       "containing_type=None,\n";
    542   printer_->Print(m, required_function_arguments);
    543   PrintFieldsInDescriptor(message_descriptor);
    544   PrintExtensionsInDescriptor(message_descriptor);
    545 
    546   // Nested types
    547   printer_->Print("nested_types=[");
    548   for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
    549     const string nested_name = ModuleLevelDescriptorName(
    550         *message_descriptor.nested_type(i));
    551     printer_->Print("$name$, ", "name", nested_name);
    552   }
    553   printer_->Print("],\n");
    554 
    555   // Enum types
    556   printer_->Print("enum_types=[\n");
    557   printer_->Indent();
    558   for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
    559     const string descriptor_name = ModuleLevelDescriptorName(
    560         *message_descriptor.enum_type(i));
    561     printer_->Print(descriptor_name.c_str());
    562     printer_->Print(",\n");
    563   }
    564   printer_->Outdent();
    565   printer_->Print("],\n");
    566   string options_string;
    567   message_descriptor.options().SerializeToString(&options_string);
    568   printer_->Print(
    569       "options=$options_value$,\n"
    570       "is_extendable=$extendable$",
    571       "options_value", OptionsValue("MessageOptions", options_string),
    572       "extendable", message_descriptor.extension_range_count() > 0 ?
    573                       "True" : "False");
    574   printer_->Print(",\n");
    575 
    576   // Extension ranges
    577   printer_->Print("extension_ranges=[");
    578   for (int i = 0; i < message_descriptor.extension_range_count(); ++i) {
    579     const Descriptor::ExtensionRange* range =
    580         message_descriptor.extension_range(i);
    581     printer_->Print("($start$, $end$), ",
    582                     "start", SimpleItoa(range->start),
    583                     "end", SimpleItoa(range->end));
    584   }
    585   printer_->Print("],\n");
    586 
    587   // Serialization of proto
    588   DescriptorProto edp;
    589   PrintSerializedPbInterval(message_descriptor, edp);
    590 
    591   printer_->Outdent();
    592   printer_->Print(")\n");
    593 }
    594 
    595 // Prints Python Descriptor objects for all nested types contained in
    596 // message_descriptor.
    597 //
    598 // Mutually recursive with PrintDescriptor().
    599 void Generator::PrintNestedDescriptors(
    600     const Descriptor& containing_descriptor) const {
    601   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
    602     PrintDescriptor(*containing_descriptor.nested_type(i));
    603   }
    604 }
    605 
    606 // Prints all messages in |file|.
    607 void Generator::PrintMessages() const {
    608   for (int i = 0; i < file_->message_type_count(); ++i) {
    609     PrintMessage(*file_->message_type(i));
    610     printer_->Print("\n");
    611   }
    612 }
    613 
    614 // Prints a Python class for the given message descriptor.  We defer to the
    615 // metaclass to do almost all of the work of actually creating a useful class.
    616 // The purpose of this function and its many helper functions above is merely
    617 // to output a Python version of the descriptors, which the metaclass in
    618 // reflection.py will use to construct the meat of the class itself.
    619 //
    620 // Mutually recursive with PrintNestedMessages().
    621 void Generator::PrintMessage(
    622     const Descriptor& message_descriptor) const {
    623   printer_->Print("class $name$(message.Message):\n", "name",
    624                   message_descriptor.name());
    625   printer_->Indent();
    626   printer_->Print("__metaclass__ = reflection.GeneratedProtocolMessageType\n");
    627   PrintNestedMessages(message_descriptor);
    628   map<string, string> m;
    629   m["descriptor_key"] = kDescriptorKey;
    630   m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
    631   printer_->Print(m, "$descriptor_key$ = $descriptor_name$\n");
    632 
    633   printer_->Print(
    634     "\n"
    635     "# @@protoc_insertion_point(class_scope:$full_name$)\n",
    636     "full_name", message_descriptor.full_name());
    637 
    638   printer_->Outdent();
    639 }
    640 
    641 // Prints all nested messages within |containing_descriptor|.
    642 // Mutually recursive with PrintMessage().
    643 void Generator::PrintNestedMessages(
    644     const Descriptor& containing_descriptor) const {
    645   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
    646     printer_->Print("\n");
    647     PrintMessage(*containing_descriptor.nested_type(i));
    648   }
    649 }
    650 
    651 // Recursively fixes foreign fields in all nested types in |descriptor|, then
    652 // sets the message_type and enum_type of all message and enum fields to point
    653 // to their respective descriptors.
    654 // Args:
    655 //   descriptor: descriptor to print fields for.
    656 //   containing_descriptor: if descriptor is a nested type, this is its
    657 //       containing type, or NULL if this is a root/top-level type.
    658 void Generator::FixForeignFieldsInDescriptor(
    659     const Descriptor& descriptor,
    660     const Descriptor* containing_descriptor) const {
    661   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
    662     FixForeignFieldsInDescriptor(*descriptor.nested_type(i), &descriptor);
    663   }
    664 
    665   for (int i = 0; i < descriptor.field_count(); ++i) {
    666     const FieldDescriptor& field_descriptor = *descriptor.field(i);
    667     FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
    668   }
    669 
    670   FixContainingTypeInDescriptor(descriptor, containing_descriptor);
    671   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
    672     const EnumDescriptor& enum_descriptor = *descriptor.enum_type(i);
    673     FixContainingTypeInDescriptor(enum_descriptor, &descriptor);
    674   }
    675 }
    676 
    677 // Sets any necessary message_type and enum_type attributes
    678 // for the Python version of |field|.
    679 //
    680 // containing_type may be NULL, in which case this is a module-level field.
    681 //
    682 // python_dict_name is the name of the Python dict where we should
    683 // look the field up in the containing type.  (e.g., fields_by_name
    684 // or extensions_by_name).  We ignore python_dict_name if containing_type
    685 // is NULL.
    686 void Generator::FixForeignFieldsInField(const Descriptor* containing_type,
    687                                         const FieldDescriptor& field,
    688                                         const string& python_dict_name) const {
    689   const string field_referencing_expression = FieldReferencingExpression(
    690       containing_type, field, python_dict_name);
    691   map<string, string> m;
    692   m["field_ref"] = field_referencing_expression;
    693   const Descriptor* foreign_message_type = field.message_type();
    694   if (foreign_message_type) {
    695     m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
    696     printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
    697   }
    698   const EnumDescriptor* enum_type = field.enum_type();
    699   if (enum_type) {
    700     m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
    701     printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
    702   }
    703 }
    704 
    705 // Returns the module-level expression for the given FieldDescriptor.
    706 // Only works for fields in the .proto file this Generator is generating for.
    707 //
    708 // containing_type may be NULL, in which case this is a module-level field.
    709 //
    710 // python_dict_name is the name of the Python dict where we should
    711 // look the field up in the containing type.  (e.g., fields_by_name
    712 // or extensions_by_name).  We ignore python_dict_name if containing_type
    713 // is NULL.
    714 string Generator::FieldReferencingExpression(
    715     const Descriptor* containing_type,
    716     const FieldDescriptor& field,
    717     const string& python_dict_name) const {
    718   // We should only ever be looking up fields in the current file.
    719   // The only things we refer to from other files are message descriptors.
    720   GOOGLE_CHECK_EQ(field.file(), file_) << field.file()->name() << " vs. "
    721                                 << file_->name();
    722   if (!containing_type) {
    723     return field.name();
    724   }
    725   return strings::Substitute(
    726       "$0.$1['$2']",
    727       ModuleLevelDescriptorName(*containing_type),
    728       python_dict_name, field.name());
    729 }
    730 
    731 // Prints containing_type for nested descriptors or enum descriptors.
    732 template <typename DescriptorT>
    733 void Generator::FixContainingTypeInDescriptor(
    734     const DescriptorT& descriptor,
    735     const Descriptor* containing_descriptor) const {
    736   if (containing_descriptor != NULL) {
    737     const string nested_name = ModuleLevelDescriptorName(descriptor);
    738     const string parent_name = ModuleLevelDescriptorName(
    739         *containing_descriptor);
    740     printer_->Print(
    741         "$nested_name$.containing_type = $parent_name$;\n",
    742         "nested_name", nested_name,
    743         "parent_name", parent_name);
    744   }
    745 }
    746 
    747 // Prints statements setting the message_type and enum_type fields in the
    748 // Python descriptor objects we've already output in ths file.  We must
    749 // do this in a separate step due to circular references (otherwise, we'd
    750 // just set everything in the initial assignment statements).
    751 void Generator::FixForeignFieldsInDescriptors() const {
    752   for (int i = 0; i < file_->message_type_count(); ++i) {
    753     FixForeignFieldsInDescriptor(*file_->message_type(i), NULL);
    754   }
    755   printer_->Print("\n");
    756 }
    757 
    758 // We need to not only set any necessary message_type fields, but
    759 // also need to call RegisterExtension() on each message we're
    760 // extending.
    761 void Generator::FixForeignFieldsInExtensions() const {
    762   // Top-level extensions.
    763   for (int i = 0; i < file_->extension_count(); ++i) {
    764     FixForeignFieldsInExtension(*file_->extension(i));
    765   }
    766   // Nested extensions.
    767   for (int i = 0; i < file_->message_type_count(); ++i) {
    768     FixForeignFieldsInNestedExtensions(*file_->message_type(i));
    769   }
    770 }
    771 
    772 void Generator::FixForeignFieldsInExtension(
    773     const FieldDescriptor& extension_field) const {
    774   GOOGLE_CHECK(extension_field.is_extension());
    775   // extension_scope() will be NULL for top-level extensions, which is
    776   // exactly what FixForeignFieldsInField() wants.
    777   FixForeignFieldsInField(extension_field.extension_scope(), extension_field,
    778                           "extensions_by_name");
    779 
    780   map<string, string> m;
    781   // Confusingly, for FieldDescriptors that happen to be extensions,
    782   // containing_type() means "extended type."
    783   // On the other hand, extension_scope() will give us what we normally
    784   // mean by containing_type().
    785   m["extended_message_class"] = ModuleLevelMessageName(
    786       *extension_field.containing_type());
    787   m["field"] = FieldReferencingExpression(extension_field.extension_scope(),
    788                                           extension_field,
    789                                           "extensions_by_name");
    790   printer_->Print(m, "$extended_message_class$.RegisterExtension($field$)\n");
    791 }
    792 
    793 void Generator::FixForeignFieldsInNestedExtensions(
    794     const Descriptor& descriptor) const {
    795   // Recursively fix up extensions in all nested types.
    796   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
    797     FixForeignFieldsInNestedExtensions(*descriptor.nested_type(i));
    798   }
    799   // Fix up extensions directly contained within this type.
    800   for (int i = 0; i < descriptor.extension_count(); ++i) {
    801     FixForeignFieldsInExtension(*descriptor.extension(i));
    802   }
    803 }
    804 
    805 // Returns a Python expression that instantiates a Python EnumValueDescriptor
    806 // object for the given C++ descriptor.
    807 void Generator::PrintEnumValueDescriptor(
    808     const EnumValueDescriptor& descriptor) const {
    809   // TODO(robinson): Fix up EnumValueDescriptor "type" fields.
    810   // More circular references.  ::sigh::
    811   string options_string;
    812   descriptor.options().SerializeToString(&options_string);
    813   map<string, string> m;
    814   m["name"] = descriptor.name();
    815   m["index"] = SimpleItoa(descriptor.index());
    816   m["number"] = SimpleItoa(descriptor.number());
    817   m["options"] = OptionsValue("EnumValueOptions", options_string);
    818   printer_->Print(
    819       m,
    820       "descriptor.EnumValueDescriptor(\n"
    821       "  name='$name$', index=$index$, number=$number$,\n"
    822       "  options=$options$,\n"
    823       "  type=None)");
    824 }
    825 
    826 string Generator::OptionsValue(
    827     const string& class_name, const string& serialized_options) const {
    828   if (serialized_options.length() == 0 || GeneratingDescriptorProto()) {
    829     return "None";
    830   } else {
    831     string full_class_name = "descriptor_pb2." + class_name;
    832     return "descriptor._ParseOptions(" + full_class_name + "(), '"
    833         + CEscape(serialized_options)+ "')";
    834   }
    835 }
    836 
    837 // Prints an expression for a Python FieldDescriptor for |field|.
    838 void Generator::PrintFieldDescriptor(
    839     const FieldDescriptor& field, bool is_extension) const {
    840   string options_string;
    841   field.options().SerializeToString(&options_string);
    842   map<string, string> m;
    843   m["name"] = field.name();
    844   m["full_name"] = field.full_name();
    845   m["index"] = SimpleItoa(field.index());
    846   m["number"] = SimpleItoa(field.number());
    847   m["type"] = SimpleItoa(field.type());
    848   m["cpp_type"] = SimpleItoa(field.cpp_type());
    849   m["label"] = SimpleItoa(field.label());
    850   m["has_default_value"] = field.has_default_value() ? "True" : "False";
    851   m["default_value"] = StringifyDefaultValue(field);
    852   m["is_extension"] = is_extension ? "True" : "False";
    853   m["options"] = OptionsValue("FieldOptions", options_string);
    854   // We always set message_type and enum_type to None at this point, and then
    855   // these fields in correctly after all referenced descriptors have been
    856   // defined and/or imported (see FixForeignFieldsInDescriptors()).
    857   const char field_descriptor_decl[] =
    858     "descriptor.FieldDescriptor(\n"
    859     "  name='$name$', full_name='$full_name$', index=$index$,\n"
    860     "  number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
    861     "  has_default_value=$has_default_value$, default_value=$default_value$,\n"
    862     "  message_type=None, enum_type=None, containing_type=None,\n"
    863     "  is_extension=$is_extension$, extension_scope=None,\n"
    864     "  options=$options$)";
    865   printer_->Print(m, field_descriptor_decl);
    866 }
    867 
    868 // Helper for Print{Fields,Extensions}InDescriptor().
    869 void Generator::PrintFieldDescriptorsInDescriptor(
    870     const Descriptor& message_descriptor,
    871     bool is_extension,
    872     const string& list_variable_name,
    873     int (Descriptor::*CountFn)() const,
    874     const FieldDescriptor* (Descriptor::*GetterFn)(int) const) const {
    875   printer_->Print("$list$=[\n", "list", list_variable_name);
    876   printer_->Indent();
    877   for (int i = 0; i < (message_descriptor.*CountFn)(); ++i) {
    878     PrintFieldDescriptor(*(message_descriptor.*GetterFn)(i),
    879                          is_extension);
    880     printer_->Print(",\n");
    881   }
    882   printer_->Outdent();
    883   printer_->Print("],\n");
    884 }
    885 
    886 // Prints a statement assigning "fields" to a list of Python FieldDescriptors,
    887 // one for each field present in message_descriptor.
    888 void Generator::PrintFieldsInDescriptor(
    889     const Descriptor& message_descriptor) const {
    890   const bool is_extension = false;
    891   PrintFieldDescriptorsInDescriptor(
    892       message_descriptor, is_extension, "fields",
    893       &Descriptor::field_count, &Descriptor::field);
    894 }
    895 
    896 // Prints a statement assigning "extensions" to a list of Python
    897 // FieldDescriptors, one for each extension present in message_descriptor.
    898 void Generator::PrintExtensionsInDescriptor(
    899     const Descriptor& message_descriptor) const {
    900   const bool is_extension = true;
    901   PrintFieldDescriptorsInDescriptor(
    902       message_descriptor, is_extension, "extensions",
    903       &Descriptor::extension_count, &Descriptor::extension);
    904 }
    905 
    906 bool Generator::GeneratingDescriptorProto() const {
    907   return file_->name() == "google/protobuf/descriptor.proto";
    908 }
    909 
    910 // Returns the unique Python module-level identifier given to a descriptor.
    911 // This name is module-qualified iff the given descriptor describes an
    912 // entity that doesn't come from the current file.
    913 template <typename DescriptorT>
    914 string Generator::ModuleLevelDescriptorName(
    915     const DescriptorT& descriptor) const {
    916   // FIXME(robinson):
    917   // We currently don't worry about collisions with underscores in the type
    918   // names, so these would collide in nasty ways if found in the same file:
    919   //   OuterProto.ProtoA.ProtoB
    920   //   OuterProto_ProtoA.ProtoB  # Underscore instead of period.
    921   // As would these:
    922   //   OuterProto.ProtoA_.ProtoB
    923   //   OuterProto.ProtoA._ProtoB  # Leading vs. trailing underscore.
    924   // (Contrived, but certainly possible).
    925   //
    926   // The C++ implementation doesn't guard against this either.  Leaving
    927   // it for now...
    928   string name = NamePrefixedWithNestedTypes(descriptor, "_");
    929   UpperString(&name);
    930   // Module-private for now.  Easy to make public later; almost impossible
    931   // to make private later.
    932   name = "_" + name;
    933   // We now have the name relative to its own module.  Also qualify with
    934   // the module name iff this descriptor is from a different .proto file.
    935   if (descriptor.file() != file_) {
    936     name = ModuleName(descriptor.file()->name()) + "." + name;
    937   }
    938   return name;
    939 }
    940 
    941 // Returns the name of the message class itself, not the descriptor.
    942 // Like ModuleLevelDescriptorName(), module-qualifies the name iff
    943 // the given descriptor describes an entity that doesn't come from
    944 // the current file.
    945 string Generator::ModuleLevelMessageName(const Descriptor& descriptor) const {
    946   string name = NamePrefixedWithNestedTypes(descriptor, ".");
    947   if (descriptor.file() != file_) {
    948     name = ModuleName(descriptor.file()->name()) + "." + name;
    949   }
    950   return name;
    951 }
    952 
    953 // Returns the unique Python module-level identifier given to a service
    954 // descriptor.
    955 string Generator::ModuleLevelServiceDescriptorName(
    956     const ServiceDescriptor& descriptor) const {
    957   string name = descriptor.name();
    958   UpperString(&name);
    959   name = "_" + name;
    960   if (descriptor.file() != file_) {
    961     name = ModuleName(descriptor.file()->name()) + "." + name;
    962   }
    963   return name;
    964 }
    965 
    966 // Prints standard constructor arguments serialized_start and serialized_end.
    967 // Args:
    968 //   descriptor: The cpp descriptor to have a serialized reference.
    969 //   proto: A proto
    970 // Example printer output:
    971 // serialized_start=41,
    972 // serialized_end=43,
    973 //
    974 template <typename DescriptorT, typename DescriptorProtoT>
    975 void Generator::PrintSerializedPbInterval(
    976     const DescriptorT& descriptor, DescriptorProtoT& proto) const {
    977   descriptor.CopyTo(&proto);
    978   string sp;
    979   proto.SerializeToString(&sp);
    980   int offset = file_descriptor_serialized_.find(sp);
    981   GOOGLE_CHECK_GE(offset, 0);
    982 
    983   printer_->Print("serialized_start=$serialized_start$,\n"
    984                   "serialized_end=$serialized_end$,\n",
    985                   "serialized_start", SimpleItoa(offset),
    986                   "serialized_end", SimpleItoa(offset + sp.size()));
    987 }
    988 
    989 }  // namespace python
    990 }  // namespace compiler
    991 }  // namespace protobuf
    992 }  // namespace google
    993