Home | History | Annotate | Download | only in csharp
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 
     35 #include <algorithm>
     36 #include <google/protobuf/stubs/hash.h>
     37 #include <limits>
     38 #include <vector>
     39 
     40 #include <google/protobuf/compiler/csharp/csharp_helpers.h>
     41 #include <google/protobuf/descriptor.pb.h>
     42 #include <google/protobuf/io/printer.h>
     43 #include <google/protobuf/wire_format.h>
     44 #include <google/protobuf/stubs/strutil.h>
     45 #include <google/protobuf/stubs/substitute.h>
     46 
     47 #include <google/protobuf/compiler/csharp/csharp_field_base.h>
     48 #include <google/protobuf/compiler/csharp/csharp_enum_field.h>
     49 #include <google/protobuf/compiler/csharp/csharp_map_field.h>
     50 #include <google/protobuf/compiler/csharp/csharp_message_field.h>
     51 #include <google/protobuf/compiler/csharp/csharp_options.h>
     52 #include <google/protobuf/compiler/csharp/csharp_primitive_field.h>
     53 #include <google/protobuf/compiler/csharp/csharp_repeated_enum_field.h>
     54 #include <google/protobuf/compiler/csharp/csharp_repeated_message_field.h>
     55 #include <google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h>
     56 #include <google/protobuf/compiler/csharp/csharp_wrapper_field.h>
     57 
     58 namespace google {
     59 namespace protobuf {
     60 namespace compiler {
     61 namespace csharp {
     62 
     63 CSharpType GetCSharpType(FieldDescriptor::Type type) {
     64   switch (type) {
     65     case FieldDescriptor::TYPE_INT32:
     66       return CSHARPTYPE_INT32;
     67     case FieldDescriptor::TYPE_INT64:
     68       return CSHARPTYPE_INT64;
     69     case FieldDescriptor::TYPE_UINT32:
     70       return CSHARPTYPE_UINT32;
     71     case FieldDescriptor::TYPE_UINT64:
     72       return CSHARPTYPE_UINT32;
     73     case FieldDescriptor::TYPE_SINT32:
     74       return CSHARPTYPE_INT32;
     75     case FieldDescriptor::TYPE_SINT64:
     76       return CSHARPTYPE_INT64;
     77     case FieldDescriptor::TYPE_FIXED32:
     78       return CSHARPTYPE_UINT32;
     79     case FieldDescriptor::TYPE_FIXED64:
     80       return CSHARPTYPE_UINT64;
     81     case FieldDescriptor::TYPE_SFIXED32:
     82       return CSHARPTYPE_INT32;
     83     case FieldDescriptor::TYPE_SFIXED64:
     84       return CSHARPTYPE_INT64;
     85     case FieldDescriptor::TYPE_FLOAT:
     86       return CSHARPTYPE_FLOAT;
     87     case FieldDescriptor::TYPE_DOUBLE:
     88       return CSHARPTYPE_DOUBLE;
     89     case FieldDescriptor::TYPE_BOOL:
     90       return CSHARPTYPE_BOOL;
     91     case FieldDescriptor::TYPE_ENUM:
     92       return CSHARPTYPE_ENUM;
     93     case FieldDescriptor::TYPE_STRING:
     94       return CSHARPTYPE_STRING;
     95     case FieldDescriptor::TYPE_BYTES:
     96       return CSHARPTYPE_BYTESTRING;
     97     case FieldDescriptor::TYPE_GROUP:
     98       return CSHARPTYPE_MESSAGE;
     99     case FieldDescriptor::TYPE_MESSAGE:
    100       return CSHARPTYPE_MESSAGE;
    101 
    102       // No default because we want the compiler to complain if any new
    103       // types are added.
    104   }
    105   GOOGLE_LOG(FATAL)<< "Can't get here.";
    106   return (CSharpType) -1;
    107 }
    108 
    109 std::string StripDotProto(const std::string& proto_file) {
    110   int lastindex = proto_file.find_last_of(".");
    111   return proto_file.substr(0, lastindex);
    112 }
    113 
    114 std::string GetFileNamespace(const FileDescriptor* descriptor) {
    115   if (descriptor->options().has_csharp_namespace()) {
    116     return descriptor->options().csharp_namespace();
    117   }
    118   return UnderscoresToCamelCase(descriptor->package(), true, true);
    119 }
    120 
    121 // Returns the Pascal-cased last part of the proto file. For example,
    122 // input of "google/protobuf/foo_bar.proto" would result in "FooBar".
    123 std::string GetFileNameBase(const FileDescriptor* descriptor) {
    124     std::string proto_file = descriptor->name();
    125     int lastslash = proto_file.find_last_of("/");
    126     std::string base = proto_file.substr(lastslash + 1);
    127     return UnderscoresToPascalCase(StripDotProto(base));
    128 }
    129 
    130 std::string GetReflectionClassUnqualifiedName(const FileDescriptor* descriptor) {
    131   // TODO: Detect collisions with existing messages,
    132   // and append an underscore if necessary.
    133   return GetFileNameBase(descriptor) + "Reflection";
    134 }
    135 
    136 // TODO(jtattermusch): can we reuse a utility function?
    137 std::string UnderscoresToCamelCase(const std::string& input,
    138                                    bool cap_next_letter,
    139                                    bool preserve_period) {
    140   string result;
    141   // Note:  I distrust ctype.h due to locales.
    142   for (int i = 0; i < input.size(); i++) {
    143     if ('a' <= input[i] && input[i] <= 'z') {
    144       if (cap_next_letter) {
    145         result += input[i] + ('A' - 'a');
    146       } else {
    147         result += input[i];
    148       }
    149       cap_next_letter = false;
    150     } else if ('A' <= input[i] && input[i] <= 'Z') {
    151       if (i == 0 && !cap_next_letter) {
    152         // Force first letter to lower-case unless explicitly told to
    153         // capitalize it.
    154         result += input[i] + ('a' - 'A');
    155       } else {
    156         // Capital letters after the first are left as-is.
    157         result += input[i];
    158       }
    159       cap_next_letter = false;
    160     } else if ('0' <= input[i] && input[i] <= '9') {
    161       result += input[i];
    162       cap_next_letter = true;
    163     } else {
    164       cap_next_letter = true;
    165       if (input[i] == '.' && preserve_period) {
    166         result += '.';
    167       }
    168     }
    169   }
    170   // Add a trailing "_" if the name should be altered.
    171   if (input[input.size() - 1] == '#') {
    172     result += '_';
    173   }
    174   return result;
    175 }
    176 
    177 std::string UnderscoresToPascalCase(const std::string& input) {
    178   return UnderscoresToCamelCase(input, true);
    179 }
    180 
    181 // Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
    182 // into a PascalCase string. Precise rules implemented:
    183 
    184 // Previous input character      Current character         Case
    185 // Any                           Non-alphanumeric          Skipped
    186 // None - first char of input    Alphanumeric              Upper
    187 // Non-letter (e.g. _ or 1)      Alphanumeric              Upper
    188 // Numeric                       Alphanumeric              Upper
    189 // Lower letter                  Alphanumeric              Same as current
    190 // Upper letter                  Alphanumeric              Lower
    191 std::string ShoutyToPascalCase(const std::string& input) {
    192   string result;
    193   // Simple way of implementing "always start with upper"
    194   char previous = '_';
    195   for (int i = 0; i < input.size(); i++) {
    196     char current = input[i];
    197     if (!ascii_isalnum(current)) {
    198       previous = current;
    199       continue;
    200     }
    201     if (!ascii_isalnum(previous)) {
    202       result += ascii_toupper(current);
    203     } else if (ascii_isdigit(previous)) {
    204       result += ascii_toupper(current);
    205     } else if (ascii_islower(previous)) {
    206       result += current;
    207     } else {
    208       result += ascii_tolower(current);
    209     }
    210     previous = current;
    211   }
    212   return result;
    213 }
    214 
    215 // Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
    216 // (foo, foo_bar) => bar - underscore after prefix is skipped
    217 // (FOO, foo_bar) => bar - casing is ignored
    218 // (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
    219 // (foobar, foo_barbaz) => baz - underscore in value is ignored
    220 // (foo, bar) => bar - prefix isn't matched; return original value
    221 std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
    222   // First normalize to a lower-case no-underscores prefix to match against
    223   std::string prefix_to_match = "";
    224   for (size_t i = 0; i < prefix.size(); i++) {
    225     if (prefix[i] != '_') {
    226       prefix_to_match += ascii_tolower(prefix[i]);
    227     }
    228   }
    229 
    230   // This keeps track of how much of value we've consumed
    231   size_t prefix_index, value_index;
    232   for (prefix_index = 0, value_index = 0;
    233       prefix_index < prefix_to_match.size() && value_index < value.size();
    234       value_index++) {
    235     // Skip over underscores in the value
    236     if (value[value_index] == '_') {
    237       continue;
    238     }
    239     if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
    240       // Failed to match the prefix - bail out early.
    241       return value;
    242     }
    243   }
    244 
    245   // If we didn't finish looking through the prefix, we can't strip it.
    246   if (prefix_index < prefix_to_match.size()) {
    247     return value;
    248   }
    249 
    250   // Step over any underscores after the prefix
    251   while (value_index < value.size() && value[value_index] == '_') {
    252     value_index++;
    253   }
    254 
    255   // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
    256   if (value_index == value.size()) {
    257     return value;
    258   }
    259 
    260   return value.substr(value_index);
    261 }
    262 
    263 // Format the enum value name in a pleasant way for C#:
    264 // - Strip the enum name as a prefix if possible
    265 // - Convert to PascalCase.
    266 // For example, an enum called Color with a value of COLOR_BLUE should
    267 // result in an enum value in C# called just Blue
    268 std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
    269   std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
    270   std::string result = ShoutyToPascalCase(stripped);
    271   // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
    272   // string is a valid identifier.
    273   if (ascii_isdigit(result[0])) {
    274     result = "_" + result;
    275   }
    276   return result;
    277 }
    278 
    279 std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
    280   std::string result = GetFileNamespace(file);
    281   if (result != "") {
    282     result += '.';
    283   }
    284   string classname;
    285   if (file->package().empty()) {
    286     classname = name;
    287   } else {
    288     // Strip the proto package from full_name since we've replaced it with
    289     // the C# namespace.
    290     classname = name.substr(file->package().size() + 1);
    291   }
    292   result += StringReplace(classname, ".", ".Types.", true);
    293   return "global::" + result;
    294 }
    295 
    296 std::string GetReflectionClassName(const FileDescriptor* descriptor) {
    297   std::string result = GetFileNamespace(descriptor);
    298   if (!result.empty()) {
    299     result += '.';
    300   }
    301   result += GetReflectionClassUnqualifiedName(descriptor);
    302   return "global::" + result;
    303 }
    304 
    305 std::string GetClassName(const Descriptor* descriptor) {
    306   return ToCSharpName(descriptor->full_name(), descriptor->file());
    307 }
    308 
    309 std::string GetClassName(const EnumDescriptor* descriptor) {
    310   return ToCSharpName(descriptor->full_name(), descriptor->file());
    311 }
    312 
    313 // Groups are hacky:  The name of the field is just the lower-cased name
    314 // of the group type.  In C#, though, we would like to retain the original
    315 // capitalization of the type name.
    316 std::string GetFieldName(const FieldDescriptor* descriptor) {
    317   if (descriptor->type() == FieldDescriptor::TYPE_GROUP) {
    318     return descriptor->message_type()->name();
    319   } else {
    320     return descriptor->name();
    321   }
    322 }
    323 
    324 std::string GetFieldConstantName(const FieldDescriptor* field) {
    325   return GetPropertyName(field) + "FieldNumber";
    326 }
    327 
    328 std::string GetPropertyName(const FieldDescriptor* descriptor) {
    329   // TODO(jtattermusch): consider introducing csharp_property_name field option
    330   std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
    331   // Avoid either our own type name or reserved names. Note that not all names
    332   // are reserved - a field called to_string, write_to etc would still cause a problem.
    333   // There are various ways of ending up with naming collisions, but we try to avoid obvious
    334   // ones.
    335   if (property_name == descriptor->containing_type()->name()
    336       || property_name == "Types"
    337       || property_name == "Descriptor") {
    338     property_name += "_";
    339   }
    340   return property_name;
    341 }
    342 
    343 std::string GetOutputFile(
    344     const google::protobuf::FileDescriptor* descriptor,
    345     const std::string file_extension,
    346     const bool generate_directories,
    347     const std::string base_namespace,
    348     string* error) {
    349   string relative_filename = GetFileNameBase(descriptor) + file_extension;
    350   if (!generate_directories) {
    351     return relative_filename;
    352   }
    353   string ns = GetFileNamespace(descriptor);
    354   string namespace_suffix = ns;
    355   if (!base_namespace.empty()) {
    356     // Check that the base_namespace is either equal to or a leading part of
    357     // the file namespace. This isn't just a simple prefix; "Foo.B" shouldn't
    358     // be regarded as a prefix of "Foo.Bar". The simplest option is to add "."
    359     // to both.
    360     string extended_ns = ns + ".";
    361     if (extended_ns.find(base_namespace + ".") != 0) {
    362       *error = "Namespace " + ns + " is not a prefix namespace of base namespace " + base_namespace;
    363       return ""; // This will be ignored, because we've set an error.
    364     }
    365     namespace_suffix = ns.substr(base_namespace.length());
    366     if (namespace_suffix.find(".") == 0) {
    367       namespace_suffix = namespace_suffix.substr(1);
    368     }
    369   }
    370 
    371   string namespace_dir = StringReplace(namespace_suffix, ".", "/", true);
    372   if (!namespace_dir.empty()) {
    373     namespace_dir += "/";
    374   }
    375   return namespace_dir + relative_filename;
    376 }
    377 
    378 // TODO: c&p from Java protoc plugin
    379 // For encodings with fixed sizes, returns that size in bytes.  Otherwise
    380 // returns -1.
    381 int GetFixedSize(FieldDescriptor::Type type) {
    382   switch (type) {
    383     case FieldDescriptor::TYPE_INT32   : return -1;
    384     case FieldDescriptor::TYPE_INT64   : return -1;
    385     case FieldDescriptor::TYPE_UINT32  : return -1;
    386     case FieldDescriptor::TYPE_UINT64  : return -1;
    387     case FieldDescriptor::TYPE_SINT32  : return -1;
    388     case FieldDescriptor::TYPE_SINT64  : return -1;
    389     case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size;
    390     case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size;
    391     case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size;
    392     case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size;
    393     case FieldDescriptor::TYPE_FLOAT   : return internal::WireFormatLite::kFloatSize;
    394     case FieldDescriptor::TYPE_DOUBLE  : return internal::WireFormatLite::kDoubleSize;
    395 
    396     case FieldDescriptor::TYPE_BOOL    : return internal::WireFormatLite::kBoolSize;
    397     case FieldDescriptor::TYPE_ENUM    : return -1;
    398 
    399     case FieldDescriptor::TYPE_STRING  : return -1;
    400     case FieldDescriptor::TYPE_BYTES   : return -1;
    401     case FieldDescriptor::TYPE_GROUP   : return -1;
    402     case FieldDescriptor::TYPE_MESSAGE : return -1;
    403 
    404     // No default because we want the compiler to complain if any new
    405     // types are added.
    406   }
    407   GOOGLE_LOG(FATAL) << "Can't get here.";
    408   return -1;
    409 }
    410 
    411 static const char base64_chars[] =
    412     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    413 
    414 std::string StringToBase64(const std::string& input) {
    415   std::string result;
    416   size_t remaining = input.size();
    417   const unsigned char *src = (const unsigned char*) input.c_str();
    418   while (remaining > 2) {
    419     result += base64_chars[src[0] >> 2];
    420     result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)];
    421     result += base64_chars[((src[1] & 0xf) << 2) | (src[2] >> 6)];
    422     result += base64_chars[src[2] & 0x3f];
    423     remaining -= 3;
    424     src += 3;
    425   }
    426   switch (remaining) {
    427     case 2:
    428       result += base64_chars[src[0] >> 2];
    429       result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)];
    430       result += base64_chars[(src[1] & 0xf) << 2];
    431       result += '=';
    432       src += 2;
    433       break;
    434     case 1:
    435       result += base64_chars[src[0] >> 2];
    436       result += base64_chars[((src[0] & 0x3) << 4)];
    437       result += '=';
    438       result += '=';
    439       src += 1;
    440       break;
    441   }
    442   return result;
    443 }
    444 
    445 std::string FileDescriptorToBase64(const FileDescriptor* descriptor) {
    446   std::string fdp_bytes;
    447   FileDescriptorProto fdp;
    448   descriptor->CopyTo(&fdp);
    449   fdp.SerializeToString(&fdp_bytes);
    450   return StringToBase64(fdp_bytes);
    451 }
    452 
    453 FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
    454                                          int fieldOrdinal,
    455                                          const Options* options) {
    456   switch (descriptor->type()) {
    457     case FieldDescriptor::TYPE_GROUP:
    458     case FieldDescriptor::TYPE_MESSAGE:
    459       if (descriptor->is_repeated()) {
    460         if (descriptor->is_map()) {
    461           return new MapFieldGenerator(descriptor, fieldOrdinal, options);
    462         } else {
    463           return new RepeatedMessageFieldGenerator(descriptor, fieldOrdinal, options);
    464         }
    465       } else {
    466         if (IsWrapperType(descriptor)) {
    467           if (descriptor->containing_oneof()) {
    468             return new WrapperOneofFieldGenerator(descriptor, fieldOrdinal, options);
    469           } else {
    470             return new WrapperFieldGenerator(descriptor, fieldOrdinal, options);
    471           }
    472         } else {
    473           if (descriptor->containing_oneof()) {
    474             return new MessageOneofFieldGenerator(descriptor, fieldOrdinal, options);
    475           } else {
    476             return new MessageFieldGenerator(descriptor, fieldOrdinal, options);
    477           }
    478         }
    479       }
    480     case FieldDescriptor::TYPE_ENUM:
    481       if (descriptor->is_repeated()) {
    482         return new RepeatedEnumFieldGenerator(descriptor, fieldOrdinal, options);
    483       } else {
    484         if (descriptor->containing_oneof()) {
    485           return new EnumOneofFieldGenerator(descriptor, fieldOrdinal, options);
    486         } else {
    487           return new EnumFieldGenerator(descriptor, fieldOrdinal, options);
    488         }
    489       }
    490     default:
    491       if (descriptor->is_repeated()) {
    492         return new RepeatedPrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
    493       } else {
    494         if (descriptor->containing_oneof()) {
    495           return new PrimitiveOneofFieldGenerator(descriptor, fieldOrdinal, options);
    496         } else {
    497           return new PrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
    498         }
    499       }
    500   }
    501 }
    502 
    503 }  // namespace csharp
    504 }  // namespace compiler
    505 }  // namespace protobuf
    506 }  // namespace google
    507