1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: kenton (at) google.com (Kenton Varda) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 35 #include <algorithm> 36 #include <google/protobuf/stubs/hash.h> 37 #include <limits> 38 #include <vector> 39 40 #include <google/protobuf/compiler/csharp/csharp_helpers.h> 41 #include <google/protobuf/descriptor.pb.h> 42 #include <google/protobuf/io/printer.h> 43 #include <google/protobuf/wire_format.h> 44 #include <google/protobuf/stubs/strutil.h> 45 #include <google/protobuf/stubs/substitute.h> 46 47 #include <google/protobuf/compiler/csharp/csharp_field_base.h> 48 #include <google/protobuf/compiler/csharp/csharp_enum_field.h> 49 #include <google/protobuf/compiler/csharp/csharp_map_field.h> 50 #include <google/protobuf/compiler/csharp/csharp_message_field.h> 51 #include <google/protobuf/compiler/csharp/csharp_options.h> 52 #include <google/protobuf/compiler/csharp/csharp_primitive_field.h> 53 #include <google/protobuf/compiler/csharp/csharp_repeated_enum_field.h> 54 #include <google/protobuf/compiler/csharp/csharp_repeated_message_field.h> 55 #include <google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h> 56 #include <google/protobuf/compiler/csharp/csharp_wrapper_field.h> 57 58 namespace google { 59 namespace protobuf { 60 namespace compiler { 61 namespace csharp { 62 63 CSharpType GetCSharpType(FieldDescriptor::Type type) { 64 switch (type) { 65 case FieldDescriptor::TYPE_INT32: 66 return CSHARPTYPE_INT32; 67 case FieldDescriptor::TYPE_INT64: 68 return CSHARPTYPE_INT64; 69 case FieldDescriptor::TYPE_UINT32: 70 return CSHARPTYPE_UINT32; 71 case FieldDescriptor::TYPE_UINT64: 72 return CSHARPTYPE_UINT32; 73 case FieldDescriptor::TYPE_SINT32: 74 return CSHARPTYPE_INT32; 75 case FieldDescriptor::TYPE_SINT64: 76 return CSHARPTYPE_INT64; 77 case FieldDescriptor::TYPE_FIXED32: 78 return CSHARPTYPE_UINT32; 79 case FieldDescriptor::TYPE_FIXED64: 80 return CSHARPTYPE_UINT64; 81 case FieldDescriptor::TYPE_SFIXED32: 82 return CSHARPTYPE_INT32; 83 case FieldDescriptor::TYPE_SFIXED64: 84 return CSHARPTYPE_INT64; 85 case FieldDescriptor::TYPE_FLOAT: 86 return CSHARPTYPE_FLOAT; 87 case FieldDescriptor::TYPE_DOUBLE: 88 return CSHARPTYPE_DOUBLE; 89 case FieldDescriptor::TYPE_BOOL: 90 return CSHARPTYPE_BOOL; 91 case FieldDescriptor::TYPE_ENUM: 92 return CSHARPTYPE_ENUM; 93 case FieldDescriptor::TYPE_STRING: 94 return CSHARPTYPE_STRING; 95 case FieldDescriptor::TYPE_BYTES: 96 return CSHARPTYPE_BYTESTRING; 97 case FieldDescriptor::TYPE_GROUP: 98 return CSHARPTYPE_MESSAGE; 99 case FieldDescriptor::TYPE_MESSAGE: 100 return CSHARPTYPE_MESSAGE; 101 102 // No default because we want the compiler to complain if any new 103 // types are added. 104 } 105 GOOGLE_LOG(FATAL)<< "Can't get here."; 106 return (CSharpType) -1; 107 } 108 109 std::string StripDotProto(const std::string& proto_file) { 110 int lastindex = proto_file.find_last_of("."); 111 return proto_file.substr(0, lastindex); 112 } 113 114 std::string GetFileNamespace(const FileDescriptor* descriptor) { 115 if (descriptor->options().has_csharp_namespace()) { 116 return descriptor->options().csharp_namespace(); 117 } 118 return UnderscoresToCamelCase(descriptor->package(), true, true); 119 } 120 121 // Returns the Pascal-cased last part of the proto file. For example, 122 // input of "google/protobuf/foo_bar.proto" would result in "FooBar". 123 std::string GetFileNameBase(const FileDescriptor* descriptor) { 124 std::string proto_file = descriptor->name(); 125 int lastslash = proto_file.find_last_of("/"); 126 std::string base = proto_file.substr(lastslash + 1); 127 return UnderscoresToPascalCase(StripDotProto(base)); 128 } 129 130 std::string GetReflectionClassUnqualifiedName(const FileDescriptor* descriptor) { 131 // TODO: Detect collisions with existing messages, 132 // and append an underscore if necessary. 133 return GetFileNameBase(descriptor) + "Reflection"; 134 } 135 136 // TODO(jtattermusch): can we reuse a utility function? 137 std::string UnderscoresToCamelCase(const std::string& input, 138 bool cap_next_letter, 139 bool preserve_period) { 140 string result; 141 // Note: I distrust ctype.h due to locales. 142 for (int i = 0; i < input.size(); i++) { 143 if ('a' <= input[i] && input[i] <= 'z') { 144 if (cap_next_letter) { 145 result += input[i] + ('A' - 'a'); 146 } else { 147 result += input[i]; 148 } 149 cap_next_letter = false; 150 } else if ('A' <= input[i] && input[i] <= 'Z') { 151 if (i == 0 && !cap_next_letter) { 152 // Force first letter to lower-case unless explicitly told to 153 // capitalize it. 154 result += input[i] + ('a' - 'A'); 155 } else { 156 // Capital letters after the first are left as-is. 157 result += input[i]; 158 } 159 cap_next_letter = false; 160 } else if ('0' <= input[i] && input[i] <= '9') { 161 result += input[i]; 162 cap_next_letter = true; 163 } else { 164 cap_next_letter = true; 165 if (input[i] == '.' && preserve_period) { 166 result += '.'; 167 } 168 } 169 } 170 // Add a trailing "_" if the name should be altered. 171 if (input[input.size() - 1] == '#') { 172 result += '_'; 173 } 174 return result; 175 } 176 177 std::string UnderscoresToPascalCase(const std::string& input) { 178 return UnderscoresToCamelCase(input, true); 179 } 180 181 // Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty) 182 // into a PascalCase string. Precise rules implemented: 183 184 // Previous input character Current character Case 185 // Any Non-alphanumeric Skipped 186 // None - first char of input Alphanumeric Upper 187 // Non-letter (e.g. _ or 1) Alphanumeric Upper 188 // Numeric Alphanumeric Upper 189 // Lower letter Alphanumeric Same as current 190 // Upper letter Alphanumeric Lower 191 std::string ShoutyToPascalCase(const std::string& input) { 192 string result; 193 // Simple way of implementing "always start with upper" 194 char previous = '_'; 195 for (int i = 0; i < input.size(); i++) { 196 char current = input[i]; 197 if (!ascii_isalnum(current)) { 198 previous = current; 199 continue; 200 } 201 if (!ascii_isalnum(previous)) { 202 result += ascii_toupper(current); 203 } else if (ascii_isdigit(previous)) { 204 result += ascii_toupper(current); 205 } else if (ascii_islower(previous)) { 206 result += current; 207 } else { 208 result += ascii_tolower(current); 209 } 210 previous = current; 211 } 212 return result; 213 } 214 215 // Attempt to remove a prefix from a value, ignoring casing and skipping underscores. 216 // (foo, foo_bar) => bar - underscore after prefix is skipped 217 // (FOO, foo_bar) => bar - casing is ignored 218 // (foo_bar, foobarbaz) => baz - underscore in prefix is ignored 219 // (foobar, foo_barbaz) => baz - underscore in value is ignored 220 // (foo, bar) => bar - prefix isn't matched; return original value 221 std::string TryRemovePrefix(const std::string& prefix, const std::string& value) { 222 // First normalize to a lower-case no-underscores prefix to match against 223 std::string prefix_to_match = ""; 224 for (size_t i = 0; i < prefix.size(); i++) { 225 if (prefix[i] != '_') { 226 prefix_to_match += ascii_tolower(prefix[i]); 227 } 228 } 229 230 // This keeps track of how much of value we've consumed 231 size_t prefix_index, value_index; 232 for (prefix_index = 0, value_index = 0; 233 prefix_index < prefix_to_match.size() && value_index < value.size(); 234 value_index++) { 235 // Skip over underscores in the value 236 if (value[value_index] == '_') { 237 continue; 238 } 239 if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) { 240 // Failed to match the prefix - bail out early. 241 return value; 242 } 243 } 244 245 // If we didn't finish looking through the prefix, we can't strip it. 246 if (prefix_index < prefix_to_match.size()) { 247 return value; 248 } 249 250 // Step over any underscores after the prefix 251 while (value_index < value.size() && value[value_index] == '_') { 252 value_index++; 253 } 254 255 // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip. 256 if (value_index == value.size()) { 257 return value; 258 } 259 260 return value.substr(value_index); 261 } 262 263 // Format the enum value name in a pleasant way for C#: 264 // - Strip the enum name as a prefix if possible 265 // - Convert to PascalCase. 266 // For example, an enum called Color with a value of COLOR_BLUE should 267 // result in an enum value in C# called just Blue 268 std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) { 269 std::string stripped = TryRemovePrefix(enum_name, enum_value_name); 270 std::string result = ShoutyToPascalCase(stripped); 271 // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned 272 // string is a valid identifier. 273 if (ascii_isdigit(result[0])) { 274 result = "_" + result; 275 } 276 return result; 277 } 278 279 std::string ToCSharpName(const std::string& name, const FileDescriptor* file) { 280 std::string result = GetFileNamespace(file); 281 if (result != "") { 282 result += '.'; 283 } 284 string classname; 285 if (file->package().empty()) { 286 classname = name; 287 } else { 288 // Strip the proto package from full_name since we've replaced it with 289 // the C# namespace. 290 classname = name.substr(file->package().size() + 1); 291 } 292 result += StringReplace(classname, ".", ".Types.", true); 293 return "global::" + result; 294 } 295 296 std::string GetReflectionClassName(const FileDescriptor* descriptor) { 297 std::string result = GetFileNamespace(descriptor); 298 if (!result.empty()) { 299 result += '.'; 300 } 301 result += GetReflectionClassUnqualifiedName(descriptor); 302 return "global::" + result; 303 } 304 305 std::string GetClassName(const Descriptor* descriptor) { 306 return ToCSharpName(descriptor->full_name(), descriptor->file()); 307 } 308 309 std::string GetClassName(const EnumDescriptor* descriptor) { 310 return ToCSharpName(descriptor->full_name(), descriptor->file()); 311 } 312 313 // Groups are hacky: The name of the field is just the lower-cased name 314 // of the group type. In C#, though, we would like to retain the original 315 // capitalization of the type name. 316 std::string GetFieldName(const FieldDescriptor* descriptor) { 317 if (descriptor->type() == FieldDescriptor::TYPE_GROUP) { 318 return descriptor->message_type()->name(); 319 } else { 320 return descriptor->name(); 321 } 322 } 323 324 std::string GetFieldConstantName(const FieldDescriptor* field) { 325 return GetPropertyName(field) + "FieldNumber"; 326 } 327 328 std::string GetPropertyName(const FieldDescriptor* descriptor) { 329 // TODO(jtattermusch): consider introducing csharp_property_name field option 330 std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor)); 331 // Avoid either our own type name or reserved names. Note that not all names 332 // are reserved - a field called to_string, write_to etc would still cause a problem. 333 // There are various ways of ending up with naming collisions, but we try to avoid obvious 334 // ones. 335 if (property_name == descriptor->containing_type()->name() 336 || property_name == "Types" 337 || property_name == "Descriptor") { 338 property_name += "_"; 339 } 340 return property_name; 341 } 342 343 std::string GetOutputFile( 344 const google::protobuf::FileDescriptor* descriptor, 345 const std::string file_extension, 346 const bool generate_directories, 347 const std::string base_namespace, 348 string* error) { 349 string relative_filename = GetFileNameBase(descriptor) + file_extension; 350 if (!generate_directories) { 351 return relative_filename; 352 } 353 string ns = GetFileNamespace(descriptor); 354 string namespace_suffix = ns; 355 if (!base_namespace.empty()) { 356 // Check that the base_namespace is either equal to or a leading part of 357 // the file namespace. This isn't just a simple prefix; "Foo.B" shouldn't 358 // be regarded as a prefix of "Foo.Bar". The simplest option is to add "." 359 // to both. 360 string extended_ns = ns + "."; 361 if (extended_ns.find(base_namespace + ".") != 0) { 362 *error = "Namespace " + ns + " is not a prefix namespace of base namespace " + base_namespace; 363 return ""; // This will be ignored, because we've set an error. 364 } 365 namespace_suffix = ns.substr(base_namespace.length()); 366 if (namespace_suffix.find(".") == 0) { 367 namespace_suffix = namespace_suffix.substr(1); 368 } 369 } 370 371 string namespace_dir = StringReplace(namespace_suffix, ".", "/", true); 372 if (!namespace_dir.empty()) { 373 namespace_dir += "/"; 374 } 375 return namespace_dir + relative_filename; 376 } 377 378 // TODO: c&p from Java protoc plugin 379 // For encodings with fixed sizes, returns that size in bytes. Otherwise 380 // returns -1. 381 int GetFixedSize(FieldDescriptor::Type type) { 382 switch (type) { 383 case FieldDescriptor::TYPE_INT32 : return -1; 384 case FieldDescriptor::TYPE_INT64 : return -1; 385 case FieldDescriptor::TYPE_UINT32 : return -1; 386 case FieldDescriptor::TYPE_UINT64 : return -1; 387 case FieldDescriptor::TYPE_SINT32 : return -1; 388 case FieldDescriptor::TYPE_SINT64 : return -1; 389 case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size; 390 case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size; 391 case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size; 392 case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size; 393 case FieldDescriptor::TYPE_FLOAT : return internal::WireFormatLite::kFloatSize; 394 case FieldDescriptor::TYPE_DOUBLE : return internal::WireFormatLite::kDoubleSize; 395 396 case FieldDescriptor::TYPE_BOOL : return internal::WireFormatLite::kBoolSize; 397 case FieldDescriptor::TYPE_ENUM : return -1; 398 399 case FieldDescriptor::TYPE_STRING : return -1; 400 case FieldDescriptor::TYPE_BYTES : return -1; 401 case FieldDescriptor::TYPE_GROUP : return -1; 402 case FieldDescriptor::TYPE_MESSAGE : return -1; 403 404 // No default because we want the compiler to complain if any new 405 // types are added. 406 } 407 GOOGLE_LOG(FATAL) << "Can't get here."; 408 return -1; 409 } 410 411 static const char base64_chars[] = 412 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 413 414 std::string StringToBase64(const std::string& input) { 415 std::string result; 416 size_t remaining = input.size(); 417 const unsigned char *src = (const unsigned char*) input.c_str(); 418 while (remaining > 2) { 419 result += base64_chars[src[0] >> 2]; 420 result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)]; 421 result += base64_chars[((src[1] & 0xf) << 2) | (src[2] >> 6)]; 422 result += base64_chars[src[2] & 0x3f]; 423 remaining -= 3; 424 src += 3; 425 } 426 switch (remaining) { 427 case 2: 428 result += base64_chars[src[0] >> 2]; 429 result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)]; 430 result += base64_chars[(src[1] & 0xf) << 2]; 431 result += '='; 432 src += 2; 433 break; 434 case 1: 435 result += base64_chars[src[0] >> 2]; 436 result += base64_chars[((src[0] & 0x3) << 4)]; 437 result += '='; 438 result += '='; 439 src += 1; 440 break; 441 } 442 return result; 443 } 444 445 std::string FileDescriptorToBase64(const FileDescriptor* descriptor) { 446 std::string fdp_bytes; 447 FileDescriptorProto fdp; 448 descriptor->CopyTo(&fdp); 449 fdp.SerializeToString(&fdp_bytes); 450 return StringToBase64(fdp_bytes); 451 } 452 453 FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor, 454 int fieldOrdinal, 455 const Options* options) { 456 switch (descriptor->type()) { 457 case FieldDescriptor::TYPE_GROUP: 458 case FieldDescriptor::TYPE_MESSAGE: 459 if (descriptor->is_repeated()) { 460 if (descriptor->is_map()) { 461 return new MapFieldGenerator(descriptor, fieldOrdinal, options); 462 } else { 463 return new RepeatedMessageFieldGenerator(descriptor, fieldOrdinal, options); 464 } 465 } else { 466 if (IsWrapperType(descriptor)) { 467 if (descriptor->containing_oneof()) { 468 return new WrapperOneofFieldGenerator(descriptor, fieldOrdinal, options); 469 } else { 470 return new WrapperFieldGenerator(descriptor, fieldOrdinal, options); 471 } 472 } else { 473 if (descriptor->containing_oneof()) { 474 return new MessageOneofFieldGenerator(descriptor, fieldOrdinal, options); 475 } else { 476 return new MessageFieldGenerator(descriptor, fieldOrdinal, options); 477 } 478 } 479 } 480 case FieldDescriptor::TYPE_ENUM: 481 if (descriptor->is_repeated()) { 482 return new RepeatedEnumFieldGenerator(descriptor, fieldOrdinal, options); 483 } else { 484 if (descriptor->containing_oneof()) { 485 return new EnumOneofFieldGenerator(descriptor, fieldOrdinal, options); 486 } else { 487 return new EnumFieldGenerator(descriptor, fieldOrdinal, options); 488 } 489 } 490 default: 491 if (descriptor->is_repeated()) { 492 return new RepeatedPrimitiveFieldGenerator(descriptor, fieldOrdinal, options); 493 } else { 494 if (descriptor->containing_oneof()) { 495 return new PrimitiveOneofFieldGenerator(descriptor, fieldOrdinal, options); 496 } else { 497 return new PrimitiveFieldGenerator(descriptor, fieldOrdinal, options); 498 } 499 } 500 } 501 } 502 503 } // namespace csharp 504 } // namespace compiler 505 } // namespace protobuf 506 } // namespace google 507