1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 #ifdef _MSC_VER 32 #include <io.h> 33 #else 34 #include <unistd.h> 35 #endif 36 #include <climits> 37 #include <errno.h> 38 #include <fcntl.h> 39 #include <fstream> 40 #include <iostream> 41 #include <sstream> 42 #include <stdlib.h> 43 #include <vector> 44 45 #include <google/protobuf/stubs/hash.h> 46 #include <google/protobuf/compiler/objectivec/objectivec_helpers.h> 47 #include <google/protobuf/io/coded_stream.h> 48 #include <google/protobuf/io/zero_copy_stream_impl.h> 49 #include <google/protobuf/descriptor.pb.h> 50 #include <google/protobuf/stubs/common.h> 51 #include <google/protobuf/stubs/strutil.h> 52 53 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some 54 // error cases, so it seems to be ok to use as a back door for errors. 55 56 namespace google { 57 namespace protobuf { 58 namespace compiler { 59 namespace objectivec { 60 61 Options::Options() { 62 // Default is the value of the env for the package prefixes. 63 const char* file_path = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES"); 64 if (file_path) { 65 expected_prefixes_path = file_path; 66 } 67 } 68 69 namespace { 70 71 hash_set<string> MakeWordsMap(const char* const words[], size_t num_words) { 72 hash_set<string> result; 73 for (int i = 0; i < num_words; i++) { 74 result.insert(words[i]); 75 } 76 return result; 77 } 78 79 const char* const kUpperSegmentsList[] = {"url", "http", "https"}; 80 81 hash_set<string> kUpperSegments = 82 MakeWordsMap(kUpperSegmentsList, GOOGLE_ARRAYSIZE(kUpperSegmentsList)); 83 84 // Internal helper for name handing. 85 // Do not expose this outside of helpers, stick to having functions for specific 86 // cases (ClassName(), FieldName()), so there is always consistent suffix rules. 87 string UnderscoresToCamelCase(const string& input, bool first_capitalized) { 88 vector<string> values; 89 string current; 90 91 bool last_char_was_number = false; 92 bool last_char_was_lower = false; 93 bool last_char_was_upper = false; 94 for (int i = 0; i < input.size(); i++) { 95 char c = input[i]; 96 if (ascii_isdigit(c)) { 97 if (!last_char_was_number) { 98 values.push_back(current); 99 current = ""; 100 } 101 current += c; 102 last_char_was_number = last_char_was_lower = last_char_was_upper = false; 103 last_char_was_number = true; 104 } else if (ascii_islower(c)) { 105 // lowercase letter can follow a lowercase or uppercase letter 106 if (!last_char_was_lower && !last_char_was_upper) { 107 values.push_back(current); 108 current = ""; 109 } 110 current += c; // already lower 111 last_char_was_number = last_char_was_lower = last_char_was_upper = false; 112 last_char_was_lower = true; 113 } else if (ascii_isupper(c)) { 114 if (!last_char_was_upper) { 115 values.push_back(current); 116 current = ""; 117 } 118 current += ascii_tolower(c); 119 last_char_was_number = last_char_was_lower = last_char_was_upper = false; 120 last_char_was_upper = true; 121 } else { 122 last_char_was_number = last_char_was_lower = last_char_was_upper = false; 123 } 124 } 125 values.push_back(current); 126 127 string result; 128 bool first_segment_forces_upper = false; 129 for (vector<string>::iterator i = values.begin(); i != values.end(); ++i) { 130 string value = *i; 131 bool all_upper = (kUpperSegments.count(value) > 0); 132 if (all_upper && (result.length() == 0)) { 133 first_segment_forces_upper = true; 134 } 135 for (int j = 0; j < value.length(); j++) { 136 if (j == 0 || all_upper) { 137 value[j] = ascii_toupper(value[j]); 138 } else { 139 // Nothing, already in lower. 140 } 141 } 142 result += value; 143 } 144 if ((result.length() != 0) && 145 !first_capitalized && 146 !first_segment_forces_upper) { 147 result[0] = ascii_tolower(result[0]); 148 } 149 return result; 150 } 151 152 const char* const kReservedWordList[] = { 153 // Objective C "keywords" that aren't in C 154 // From 155 // http://stackoverflow.com/questions/1873630/reserved-keywords-in-objective-c 156 "id", "_cmd", "super", "in", "out", "inout", "bycopy", "byref", "oneway", 157 "self", 158 159 // C/C++ keywords (Incl C++ 0x11) 160 // From http://en.cppreference.com/w/cpp/keywords 161 "and", "and_eq", "alignas", "alignof", "asm", "auto", "bitand", "bitor", 162 "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class", 163 "compl", "const", "constexpr", "const_cast", "continue", "decltype", 164 "default", "delete", "double", "dynamic_cast", "else", "enum", "explicit", 165 "export", "extern ", "false", "float", "for", "friend", "goto", "if", 166 "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not", 167 "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected", 168 "public", "register", "reinterpret_cast", "return", "short", "signed", 169 "sizeof", "static", "static_assert", "static_cast", "struct", "switch", 170 "template", "this", "thread_local", "throw", "true", "try", "typedef", 171 "typeid", "typename", "union", "unsigned", "using", "virtual", "void", 172 "volatile", "wchar_t", "while", "xor", "xor_eq", 173 174 // C99 keywords 175 // From 176 // http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/index.jsp?topic=%2Fcom.ibm.xlcpp8l.doc%2Flanguage%2Fref%2Fkeyw.htm 177 "restrict", 178 179 // Objective-C Runtime typedefs 180 // From <obc/runtime.h> 181 "Category", "Ivar", "Method", "Protocol", 182 183 // NSObject Methods 184 // new is covered by C++ keywords. 185 "description", "debugDescription", "finalize", "hash", "dealloc", "init", 186 "class", "superclass", "retain", "release", "autorelease", "retainCount", 187 "zone", "isProxy", "copy", "mutableCopy", "classForCoder", 188 189 // GPBMessage Methods 190 // Only need to add instance methods that may conflict with 191 // method declared in protos. The main cases are methods 192 // that take no arguments, or setFoo:/hasFoo: type methods. 193 "clear", "data", "delimitedData", "descriptor", "extensionRegistry", 194 "extensionsCurrentlySet", "isInitialized", "serializedSize", 195 "sortedExtensionsInUse", "unknownFields", 196 197 // MacTypes.h names 198 "Fixed", "Fract", "Size", "LogicalAddress", "PhysicalAddress", "ByteCount", 199 "ByteOffset", "Duration", "AbsoluteTime", "OptionBits", "ItemCount", 200 "PBVersion", "ScriptCode", "LangCode", "RegionCode", "OSType", 201 "ProcessSerialNumber", "Point", "Rect", "FixedPoint", "FixedRect", "Style", 202 "StyleParameter", "StyleField", "TimeScale", "TimeBase", "TimeRecord", 203 }; 204 205 hash_set<string> kReservedWords = 206 MakeWordsMap(kReservedWordList, GOOGLE_ARRAYSIZE(kReservedWordList)); 207 208 string SanitizeNameForObjC(const string& input, const string& extension) { 209 if (kReservedWords.count(input) > 0) { 210 return input + extension; 211 } 212 return input; 213 } 214 215 string NameFromFieldDescriptor(const FieldDescriptor* field) { 216 if (field->type() == FieldDescriptor::TYPE_GROUP) { 217 return field->message_type()->name(); 218 } else { 219 return field->name(); 220 } 221 } 222 223 void PathSplit(const string& path, string* directory, string* basename) { 224 string::size_type last_slash = path.rfind('/'); 225 if (last_slash == string::npos) { 226 if (directory) { 227 *directory = ""; 228 } 229 if (basename) { 230 *basename = path; 231 } 232 } else { 233 if (directory) { 234 *directory = path.substr(0, last_slash); 235 } 236 if (basename) { 237 *basename = path.substr(last_slash + 1); 238 } 239 } 240 } 241 242 bool IsSpecialName(const string& name, const string* special_names, 243 size_t count) { 244 for (size_t i = 0; i < count; ++i) { 245 size_t length = special_names[i].length(); 246 if (name.compare(0, length, special_names[i]) == 0) { 247 if (name.length() > length) { 248 // If name is longer than the retained_name[i] that it matches 249 // the next character must be not lower case (newton vs newTon vs 250 // new_ton). 251 return !ascii_islower(name[length]); 252 } else { 253 return true; 254 } 255 } 256 } 257 return false; 258 } 259 260 } // namespace 261 262 // Escape C++ trigraphs by escaping question marks to \? 263 string EscapeTrigraphs(const string& to_escape) { 264 return StringReplace(to_escape, "?", "\\?", true); 265 } 266 267 string StripProto(const string& filename) { 268 if (HasSuffixString(filename, ".protodevel")) { 269 return StripSuffixString(filename, ".protodevel"); 270 } else { 271 return StripSuffixString(filename, ".proto"); 272 } 273 } 274 275 bool IsRetainedName(const string& name) { 276 // List of prefixes from 277 // http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html 278 static const string retained_names[] = {"new", "alloc", "copy", 279 "mutableCopy"}; 280 return IsSpecialName(name, retained_names, 281 sizeof(retained_names) / sizeof(retained_names[0])); 282 } 283 284 bool IsInitName(const string& name) { 285 static const string init_names[] = {"init"}; 286 return IsSpecialName(name, init_names, 287 sizeof(init_names) / sizeof(init_names[0])); 288 } 289 290 string BaseFileName(const FileDescriptor* file) { 291 string basename; 292 PathSplit(file->name(), NULL, &basename); 293 return basename; 294 } 295 296 string FileName(const FileDescriptor* file) { 297 string path = FilePath(file); 298 string basename; 299 PathSplit(path, NULL, &basename); 300 return basename; 301 } 302 303 string FilePath(const FileDescriptor* file) { 304 string output; 305 string basename; 306 string directory; 307 PathSplit(file->name(), &directory, &basename); 308 if (directory.length() > 0) { 309 output = directory + "/"; 310 } 311 basename = StripProto(basename); 312 313 // CamelCase to be more ObjC friendly. 314 basename = UnderscoresToCamelCase(basename, true); 315 316 output += basename; 317 return output; 318 } 319 320 string FileClassPrefix(const FileDescriptor* file) { 321 // Default is empty string, no need to check has_objc_class_prefix. 322 string result = file->options().objc_class_prefix(); 323 return result; 324 } 325 326 string FileClassName(const FileDescriptor* file) { 327 string name = FileClassPrefix(file); 328 name += UnderscoresToCamelCase(StripProto(BaseFileName(file)), true); 329 name += "Root"; 330 // There aren't really any reserved words that end in "Root", but playing 331 // it safe and checking. 332 return SanitizeNameForObjC(name, "_RootClass"); 333 } 334 335 string ClassNameWorker(const Descriptor* descriptor) { 336 string name; 337 if (descriptor->containing_type() != NULL) { 338 name = ClassNameWorker(descriptor->containing_type()); 339 name += "_"; 340 } 341 return name + descriptor->name(); 342 } 343 344 string ClassNameWorker(const EnumDescriptor* descriptor) { 345 string name; 346 if (descriptor->containing_type() != NULL) { 347 name = ClassNameWorker(descriptor->containing_type()); 348 name += "_"; 349 } 350 return name + descriptor->name(); 351 } 352 353 string ClassName(const Descriptor* descriptor) { 354 // 1. Message names are used as is (style calls for CamelCase, trust it). 355 // 2. Check for reserved word at the very end and then suffix things. 356 string prefix = FileClassPrefix(descriptor->file()); 357 string name = ClassNameWorker(descriptor); 358 return SanitizeNameForObjC(prefix + name, "_Class"); 359 } 360 361 string EnumName(const EnumDescriptor* descriptor) { 362 // 1. Enum names are used as is (style calls for CamelCase, trust it). 363 // 2. Check for reserved word at the every end and then suffix things. 364 // message Fixed { 365 // message Size {...} 366 // enum Mumble {...} 367 // ... 368 // } 369 // yields Fixed_Class, Fixed_Size. 370 string name = FileClassPrefix(descriptor->file()); 371 name += ClassNameWorker(descriptor); 372 return SanitizeNameForObjC(name, "_Enum"); 373 } 374 375 string EnumValueName(const EnumValueDescriptor* descriptor) { 376 // Because of the Switch enum compatibility, the name on the enum has to have 377 // the suffix handing, so it slightly diverges from how nested classes work. 378 // enum Fixed { 379 // FOO = 1 380 // } 381 // yields Fixed_Enum and Fixed_Enum_Foo (not Fixed_Foo). 382 const string& class_name = EnumName(descriptor->type()); 383 const string& value_str = UnderscoresToCamelCase(descriptor->name(), true); 384 const string& name = class_name + "_" + value_str; 385 // There aren't really any reserved words with an underscore and a leading 386 // capital letter, but playing it safe and checking. 387 return SanitizeNameForObjC(name, "_Value"); 388 } 389 390 string EnumValueShortName(const EnumValueDescriptor* descriptor) { 391 // Enum value names (EnumValueName above) are the enum name turned into 392 // a class name and then the value name is CamelCased and concatenated; the 393 // whole thing then gets sanitized for reserved words. 394 // The "short name" is intended to be the final leaf, the value name; but 395 // you can't simply send that off to sanitize as that could result in it 396 // getting modified when the full name didn't. For example enum 397 // "StorageModes" has a value "retain". So the full name is 398 // "StorageModes_Retain", but if we sanitize "retain" it would become 399 // "RetainValue". 400 // So the right way to get the short name is to take the full enum name 401 // and then strip off the enum name (leaving the value name and anything 402 // done by sanitize). 403 const string& class_name = EnumName(descriptor->type()); 404 const string& long_name_prefix = class_name + "_"; 405 const string& long_name = EnumValueName(descriptor); 406 return StripPrefixString(long_name, long_name_prefix); 407 } 408 409 string UnCamelCaseEnumShortName(const string& name) { 410 string result; 411 for (int i = 0; i < name.size(); i++) { 412 char c = name[i]; 413 if (i > 0 && ascii_isupper(c)) { 414 result += '_'; 415 } 416 result += ascii_toupper(c); 417 } 418 return result; 419 } 420 421 string ExtensionMethodName(const FieldDescriptor* descriptor) { 422 const string& name = NameFromFieldDescriptor(descriptor); 423 const string& result = UnderscoresToCamelCase(name, false); 424 return SanitizeNameForObjC(result, "_Extension"); 425 } 426 427 string FieldName(const FieldDescriptor* field) { 428 const string& name = NameFromFieldDescriptor(field); 429 string result = UnderscoresToCamelCase(name, false); 430 if (field->is_repeated() && !field->is_map()) { 431 // Add "Array" before do check for reserved worlds. 432 result += "Array"; 433 } else { 434 // If it wasn't repeated, but ends in "Array", force on the _p suffix. 435 if (HasSuffixString(result, "Array")) { 436 result += "_p"; 437 } 438 } 439 return SanitizeNameForObjC(result, "_p"); 440 } 441 442 string FieldNameCapitalized(const FieldDescriptor* field) { 443 // Want the same suffix handling, so upcase the first letter of the other 444 // name. 445 string result = FieldName(field); 446 if (result.length() > 0) { 447 result[0] = ascii_toupper(result[0]); 448 } 449 return result; 450 } 451 452 string OneofEnumName(const OneofDescriptor* descriptor) { 453 const Descriptor* fieldDescriptor = descriptor->containing_type(); 454 string name = ClassName(fieldDescriptor); 455 name += "_" + UnderscoresToCamelCase(descriptor->name(), true) + "_OneOfCase"; 456 // No sanitize needed because the OS never has names that end in _OneOfCase. 457 return name; 458 } 459 460 string OneofName(const OneofDescriptor* descriptor) { 461 string name = UnderscoresToCamelCase(descriptor->name(), false); 462 // No sanitize needed because it gets OneOfCase added and that shouldn't 463 // ever conflict. 464 return name; 465 } 466 467 string OneofNameCapitalized(const OneofDescriptor* descriptor) { 468 // Use the common handling and then up-case the first letter. 469 string result = OneofName(descriptor); 470 if (result.length() > 0) { 471 result[0] = ascii_toupper(result[0]); 472 } 473 return result; 474 } 475 476 string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) { 477 string worker(name); 478 if (HasSuffixString(worker, "_p")) { 479 worker = StripSuffixString(worker, "_p"); 480 } 481 if (field->is_repeated() && HasSuffixString(worker, "Array")) { 482 worker = StripSuffixString(worker, "Array"); 483 } 484 if (field->type() == FieldDescriptor::TYPE_GROUP) { 485 if (worker.length() > 0) { 486 if (ascii_islower(worker[0])) { 487 worker[0] = ascii_toupper(worker[0]); 488 } 489 } 490 return worker; 491 } else { 492 string result; 493 for (int i = 0; i < worker.size(); i++) { 494 char c = worker[i]; 495 if (ascii_isupper(c)) { 496 if (i > 0) { 497 result += '_'; 498 } 499 result += ascii_tolower(c); 500 } else { 501 result += c; 502 } 503 } 504 return result; 505 } 506 } 507 508 string GetCapitalizedType(const FieldDescriptor* field) { 509 switch (field->type()) { 510 case FieldDescriptor::TYPE_INT32: 511 return "Int32"; 512 case FieldDescriptor::TYPE_UINT32: 513 return "UInt32"; 514 case FieldDescriptor::TYPE_SINT32: 515 return "SInt32"; 516 case FieldDescriptor::TYPE_FIXED32: 517 return "Fixed32"; 518 case FieldDescriptor::TYPE_SFIXED32: 519 return "SFixed32"; 520 case FieldDescriptor::TYPE_INT64: 521 return "Int64"; 522 case FieldDescriptor::TYPE_UINT64: 523 return "UInt64"; 524 case FieldDescriptor::TYPE_SINT64: 525 return "SInt64"; 526 case FieldDescriptor::TYPE_FIXED64: 527 return "Fixed64"; 528 case FieldDescriptor::TYPE_SFIXED64: 529 return "SFixed64"; 530 case FieldDescriptor::TYPE_FLOAT: 531 return "Float"; 532 case FieldDescriptor::TYPE_DOUBLE: 533 return "Double"; 534 case FieldDescriptor::TYPE_BOOL: 535 return "Bool"; 536 case FieldDescriptor::TYPE_STRING: 537 return "String"; 538 case FieldDescriptor::TYPE_BYTES: 539 return "Bytes"; 540 case FieldDescriptor::TYPE_ENUM: 541 return "Enum"; 542 case FieldDescriptor::TYPE_GROUP: 543 return "Group"; 544 case FieldDescriptor::TYPE_MESSAGE: 545 return "Message"; 546 } 547 548 // Some compilers report reaching end of function even though all cases of 549 // the enum are handed in the switch. 550 GOOGLE_LOG(FATAL) << "Can't get here."; 551 return NULL; 552 } 553 554 ObjectiveCType GetObjectiveCType(FieldDescriptor::Type field_type) { 555 switch (field_type) { 556 case FieldDescriptor::TYPE_INT32: 557 case FieldDescriptor::TYPE_SINT32: 558 case FieldDescriptor::TYPE_SFIXED32: 559 return OBJECTIVECTYPE_INT32; 560 561 case FieldDescriptor::TYPE_UINT32: 562 case FieldDescriptor::TYPE_FIXED32: 563 return OBJECTIVECTYPE_UINT32; 564 565 case FieldDescriptor::TYPE_INT64: 566 case FieldDescriptor::TYPE_SINT64: 567 case FieldDescriptor::TYPE_SFIXED64: 568 return OBJECTIVECTYPE_INT64; 569 570 case FieldDescriptor::TYPE_UINT64: 571 case FieldDescriptor::TYPE_FIXED64: 572 return OBJECTIVECTYPE_UINT64; 573 574 case FieldDescriptor::TYPE_FLOAT: 575 return OBJECTIVECTYPE_FLOAT; 576 577 case FieldDescriptor::TYPE_DOUBLE: 578 return OBJECTIVECTYPE_DOUBLE; 579 580 case FieldDescriptor::TYPE_BOOL: 581 return OBJECTIVECTYPE_BOOLEAN; 582 583 case FieldDescriptor::TYPE_STRING: 584 return OBJECTIVECTYPE_STRING; 585 586 case FieldDescriptor::TYPE_BYTES: 587 return OBJECTIVECTYPE_DATA; 588 589 case FieldDescriptor::TYPE_ENUM: 590 return OBJECTIVECTYPE_ENUM; 591 592 case FieldDescriptor::TYPE_GROUP: 593 case FieldDescriptor::TYPE_MESSAGE: 594 return OBJECTIVECTYPE_MESSAGE; 595 } 596 597 // Some compilers report reaching end of function even though all cases of 598 // the enum are handed in the switch. 599 GOOGLE_LOG(FATAL) << "Can't get here."; 600 return OBJECTIVECTYPE_INT32; 601 } 602 603 bool IsPrimitiveType(const FieldDescriptor* field) { 604 ObjectiveCType type = GetObjectiveCType(field); 605 switch (type) { 606 case OBJECTIVECTYPE_INT32: 607 case OBJECTIVECTYPE_UINT32: 608 case OBJECTIVECTYPE_INT64: 609 case OBJECTIVECTYPE_UINT64: 610 case OBJECTIVECTYPE_FLOAT: 611 case OBJECTIVECTYPE_DOUBLE: 612 case OBJECTIVECTYPE_BOOLEAN: 613 case OBJECTIVECTYPE_ENUM: 614 return true; 615 break; 616 default: 617 return false; 618 } 619 } 620 621 bool IsReferenceType(const FieldDescriptor* field) { 622 return !IsPrimitiveType(field); 623 } 624 625 static string HandleExtremeFloatingPoint(string val, bool add_float_suffix) { 626 if (val == "nan") { 627 return "NAN"; 628 } else if (val == "inf") { 629 return "INFINITY"; 630 } else if (val == "-inf") { 631 return "-INFINITY"; 632 } else { 633 // float strings with ., e or E need to have f appended 634 if (add_float_suffix && 635 (val.find(".") != string::npos || val.find("e") != string::npos || 636 val.find("E") != string::npos)) { 637 val += "f"; 638 } 639 return val; 640 } 641 } 642 643 string GPBGenericValueFieldName(const FieldDescriptor* field) { 644 // Returns the field within the GPBGenericValue union to use for the given 645 // field. 646 if (field->is_repeated()) { 647 return "valueMessage"; 648 } 649 switch (field->cpp_type()) { 650 case FieldDescriptor::CPPTYPE_INT32: 651 return "valueInt32"; 652 case FieldDescriptor::CPPTYPE_UINT32: 653 return "valueUInt32"; 654 case FieldDescriptor::CPPTYPE_INT64: 655 return "valueInt64"; 656 case FieldDescriptor::CPPTYPE_UINT64: 657 return "valueUInt64"; 658 case FieldDescriptor::CPPTYPE_FLOAT: 659 return "valueFloat"; 660 case FieldDescriptor::CPPTYPE_DOUBLE: 661 return "valueDouble"; 662 case FieldDescriptor::CPPTYPE_BOOL: 663 return "valueBool"; 664 case FieldDescriptor::CPPTYPE_STRING: 665 if (field->type() == FieldDescriptor::TYPE_BYTES) { 666 return "valueData"; 667 } else { 668 return "valueString"; 669 } 670 case FieldDescriptor::CPPTYPE_ENUM: 671 return "valueEnum"; 672 case FieldDescriptor::CPPTYPE_MESSAGE: 673 return "valueMessage"; 674 } 675 676 // Some compilers report reaching end of function even though all cases of 677 // the enum are handed in the switch. 678 GOOGLE_LOG(FATAL) << "Can't get here."; 679 return NULL; 680 } 681 682 683 string DefaultValue(const FieldDescriptor* field) { 684 // Repeated fields don't have defaults. 685 if (field->is_repeated()) { 686 return "nil"; 687 } 688 689 // Switch on cpp_type since we need to know which default_value_* method 690 // of FieldDescriptor to call. 691 switch (field->cpp_type()) { 692 case FieldDescriptor::CPPTYPE_INT32: 693 // gcc and llvm reject the decimal form of kint32min and kint64min. 694 if (field->default_value_int32() == INT_MIN) { 695 return "-0x80000000"; 696 } 697 return SimpleItoa(field->default_value_int32()); 698 case FieldDescriptor::CPPTYPE_UINT32: 699 return SimpleItoa(field->default_value_uint32()) + "U"; 700 case FieldDescriptor::CPPTYPE_INT64: 701 // gcc and llvm reject the decimal form of kint32min and kint64min. 702 if (field->default_value_int64() == LLONG_MIN) { 703 return "-0x8000000000000000LL"; 704 } 705 return SimpleItoa(field->default_value_int64()) + "LL"; 706 case FieldDescriptor::CPPTYPE_UINT64: 707 return SimpleItoa(field->default_value_uint64()) + "ULL"; 708 case FieldDescriptor::CPPTYPE_DOUBLE: 709 return HandleExtremeFloatingPoint( 710 SimpleDtoa(field->default_value_double()), false); 711 case FieldDescriptor::CPPTYPE_FLOAT: 712 return HandleExtremeFloatingPoint( 713 SimpleFtoa(field->default_value_float()), true); 714 case FieldDescriptor::CPPTYPE_BOOL: 715 return field->default_value_bool() ? "YES" : "NO"; 716 case FieldDescriptor::CPPTYPE_STRING: { 717 const bool has_default_value = field->has_default_value(); 718 const string& default_string = field->default_value_string(); 719 if (!has_default_value || default_string.length() == 0) { 720 // If the field is defined as being the empty string, 721 // then we will just assign to nil, as the empty string is the 722 // default for both strings and data. 723 return "nil"; 724 } 725 if (field->type() == FieldDescriptor::TYPE_BYTES) { 726 // We want constant fields in our data structures so we can 727 // declare them as static. To achieve this we cheat and stuff 728 // a escaped c string (prefixed with a length) into the data 729 // field, and cast it to an (NSData*) so it will compile. 730 // The runtime library knows how to handle it. 731 732 // Must convert to a standard byte order for packing length into 733 // a cstring. 734 uint32 length = ghtonl(default_string.length()); 735 string bytes((const char*)&length, sizeof(length)); 736 bytes.append(default_string); 737 return "(NSData*)\"" + EscapeTrigraphs(CEscape(bytes)) + "\""; 738 } else { 739 return "@\"" + EscapeTrigraphs(CEscape(default_string)) + "\""; 740 } 741 } 742 case FieldDescriptor::CPPTYPE_ENUM: 743 return EnumValueName(field->default_value_enum()); 744 case FieldDescriptor::CPPTYPE_MESSAGE: 745 return "nil"; 746 } 747 748 // Some compilers report reaching end of function even though all cases of 749 // the enum are handed in the switch. 750 GOOGLE_LOG(FATAL) << "Can't get here."; 751 return NULL; 752 } 753 754 bool HasNonZeroDefaultValue(const FieldDescriptor* field) { 755 // Repeated fields don't have defaults. 756 if (field->is_repeated()) { 757 return false; 758 } 759 760 // As much as checking field->has_default_value() seems useful, it isn't 761 // because of enums. proto2 syntax allows the first item in an enum (the 762 // default) to be non zero. So checking field->has_default_value() would 763 // result in missing this non zero default. See MessageWithOneBasedEnum in 764 // objectivec/Tests/unittest_objc.proto for a test Message to confirm this. 765 766 // Some proto file set the default to the zero value, so make sure the value 767 // isn't the zero case. 768 switch (field->cpp_type()) { 769 case FieldDescriptor::CPPTYPE_INT32: 770 return field->default_value_int32() != 0; 771 case FieldDescriptor::CPPTYPE_UINT32: 772 return field->default_value_uint32() != 0U; 773 case FieldDescriptor::CPPTYPE_INT64: 774 return field->default_value_int64() != 0LL; 775 case FieldDescriptor::CPPTYPE_UINT64: 776 return field->default_value_uint64() != 0ULL; 777 case FieldDescriptor::CPPTYPE_DOUBLE: 778 return field->default_value_double() != 0.0; 779 case FieldDescriptor::CPPTYPE_FLOAT: 780 return field->default_value_float() != 0.0f; 781 case FieldDescriptor::CPPTYPE_BOOL: 782 return field->default_value_bool(); 783 case FieldDescriptor::CPPTYPE_STRING: { 784 const string& default_string = field->default_value_string(); 785 return default_string.length() != 0; 786 } 787 case FieldDescriptor::CPPTYPE_ENUM: 788 return field->default_value_enum()->number() != 0; 789 case FieldDescriptor::CPPTYPE_MESSAGE: 790 return false; 791 } 792 793 // Some compilers report reaching end of function even though all cases of 794 // the enum are handed in the switch. 795 GOOGLE_LOG(FATAL) << "Can't get here."; 796 return false; 797 } 798 799 string BuildFlagsString(const vector<string>& strings) { 800 if (strings.size() == 0) { 801 return "0"; 802 } 803 string string; 804 for (size_t i = 0; i != strings.size(); ++i) { 805 if (i > 0) { 806 string.append(" | "); 807 } 808 string.append(strings[i]); 809 } 810 return string; 811 } 812 813 string BuildCommentsString(const SourceLocation& location) { 814 const string& comments = location.leading_comments.empty() 815 ? location.trailing_comments 816 : location.leading_comments; 817 vector<string> lines; 818 SplitStringAllowEmpty(comments, "\n", &lines); 819 while (!lines.empty() && lines.back().empty()) { 820 lines.pop_back(); 821 } 822 string prefix("///"); 823 string suffix("\n"); 824 string final_comments; 825 for (int i = 0; i < lines.size(); i++) { 826 // HeaderDoc uses '\' and '@' for markers; escape them. 827 const string line = StringReplace(lines[i], "\\", "\\\\", true); 828 final_comments += 829 prefix + StringReplace(line, "@", "\\@", true) + suffix; 830 } 831 return final_comments; 832 } 833 834 namespace { 835 836 // Internal helper class that parses the expected package to prefix mappings 837 // file. 838 class Parser { 839 public: 840 Parser(map<string, string>* inout_package_to_prefix_map) 841 : prefix_map_(inout_package_to_prefix_map), line_(0) {} 842 843 // Parses a check of input, returning success/failure. 844 bool ParseChunk(StringPiece chunk); 845 846 // Should be called to finish parsing (after all input has been provided via 847 // ParseChunk()). Returns success/failure. 848 bool Finish(); 849 850 int last_line() const { return line_; } 851 string error_str() const { return error_str_; } 852 853 private: 854 bool ParseLoop(); 855 856 map<string, string>* prefix_map_; 857 int line_; 858 string error_str_; 859 StringPiece p_; 860 string leftover_; 861 }; 862 863 bool Parser::ParseChunk(StringPiece chunk) { 864 if (!leftover_.empty()) { 865 chunk.AppendToString(&leftover_); 866 p_ = StringPiece(leftover_); 867 } else { 868 p_ = chunk; 869 } 870 bool result = ParseLoop(); 871 if (p_.empty()) { 872 leftover_.clear(); 873 } else { 874 leftover_ = p_.ToString(); 875 } 876 return result; 877 } 878 879 bool Parser::Finish() { 880 if (leftover_.empty()) { 881 return true; 882 } 883 // Force a newline onto the end to finish parsing. 884 p_ = StringPiece(leftover_ + "\n"); 885 if (!ParseLoop()) { 886 return false; 887 } 888 return p_.empty(); // Everything used? 889 } 890 891 static bool ascii_isnewline(char c) { return c == '\n' || c == '\r'; } 892 893 bool ReadLine(StringPiece* input, StringPiece* line) { 894 for (int len = 0; len < input->size(); ++len) { 895 if (ascii_isnewline((*input)[len])) { 896 *line = StringPiece(input->data(), len); 897 ++len; // advance over the newline 898 *input = StringPiece(input->data() + len, input->size() - len); 899 return true; 900 } 901 } 902 return false; // Ran out of input with no newline. 903 } 904 905 void TrimWhitespace(StringPiece* input) { 906 while (!input->empty() && ascii_isspace(*input->data())) { 907 input->remove_prefix(1); 908 } 909 while (!input->empty() && ascii_isspace((*input)[input->length() - 1])) { 910 input->remove_suffix(1); 911 } 912 } 913 914 void RemoveComment(StringPiece* input) { 915 int offset = input->find('#'); 916 if (offset != StringPiece::npos) { 917 input->remove_suffix(input->length() - offset); 918 } 919 } 920 921 bool Parser::ParseLoop() { 922 StringPiece line; 923 while (ReadLine(&p_, &line)) { 924 ++line_; 925 RemoveComment(&line); 926 TrimWhitespace(&line); 927 if (line.size() == 0) { 928 continue; // Blank line. 929 } 930 int offset = line.find('='); 931 if (offset == StringPiece::npos) { 932 error_str_ = 933 string("Line without equal sign: '") + line.ToString() + "'."; 934 return false; 935 } 936 StringPiece package(line, 0, offset); 937 StringPiece prefix(line, offset + 1, line.length() - offset - 1); 938 TrimWhitespace(&package); 939 TrimWhitespace(&prefix); 940 // Don't really worry about error checking the package/prefix for 941 // being valid. Assume the file is validated when it is created/edited. 942 (*prefix_map_)[package.ToString()] = prefix.ToString(); 943 } 944 return true; 945 } 946 947 bool LoadExpectedPackagePrefixes(const Options &generation_options, 948 map<string, string>* prefix_map, 949 string* out_error) { 950 if (generation_options.expected_prefixes_path.empty()) { 951 return true; 952 } 953 954 int fd; 955 do { 956 fd = open(generation_options.expected_prefixes_path.c_str(), O_RDONLY); 957 } while (fd < 0 && errno == EINTR); 958 if (fd < 0) { 959 *out_error = 960 string("error: Unable to open \"") + 961 generation_options.expected_prefixes_path + 962 "\", " + strerror(errno); 963 return false; 964 } 965 io::FileInputStream file_stream(fd); 966 file_stream.SetCloseOnDelete(true); 967 968 Parser parser(prefix_map); 969 const void* buf; 970 int buf_len; 971 while (file_stream.Next(&buf, &buf_len)) { 972 if (buf_len == 0) { 973 continue; 974 } 975 976 if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) { 977 *out_error = 978 string("error: ") + generation_options.expected_prefixes_path + 979 " Line " + SimpleItoa(parser.last_line()) + ", " + parser.error_str(); 980 return false; 981 } 982 } 983 return parser.Finish(); 984 } 985 986 } // namespace 987 988 bool ValidateObjCClassPrefix(const FileDescriptor* file, 989 const Options& generation_options, 990 string* out_error) { 991 const string prefix = file->options().objc_class_prefix(); 992 const string package = file->package(); 993 994 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some 995 // error cases, so it seems to be ok to use as a back door for warnings. 996 997 // Load any expected package prefixes to validate against those. 998 map<string, string> expected_package_prefixes; 999 if (!LoadExpectedPackagePrefixes(generation_options, 1000 &expected_package_prefixes, 1001 out_error)) { 1002 return false; 1003 } 1004 1005 // Check: Error - See if there was an expected prefix for the package and 1006 // report if it doesn't match (wrong or missing). 1007 map<string, string>::iterator package_match = 1008 expected_package_prefixes.find(package); 1009 if (package_match != expected_package_prefixes.end()) { 1010 // There was an entry, and... 1011 if (package_match->second == prefix) { 1012 // ...it matches. All good, out of here! 1013 return true; 1014 } else { 1015 // ...it didn't match! 1016 *out_error = "error: Expected 'option objc_class_prefix = \"" + 1017 package_match->second + "\";' for package '" + package + 1018 "' in '" + file->name() + "'"; 1019 if (prefix.length()) { 1020 *out_error += "; but found '" + prefix + "' instead"; 1021 } 1022 *out_error += "."; 1023 return false; 1024 } 1025 } 1026 1027 // If there was no prefix option, we're done at this point. 1028 if (prefix.length() == 0) { 1029 // No prefix, nothing left to check. 1030 return true; 1031 } 1032 1033 // Check: Error - Make sure the prefix wasn't expected for a different 1034 // package (overlap is allowed, but it has to be listed as an expected 1035 // overlap). 1036 for (map<string, string>::iterator i = expected_package_prefixes.begin(); 1037 i != expected_package_prefixes.end(); ++i) { 1038 if (i->second == prefix) { 1039 *out_error = 1040 "error: Found 'option objc_class_prefix = \"" + prefix + 1041 "\";' in '" + file->name() + 1042 "'; that prefix is already used for 'package " + i->first + 1043 ";'. It can only be reused by listing it in the expected file (" + 1044 generation_options.expected_prefixes_path + ")."; 1045 return false; // Only report first usage of the prefix. 1046 } 1047 } 1048 1049 // Check: Warning - Make sure the prefix is is a reasonable value according 1050 // to Apple's rules (the checks above implicitly whitelist anything that 1051 // doesn't meet these rules). 1052 if (!ascii_isupper(prefix[0])) { 1053 cerr << endl 1054 << "protoc:0: warning: Invalid 'option objc_class_prefix = \"" 1055 << prefix << "\";' in '" << file->name() << "';" 1056 << " it should start with a capital letter." << endl; 1057 cerr.flush(); 1058 } 1059 if (prefix.length() < 3) { 1060 // Apple reserves 2 character prefixes for themselves. They do use some 1061 // 3 character prefixes, but they haven't updated the rules/docs. 1062 cerr << endl 1063 << "protoc:0: warning: Invalid 'option objc_class_prefix = \"" 1064 << prefix << "\";' in '" << file->name() << "';" 1065 << " Apple recommends they should be at least 3 characters long." 1066 << endl; 1067 cerr.flush(); 1068 } 1069 1070 // Check: Warning - If the given package/prefix pair wasn't expected, issue a 1071 // warning issue a warning suggesting it gets added to the file. 1072 if (!expected_package_prefixes.empty()) { 1073 cerr << endl 1074 << "protoc:0: warning: Found unexpected 'option objc_class_prefix = \"" 1075 << prefix << "\";' in '" << file->name() << "';" 1076 << " consider adding it to the expected prefixes file (" 1077 << generation_options.expected_prefixes_path << ")." << endl; 1078 cerr.flush(); 1079 } 1080 1081 return true; 1082 } 1083 1084 void TextFormatDecodeData::AddString(int32 key, 1085 const string& input_for_decode, 1086 const string& desired_output) { 1087 for (vector<DataEntry>::const_iterator i = entries_.begin(); 1088 i != entries_.end(); ++i) { 1089 if (i->first == key) { 1090 cerr << "error: duplicate key (" << key 1091 << ") making TextFormat data, input: \"" << input_for_decode 1092 << "\", desired: \"" << desired_output << "\"." << endl; 1093 cerr.flush(); 1094 abort(); 1095 } 1096 } 1097 1098 const string& data = TextFormatDecodeData::DecodeDataForString( 1099 input_for_decode, desired_output); 1100 entries_.push_back(DataEntry(key, data)); 1101 } 1102 1103 string TextFormatDecodeData::Data() const { 1104 ostringstream data_stringstream; 1105 1106 if (num_entries() > 0) { 1107 io::OstreamOutputStream data_outputstream(&data_stringstream); 1108 io::CodedOutputStream output_stream(&data_outputstream); 1109 1110 output_stream.WriteVarint32(num_entries()); 1111 for (vector<DataEntry>::const_iterator i = entries_.begin(); 1112 i != entries_.end(); ++i) { 1113 output_stream.WriteVarint32(i->first); 1114 output_stream.WriteString(i->second); 1115 } 1116 } 1117 1118 data_stringstream.flush(); 1119 return data_stringstream.str(); 1120 } 1121 1122 namespace { 1123 1124 // Helper to build up the decode data for a string. 1125 class DecodeDataBuilder { 1126 public: 1127 DecodeDataBuilder() { Reset(); } 1128 1129 bool AddCharacter(const char desired, const char input); 1130 void AddUnderscore() { 1131 Push(); 1132 need_underscore_ = true; 1133 } 1134 string Finish() { 1135 Push(); 1136 return decode_data_; 1137 } 1138 1139 private: 1140 static const uint8 kAddUnderscore = 0x80; 1141 1142 static const uint8 kOpAsIs = 0x00; 1143 static const uint8 kOpFirstUpper = 0x40; 1144 static const uint8 kOpFirstLower = 0x20; 1145 static const uint8 kOpAllUpper = 0x60; 1146 1147 static const int kMaxSegmentLen = 0x1f; 1148 1149 void AddChar(const char desired) { 1150 ++segment_len_; 1151 is_all_upper_ &= ascii_isupper(desired); 1152 } 1153 1154 void Push() { 1155 uint8 op = (op_ | segment_len_); 1156 if (need_underscore_) op |= kAddUnderscore; 1157 if (op != 0) { 1158 decode_data_ += (char)op; 1159 } 1160 Reset(); 1161 } 1162 1163 bool AddFirst(const char desired, const char input) { 1164 if (desired == input) { 1165 op_ = kOpAsIs; 1166 } else if (desired == ascii_toupper(input)) { 1167 op_ = kOpFirstUpper; 1168 } else if (desired == ascii_tolower(input)) { 1169 op_ = kOpFirstLower; 1170 } else { 1171 // Can't be transformed to match. 1172 return false; 1173 } 1174 AddChar(desired); 1175 return true; 1176 } 1177 1178 void Reset() { 1179 need_underscore_ = false; 1180 op_ = 0; 1181 segment_len_ = 0; 1182 is_all_upper_ = true; 1183 } 1184 1185 bool need_underscore_; 1186 bool is_all_upper_; 1187 uint8 op_; 1188 int segment_len_; 1189 1190 string decode_data_; 1191 }; 1192 1193 bool DecodeDataBuilder::AddCharacter(const char desired, const char input) { 1194 // If we've hit the max size, push to start a new segment. 1195 if (segment_len_ == kMaxSegmentLen) { 1196 Push(); 1197 } 1198 if (segment_len_ == 0) { 1199 return AddFirst(desired, input); 1200 } 1201 1202 // Desired and input match... 1203 if (desired == input) { 1204 // If we aren't transforming it, or we're upper casing it and it is 1205 // supposed to be uppercase; just add it to the segment. 1206 if ((op_ != kOpAllUpper) || ascii_isupper(desired)) { 1207 AddChar(desired); 1208 return true; 1209 } 1210 1211 // Add the current segment, and start the next one. 1212 Push(); 1213 return AddFirst(desired, input); 1214 } 1215 1216 // If we need to uppercase, and everything so far has been uppercase, 1217 // promote op to AllUpper. 1218 if ((desired == ascii_toupper(input)) && is_all_upper_) { 1219 op_ = kOpAllUpper; 1220 AddChar(desired); 1221 return true; 1222 } 1223 1224 // Give up, push and start a new segment. 1225 Push(); 1226 return AddFirst(desired, input); 1227 } 1228 1229 // If decode data can't be generated, a directive for the raw string 1230 // is used instead. 1231 string DirectDecodeString(const string& str) { 1232 string result; 1233 result += (char)'\0'; // Marker for full string. 1234 result += str; 1235 result += (char)'\0'; // End of string. 1236 return result; 1237 } 1238 1239 } // namespace 1240 1241 // static 1242 string TextFormatDecodeData::DecodeDataForString(const string& input_for_decode, 1243 const string& desired_output) { 1244 if ((input_for_decode.size() == 0) || (desired_output.size() == 0)) { 1245 cerr << "error: got empty string for making TextFormat data, input: \"" 1246 << input_for_decode << "\", desired: \"" << desired_output << "\"." 1247 << endl; 1248 cerr.flush(); 1249 abort(); 1250 } 1251 if ((input_for_decode.find('\0') != string::npos) || 1252 (desired_output.find('\0') != string::npos)) { 1253 cerr << "error: got a null char in a string for making TextFormat data," 1254 << " input: \"" << CEscape(input_for_decode) << "\", desired: \"" 1255 << CEscape(desired_output) << "\"." << endl; 1256 cerr.flush(); 1257 abort(); 1258 } 1259 1260 DecodeDataBuilder builder; 1261 1262 // Walk the output building it from the input. 1263 int x = 0; 1264 for (int y = 0; y < desired_output.size(); y++) { 1265 const char d = desired_output[y]; 1266 if (d == '_') { 1267 builder.AddUnderscore(); 1268 continue; 1269 } 1270 1271 if (x >= input_for_decode.size()) { 1272 // Out of input, no way to encode it, just return a full decode. 1273 return DirectDecodeString(desired_output); 1274 } 1275 if (builder.AddCharacter(d, input_for_decode[x])) { 1276 ++x; // Consumed one input 1277 } else { 1278 // Couldn't transform for the next character, just return a full decode. 1279 return DirectDecodeString(desired_output); 1280 } 1281 } 1282 1283 if (x != input_for_decode.size()) { 1284 // Extra input (suffix from name sanitizing?), just return a full decode. 1285 return DirectDecodeString(desired_output); 1286 } 1287 1288 // Add the end marker. 1289 return builder.Finish() + (char)'\0'; 1290 } 1291 1292 } // namespace objectivec 1293 } // namespace compiler 1294 } // namespace protobuf 1295 } // namespace google 1296