Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright 2014 Google Inc. All rights reserved.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <algorithm>
     18 #include <list>
     19 
     20 #ifdef _WIN32
     21 #if !defined(_USE_MATH_DEFINES)
     22 #define _USE_MATH_DEFINES  // For M_PI.
     23 #endif                     // !defined(_USE_MATH_DEFINES)
     24 #endif                     // _WIN32
     25 
     26 #include <math.h>
     27 
     28 #include "flatbuffers/idl.h"
     29 #include "flatbuffers/util.h"
     30 
     31 namespace flatbuffers {
     32 
     33 const char *const kTypeNames[] = {
     34   #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
     35     IDLTYPE,
     36     FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
     37   #undef FLATBUFFERS_TD
     38   nullptr
     39 };
     40 
     41 const char kTypeSizes[] = {
     42   #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
     43       sizeof(CTYPE),
     44     FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
     45   #undef FLATBUFFERS_TD
     46 };
     47 
     48 // The enums in the reflection schema should match the ones we use internally.
     49 // Compare the last element to check if these go out of sync.
     50 static_assert(BASE_TYPE_UNION ==
     51               static_cast<BaseType>(reflection::Union),
     52               "enums don't match");
     53 
     54 // Any parsing calls have to be wrapped in this macro, which automates
     55 // handling of recursive error checking a bit. It will check the received
     56 // CheckedError object, and return straight away on error.
     57 #define ECHECK(call) { auto ce = (call); if (ce.Check()) return ce; }
     58 
     59 // These two functions are called hundreds of times below, so define a short
     60 // form:
     61 #define NEXT() ECHECK(Next())
     62 #define EXPECT(tok) ECHECK(Expect(tok))
     63 
     64 static bool ValidateUTF8(const std::string &str) {
     65   const char *s = &str[0];
     66   const char * const sEnd = s + str.length();
     67   while (s < sEnd) {
     68     if (FromUTF8(&s) < 0) {
     69       return false;
     70     }
     71   }
     72   return true;
     73 }
     74 
     75 CheckedError Parser::Error(const std::string &msg) {
     76   error_ = file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
     77   #ifdef _WIN32
     78     error_ += "(" + NumToString(line_) + ")";  // MSVC alike
     79   #else
     80     if (file_being_parsed_.length()) error_ += ":";
     81     error_ += NumToString(line_) + ":0";  // gcc alike
     82   #endif
     83   error_ += ": error: " + msg;
     84   return CheckedError(true);
     85 }
     86 
     87 inline CheckedError NoError() { return CheckedError(false); }
     88 
     89 // Ensure that integer values we parse fit inside the declared integer type.
     90 CheckedError Parser::CheckBitsFit(int64_t val, size_t bits) {
     91   // Left-shifting a 64-bit value by 64 bits or more is undefined
     92   // behavior (C99 6.5.7), so check *before* we shift.
     93   if (bits < 64) {
     94     // Bits we allow to be used.
     95     auto mask = static_cast<int64_t>((1ull << bits) - 1);
     96     if ((val & ~mask) != 0 &&  // Positive or unsigned.
     97         (val |  mask) != -1)   // Negative.
     98       return Error("constant does not fit in a " + NumToString(bits) +
     99                    "-bit field");
    100   }
    101   return NoError();
    102 }
    103 
    104 // atot: templated version of atoi/atof: convert a string to an instance of T.
    105 template<typename T> inline CheckedError atot(const char *s, Parser &parser,
    106                                               T *val) {
    107   int64_t i = StringToInt(s);
    108   ECHECK(parser.CheckBitsFit(i, sizeof(T) * 8));
    109   *val = (T)i;
    110   return NoError();
    111 }
    112 template<> inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
    113                                               uint64_t *val) {
    114   (void)parser;
    115   *val = StringToUInt(s);
    116   return NoError();
    117 }
    118 template<> inline CheckedError atot<bool>(const char *s, Parser &parser,
    119                                           bool *val) {
    120   (void)parser;
    121   *val = 0 != atoi(s);
    122   return NoError();
    123 }
    124 template<> inline CheckedError atot<float>(const char *s, Parser &parser,
    125                                            float *val) {
    126   (void)parser;
    127   *val = static_cast<float>(strtod(s, nullptr));
    128   return NoError();
    129 }
    130 template<> inline CheckedError atot<double>(const char *s, Parser &parser,
    131                                             double *val) {
    132   (void)parser;
    133   *val = strtod(s, nullptr);
    134   return NoError();
    135 }
    136 
    137 template<> inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
    138                                                   Offset<void> *val) {
    139   (void)parser;
    140   *val = Offset<void>(atoi(s));
    141   return NoError();
    142 }
    143 
    144 std::string Namespace::GetFullyQualifiedName(const std::string &name,
    145                                              size_t max_components) const {
    146   // Early exit if we don't have a defined namespace.
    147   if (components.size() == 0 || !max_components) {
    148     return name;
    149   }
    150   std::stringstream stream;
    151   for (size_t i = 0; i < std::min(components.size(), max_components);
    152        i++) {
    153     if (i) {
    154       stream << ".";
    155     }
    156     stream << components[i];
    157   }
    158   if (name.length()) stream << "." << name;
    159   return stream.str();
    160 }
    161 
    162 
    163 
    164 // Declare tokens we'll use. Single character tokens are represented by their
    165 // ascii character code (e.g. '{'), others above 256.
    166 #define FLATBUFFERS_GEN_TOKENS(TD) \
    167   TD(Eof, 256, "end of file") \
    168   TD(StringConstant, 257, "string constant") \
    169   TD(IntegerConstant, 258, "integer constant") \
    170   TD(FloatConstant, 259, "float constant") \
    171   TD(Identifier, 260, "identifier") \
    172   TD(Table, 261, "table") \
    173   TD(Struct, 262, "struct") \
    174   TD(Enum, 263, "enum") \
    175   TD(Union, 264, "union") \
    176   TD(NameSpace, 265, "namespace") \
    177   TD(RootType, 266, "root_type") \
    178   TD(FileIdentifier, 267, "file_identifier") \
    179   TD(FileExtension, 268, "file_extension") \
    180   TD(Include, 269, "include") \
    181   TD(Attribute, 270, "attribute") \
    182   TD(Null, 271, "null") \
    183   TD(Service, 272, "rpc_service") \
    184   TD(NativeInclude, 273, "native_include")
    185 #ifdef __GNUC__
    186 __extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
    187 #endif
    188 enum {
    189   #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
    190     FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
    191   #undef FLATBUFFERS_TOKEN
    192   #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
    193       kToken ## ENUM,
    194     FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
    195   #undef FLATBUFFERS_TD
    196 };
    197 
    198 static std::string TokenToString(int t) {
    199   static const char *tokens[] = {
    200     #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
    201       FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
    202     #undef FLATBUFFERS_TOKEN
    203     #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
    204       IDLTYPE,
    205       FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
    206     #undef FLATBUFFERS_TD
    207   };
    208   if (t < 256) {  // A single ascii char token.
    209     std::string s;
    210     s.append(1, static_cast<char>(t));
    211     return s;
    212   } else {       // Other tokens.
    213     return tokens[t - 256];
    214   }
    215 }
    216 
    217 std::string Parser::TokenToStringId(int t) {
    218   return TokenToString(t) + (t == kTokenIdentifier ? ": " + attribute_ : "");
    219 }
    220 
    221 // Parses exactly nibbles worth of hex digits into a number, or error.
    222 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
    223   for (int i = 0; i < nibbles; i++)
    224     if (!isxdigit(static_cast<const unsigned char>(cursor_[i])))
    225       return Error("escape code must be followed by " + NumToString(nibbles) +
    226                    " hex digits");
    227   std::string target(cursor_, cursor_ + nibbles);
    228   *val = StringToUInt(target.c_str(), nullptr, 16);
    229   cursor_ += nibbles;
    230   return NoError();
    231 }
    232 
    233 CheckedError Parser::SkipByteOrderMark() {
    234   if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
    235   cursor_++;
    236   if (static_cast<unsigned char>(*cursor_) != 0xbb) return Error("invalid utf-8 byte order mark");
    237   cursor_++;
    238   if (static_cast<unsigned char>(*cursor_) != 0xbf) return Error("invalid utf-8 byte order mark");
    239   cursor_++;
    240   return NoError();
    241 }
    242 
    243 bool IsIdentifierStart(char c) {
    244   return isalpha(static_cast<unsigned char>(c)) || c == '_';
    245 }
    246 
    247 CheckedError Parser::Next() {
    248   doc_comment_.clear();
    249   bool seen_newline = false;
    250   attribute_.clear();
    251   for (;;) {
    252     char c = *cursor_++;
    253     token_ = c;
    254     switch (c) {
    255       case '\0': cursor_--; token_ = kTokenEof; return NoError();
    256       case ' ': case '\r': case '\t': break;
    257       case '\n': line_++; seen_newline = true; break;
    258       case '{': case '}': case '(': case ')': case '[': case ']':
    259       case ',': case ':': case ';': case '=': return NoError();
    260       case '.':
    261         if(!isdigit(static_cast<const unsigned char>(*cursor_))) return NoError();
    262         return Error("floating point constant can\'t start with \".\"");
    263       case '\"':
    264       case '\'': {
    265         int unicode_high_surrogate = -1;
    266 
    267         while (*cursor_ != c) {
    268           if (*cursor_ < ' ' && *cursor_ >= 0)
    269             return Error("illegal character in string constant");
    270           if (*cursor_ == '\\') {
    271             cursor_++;
    272             if (unicode_high_surrogate != -1 &&
    273                 *cursor_ != 'u') {
    274               return Error(
    275                 "illegal Unicode sequence (unpaired high surrogate)");
    276             }
    277             switch (*cursor_) {
    278               case 'n':  attribute_ += '\n'; cursor_++; break;
    279               case 't':  attribute_ += '\t'; cursor_++; break;
    280               case 'r':  attribute_ += '\r'; cursor_++; break;
    281               case 'b':  attribute_ += '\b'; cursor_++; break;
    282               case 'f':  attribute_ += '\f'; cursor_++; break;
    283               case '\"': attribute_ += '\"'; cursor_++; break;
    284               case '\'': attribute_ += '\''; cursor_++; break;
    285               case '\\': attribute_ += '\\'; cursor_++; break;
    286               case '/':  attribute_ += '/';  cursor_++; break;
    287               case 'x': {  // Not in the JSON standard
    288                 cursor_++;
    289                 uint64_t val;
    290                 ECHECK(ParseHexNum(2, &val));
    291                 attribute_ += static_cast<char>(val);
    292                 break;
    293               }
    294               case 'u': {
    295                 cursor_++;
    296                 uint64_t val;
    297                 ECHECK(ParseHexNum(4, &val));
    298                 if (val >= 0xD800 && val <= 0xDBFF) {
    299                   if (unicode_high_surrogate != -1) {
    300                     return Error(
    301                       "illegal Unicode sequence (multiple high surrogates)");
    302                   } else {
    303                     unicode_high_surrogate = static_cast<int>(val);
    304                   }
    305                 } else if (val >= 0xDC00 && val <= 0xDFFF) {
    306                   if (unicode_high_surrogate == -1) {
    307                     return Error(
    308                       "illegal Unicode sequence (unpaired low surrogate)");
    309                   } else {
    310                     int code_point = 0x10000 +
    311                       ((unicode_high_surrogate & 0x03FF) << 10) +
    312                       (val & 0x03FF);
    313                     ToUTF8(code_point, &attribute_);
    314                     unicode_high_surrogate = -1;
    315                   }
    316                 } else {
    317                   if (unicode_high_surrogate != -1) {
    318                     return Error(
    319                       "illegal Unicode sequence (unpaired high surrogate)");
    320                   }
    321                   ToUTF8(static_cast<int>(val), &attribute_);
    322                 }
    323                 break;
    324               }
    325               default: return Error("unknown escape code in string constant");
    326             }
    327           } else { // printable chars + UTF-8 bytes
    328             if (unicode_high_surrogate != -1) {
    329               return Error(
    330                 "illegal Unicode sequence (unpaired high surrogate)");
    331             }
    332             attribute_ += *cursor_++;
    333           }
    334         }
    335         if (unicode_high_surrogate != -1) {
    336           return Error(
    337             "illegal Unicode sequence (unpaired high surrogate)");
    338         }
    339         cursor_++;
    340         if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
    341           return Error("illegal UTF-8 sequence");
    342         }
    343         token_ = kTokenStringConstant;
    344         return NoError();
    345       }
    346       case '/':
    347         if (*cursor_ == '/') {
    348           const char *start = ++cursor_;
    349           while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
    350           if (*start == '/') {  // documentation comment
    351             if (cursor_ != source_ && !seen_newline)
    352               return Error(
    353                     "a documentation comment should be on a line on its own");
    354             doc_comment_.push_back(std::string(start + 1, cursor_));
    355           }
    356           break;
    357         } else if (*cursor_ == '*') {
    358           cursor_++;
    359           // TODO: make nested.
    360           while (*cursor_ != '*' || cursor_[1] != '/') {
    361             if (*cursor_ == '\n') line_++;
    362             if (!*cursor_) return Error("end of file in comment");
    363             cursor_++;
    364           }
    365           cursor_ += 2;
    366           break;
    367         }
    368         // fall thru
    369       default:
    370         if (IsIdentifierStart(c)) {
    371           // Collect all chars of an identifier:
    372           const char *start = cursor_ - 1;
    373           while (isalnum(static_cast<unsigned char>(*cursor_)) ||
    374                  *cursor_ == '_')
    375             cursor_++;
    376           attribute_.append(start, cursor_);
    377           // First, see if it is a type keyword from the table of types:
    378           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
    379             PTYPE) \
    380             if (attribute_ == IDLTYPE) { \
    381               token_ = kToken ## ENUM; \
    382               return NoError(); \
    383             }
    384             FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
    385           #undef FLATBUFFERS_TD
    386           // If it's a boolean constant keyword, turn those into integers,
    387           // which simplifies our logic downstream.
    388           if (attribute_ == "true" || attribute_ == "false") {
    389             attribute_ = NumToString(attribute_ == "true");
    390             token_ = kTokenIntegerConstant;
    391             return NoError();
    392           }
    393           // Check for declaration keywords:
    394           if (attribute_ == "table") {
    395             token_ = kTokenTable;
    396             return NoError();
    397           }
    398           if (attribute_ == "struct") {
    399             token_ = kTokenStruct;
    400             return NoError();
    401           }
    402           if (attribute_ == "enum") {
    403             token_ = kTokenEnum;
    404             return NoError();
    405           }
    406           if (attribute_ == "union") {
    407             token_ = kTokenUnion;
    408             return NoError();
    409           }
    410           if (attribute_ == "namespace") {
    411             token_ = kTokenNameSpace;
    412             return NoError();
    413           }
    414           if (attribute_ == "root_type") {
    415             token_ = kTokenRootType;
    416             return NoError();
    417           }
    418           if (attribute_ == "include") {
    419             token_ = kTokenInclude;
    420             return NoError();
    421           }
    422           if (attribute_ == "attribute") {
    423             token_ = kTokenAttribute;
    424             return NoError();
    425           }
    426           if (attribute_ == "file_identifier") {
    427             token_ = kTokenFileIdentifier;
    428             return NoError();
    429           }
    430           if (attribute_ == "file_extension") {
    431             token_ = kTokenFileExtension;
    432             return NoError();
    433           }
    434           if (attribute_ == "null") {
    435             token_ = kTokenNull;
    436             return NoError();
    437           }
    438           if (attribute_ == "rpc_service") {
    439             token_ = kTokenService;
    440             return NoError();
    441           }
    442           if (attribute_ == "native_include") {
    443             token_ = kTokenNativeInclude;
    444             return NoError();
    445           }
    446           // If not, it is a user-defined identifier:
    447           token_ = kTokenIdentifier;
    448           return NoError();
    449         } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
    450           const char *start = cursor_ - 1;
    451           if (c == '-' && *cursor_ == '0' &&
    452               (cursor_[1] == 'x' || cursor_[1] == 'X')) {
    453             ++start;
    454             ++cursor_;
    455             attribute_.append(&c, &c + 1);
    456             c = '0';
    457           }
    458           if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
    459               cursor_++;
    460               while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
    461               attribute_.append(start + 2, cursor_);
    462               attribute_ = NumToString(static_cast<int64_t>(
    463                              StringToUInt(attribute_.c_str(), nullptr, 16)));
    464               token_ = kTokenIntegerConstant;
    465               return NoError();
    466           }
    467           while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
    468           if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
    469             if (*cursor_ == '.') {
    470               cursor_++;
    471               while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
    472             }
    473             // See if this float has a scientific notation suffix. Both JSON
    474             // and C++ (through strtod() we use) have the same format:
    475             if (*cursor_ == 'e' || *cursor_ == 'E') {
    476               cursor_++;
    477               if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
    478               while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
    479             }
    480             token_ = kTokenFloatConstant;
    481           } else {
    482             token_ = kTokenIntegerConstant;
    483           }
    484           attribute_.append(start, cursor_);
    485           return NoError();
    486         }
    487         std::string ch;
    488         ch = c;
    489         if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
    490         return Error("illegal character: " + ch);
    491     }
    492   }
    493 }
    494 
    495 // Check if a given token is next.
    496 bool Parser::Is(int t) {
    497   return t == token_;
    498 }
    499 
    500 // Expect a given token to be next, consume it, or error if not present.
    501 CheckedError Parser::Expect(int t) {
    502   if (t != token_) {
    503     return Error("expecting: " + TokenToString(t) + " instead got: " +
    504                  TokenToStringId(token_));
    505   }
    506   NEXT();
    507   return NoError();
    508 }
    509 
    510 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
    511   while (Is('.')) {
    512     NEXT();
    513     *id += ".";
    514     *id += attribute_;
    515     if (last) *last = attribute_;
    516     EXPECT(kTokenIdentifier);
    517   }
    518   return NoError();
    519 }
    520 
    521 EnumDef *Parser::LookupEnum(const std::string &id) {
    522   // Search thru parent namespaces.
    523   for (int components = static_cast<int>(namespaces_.back()->components.size());
    524        components >= 0; components--) {
    525     auto ed = enums_.Lookup(
    526                 namespaces_.back()->GetFullyQualifiedName(id, components));
    527     if (ed) return ed;
    528   }
    529   return nullptr;
    530 }
    531 
    532 CheckedError Parser::ParseTypeIdent(Type &type) {
    533   std::string id = attribute_;
    534   EXPECT(kTokenIdentifier);
    535   ECHECK(ParseNamespacing(&id, nullptr));
    536   auto enum_def = LookupEnum(id);
    537   if (enum_def) {
    538     type = enum_def->underlying_type;
    539     if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
    540   } else {
    541     type.base_type = BASE_TYPE_STRUCT;
    542     type.struct_def = LookupCreateStruct(id);
    543   }
    544   return NoError();
    545 }
    546 
    547 // Parse any IDL type.
    548 CheckedError Parser::ParseType(Type &type) {
    549   if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
    550     type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
    551     NEXT();
    552   } else {
    553     if (token_ == kTokenIdentifier) {
    554       ECHECK(ParseTypeIdent(type));
    555     } else if (token_ == '[') {
    556       NEXT();
    557       Type subtype;
    558       ECHECK(ParseType(subtype));
    559       if (subtype.base_type == BASE_TYPE_VECTOR) {
    560         // We could support this, but it will complicate things, and it's
    561         // easier to work around with a struct around the inner vector.
    562         return Error(
    563               "nested vector types not supported (wrap in table first).");
    564       }
    565       type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
    566       type.element = subtype.base_type;
    567       EXPECT(']');
    568     } else {
    569       return Error("illegal type syntax");
    570     }
    571   }
    572   return NoError();
    573 }
    574 
    575 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
    576                               const Type &type, FieldDef **dest) {
    577   auto &field = *new FieldDef();
    578   field.value.offset =
    579     FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
    580   field.name = name;
    581   field.file = struct_def.file;
    582   field.value.type = type;
    583   if (struct_def.fixed) {  // statically compute the field offset
    584     auto size = InlineSize(type);
    585     auto alignment = InlineAlignment(type);
    586     // structs_ need to have a predictable format, so we need to align to
    587     // the largest scalar
    588     struct_def.minalign = std::max(struct_def.minalign, alignment);
    589     struct_def.PadLastField(alignment);
    590     field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
    591     struct_def.bytesize += size;
    592   }
    593   if (struct_def.fields.Add(name, &field))
    594     return Error("field already exists: " + name);
    595   *dest = &field;
    596   return NoError();
    597 }
    598 
    599 CheckedError Parser::ParseField(StructDef &struct_def) {
    600   std::string name = attribute_;
    601   std::vector<std::string> dc = doc_comment_;
    602   EXPECT(kTokenIdentifier);
    603   EXPECT(':');
    604   Type type;
    605   ECHECK(ParseType(type));
    606 
    607   if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
    608     return Error("structs_ may contain only scalar or struct fields");
    609 
    610   FieldDef *typefield = nullptr;
    611   if (type.base_type == BASE_TYPE_UNION) {
    612     // For union fields, add a second auto-generated field to hold the type,
    613     // with a special suffix.
    614     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
    615                     type.enum_def->underlying_type, &typefield));
    616   } else if (type.base_type == BASE_TYPE_VECTOR &&
    617              type.element == BASE_TYPE_UNION) {
    618     // Only cpp supports the union vector feature so far.
    619     if (opts.lang_to_generate != IDLOptions::kCpp) {
    620       return Error("Vectors of unions are not yet supported in all "
    621                    "the specified programming languages.");
    622     }
    623     // For vector of union fields, add a second auto-generated vector field to
    624     // hold the types, with a special suffix.
    625     Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
    626     union_vector.element = BASE_TYPE_UTYPE;
    627     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
    628                     union_vector, &typefield));
    629   }
    630 
    631   FieldDef *field;
    632   ECHECK(AddField(struct_def, name, type, &field));
    633 
    634   if (token_ == '=') {
    635     NEXT();
    636     if (!IsScalar(type.base_type))
    637       return Error("default values currently only supported for scalars");
    638     ECHECK(ParseSingleValue(field->value));
    639   }
    640   if (IsFloat(field->value.type.base_type)) {
    641     if (!strpbrk(field->value.constant.c_str(), ".eE"))
    642       field->value.constant += ".0";
    643   }
    644 
    645   if (type.enum_def &&
    646       IsScalar(type.base_type) &&
    647       !struct_def.fixed &&
    648       !type.enum_def->attributes.Lookup("bit_flags") &&
    649       !type.enum_def->ReverseLookup(static_cast<int>(
    650                          StringToInt(field->value.constant.c_str()))))
    651     return Error("enum " + type.enum_def->name +
    652           " does not have a declaration for this field\'s default of " +
    653           field->value.constant);
    654 
    655   field->doc_comment = dc;
    656   ECHECK(ParseMetaData(&field->attributes));
    657   field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
    658   auto hash_name = field->attributes.Lookup("hash");
    659   if (hash_name) {
    660     switch (type.base_type) {
    661       case BASE_TYPE_INT:
    662       case BASE_TYPE_UINT: {
    663         if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
    664           return Error("Unknown hashing algorithm for 32 bit types: " +
    665                 hash_name->constant);
    666         break;
    667       }
    668       case BASE_TYPE_LONG:
    669       case BASE_TYPE_ULONG: {
    670         if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
    671           return Error("Unknown hashing algorithm for 64 bit types: " +
    672                 hash_name->constant);
    673         break;
    674       }
    675       default:
    676         return Error(
    677               "only int, uint, long and ulong data types support hashing.");
    678     }
    679   }
    680   auto cpp_type = field->attributes.Lookup("cpp_type");
    681   if (cpp_type) {
    682     if (!hash_name)
    683       return Error("cpp_type can only be used with a hashed field");
    684   }
    685   if (field->deprecated && struct_def.fixed)
    686     return Error("can't deprecate fields in a struct");
    687   field->required = field->attributes.Lookup("required") != nullptr;
    688   if (field->required && (struct_def.fixed ||
    689                          IsScalar(field->value.type.base_type)))
    690     return Error("only non-scalar fields in tables may be 'required'");
    691   field->key = field->attributes.Lookup("key") != nullptr;
    692   if (field->key) {
    693     if (struct_def.has_key)
    694       return Error("only one field may be set as 'key'");
    695     struct_def.has_key = true;
    696     if (!IsScalar(field->value.type.base_type)) {
    697       field->required = true;
    698       if (field->value.type.base_type != BASE_TYPE_STRING)
    699         return Error("'key' field must be string or scalar type");
    700     }
    701   }
    702 
    703   field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
    704   if (field->native_inline && !IsStruct(field->value.type))
    705     return Error("native_inline can only be defined on structs'");
    706 
    707   auto nested = field->attributes.Lookup("nested_flatbuffer");
    708   if (nested) {
    709     if (nested->type.base_type != BASE_TYPE_STRING)
    710       return Error(
    711             "nested_flatbuffer attribute must be a string (the root type)");
    712     if (field->value.type.base_type != BASE_TYPE_VECTOR ||
    713         field->value.type.element != BASE_TYPE_UCHAR)
    714       return Error(
    715             "nested_flatbuffer attribute may only apply to a vector of ubyte");
    716     // This will cause an error if the root type of the nested flatbuffer
    717     // wasn't defined elsewhere.
    718     LookupCreateStruct(nested->constant);
    719   }
    720 
    721   if (typefield) {
    722     // If this field is a union, and it has a manually assigned id,
    723     // the automatically added type field should have an id as well (of N - 1).
    724     auto attr = field->attributes.Lookup("id");
    725     if (attr) {
    726       auto id = atoi(attr->constant.c_str());
    727       auto val = new Value();
    728       val->type = attr->type;
    729       val->constant = NumToString(id - 1);
    730       typefield->attributes.Add("id", val);
    731     }
    732   }
    733 
    734   EXPECT(';');
    735   return NoError();
    736 }
    737 
    738 CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
    739                                    size_t parent_fieldn,
    740                                    const StructDef *parent_struct_def) {
    741   switch (val.type.base_type) {
    742     case BASE_TYPE_UNION: {
    743       assert(field);
    744       std::string constant;
    745       // Find corresponding type field we may have already parsed.
    746       for (auto elem = field_stack_.rbegin();
    747            elem != field_stack_.rbegin() + parent_fieldn; ++elem) {
    748         auto &type = elem->second->value.type;
    749         if (type.base_type == BASE_TYPE_UTYPE &&
    750             type.enum_def == val.type.enum_def) {
    751           constant = elem->first.constant;
    752           break;
    753         }
    754       }
    755       if (constant.empty()) {
    756         // We haven't seen the type field yet. Sadly a lot of JSON writers
    757         // output these in alphabetical order, meaning it comes after this
    758         // value. So we scan past the value to find it, then come back here.
    759         auto type_name = field->name + UnionTypeFieldSuffix();
    760         assert(parent_struct_def);
    761         auto type_field = parent_struct_def->fields.Lookup(type_name);
    762         assert(type_field);  // Guaranteed by ParseField().
    763         // Remember where we are in the source file, so we can come back here.
    764         auto backup = *static_cast<ParserState *>(this);
    765         ECHECK(SkipAnyJsonValue());  // The table.
    766         EXPECT(',');
    767         auto next_name = attribute_;
    768         if (Is(kTokenStringConstant)) {
    769           NEXT();
    770         } else {
    771           EXPECT(kTokenIdentifier);
    772         }
    773         if (next_name != type_name)
    774           return Error("missing type field after this union value: " +
    775                        type_name);
    776         EXPECT(':');
    777         Value type_val = type_field->value;
    778         ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr));
    779         constant = type_val.constant;
    780         // Got the information we needed, now rewind:
    781         *static_cast<ParserState *>(this) = backup;
    782       }
    783       uint8_t enum_idx;
    784       ECHECK(atot(constant.c_str(), *this, &enum_idx));
    785       auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
    786       if (!enum_val) return Error("illegal type id for: " + field->name);
    787       ECHECK(ParseTable(*enum_val->struct_def, &val.constant, nullptr));
    788       break;
    789     }
    790     case BASE_TYPE_STRUCT:
    791       ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
    792       break;
    793     case BASE_TYPE_STRING: {
    794       auto s = attribute_;
    795       EXPECT(kTokenStringConstant);
    796       val.constant = NumToString(builder_.CreateString(s).o);
    797       break;
    798     }
    799     case BASE_TYPE_VECTOR: {
    800       EXPECT('[');
    801       uoffset_t off;
    802       ECHECK(ParseVector(val.type.VectorType(), &off));
    803       val.constant = NumToString(off);
    804       break;
    805     }
    806     case BASE_TYPE_INT:
    807     case BASE_TYPE_UINT:
    808     case BASE_TYPE_LONG:
    809     case BASE_TYPE_ULONG: {
    810       if (field && field->attributes.Lookup("hash") &&
    811           (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
    812         ECHECK(ParseHash(val, field));
    813       } else {
    814         ECHECK(ParseSingleValue(val));
    815       }
    816       break;
    817     }
    818     default:
    819       ECHECK(ParseSingleValue(val));
    820       break;
    821   }
    822   return NoError();
    823 }
    824 
    825 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
    826   assert(val.constant.length() == struct_def.bytesize);
    827   builder_.Align(struct_def.minalign);
    828   builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
    829                      struct_def.bytesize);
    830   builder_.AddStructOffset(val.offset, builder_.GetSize());
    831 }
    832 
    // Parses a `{ name: value, ... }` object for struct_def and serializes it.
    //
    // Parsed fields are pushed onto field_stack_, kept ordered by their
    // value.offset; setting the same field twice is an error. Unknown field names
    // are an error unless opts.skip_unexpected_fields_in_json is set, in which
    // case their value is skipped. A field whose value is `null` is ignored.
    //
    // For a fixed struct every field must be supplied; the serialized bytes are
    // copied into *value and removed from the builder again (ovalue must be
    // null). For a table the offset returned by EndTable() is stored in *ovalue
    // and, as a number string, in *value, for whichever of the two is non-null.
        833 CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
        834                                 uoffset_t *ovalue) {
        835   EXPECT('{');
              // Number of entries at the back of field_stack_ that belong to this table.
        836   size_t fieldn = 0;
        837   for (;;) {
                // Outside strict mode '}' may appear here at any time (so a trailing
                // comma is accepted); in strict mode only while no field has been stored.
        838     if ((!opts.strict_json || !fieldn) && Is('}')) { NEXT(); break; }
                // Capture the name before the token is consumed.
        839     std::string name = attribute_;
        840     if (Is(kTokenStringConstant)) {
        841       NEXT();
        842     } else {
                  // Strict JSON requires quoted field names.
        843       EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
        844     }
        845     auto field = struct_def.fields.Lookup(name);
        846     if (!field) {
        847       if (!opts.skip_unexpected_fields_in_json) {
        848         return Error("unknown field: " + name);
        849       } else {
        850         EXPECT(':');
        851         ECHECK(SkipAnyJsonValue());
        852       }
        853     } else {
        854       EXPECT(':');
        855       if (Is(kTokenNull)) {
        856         NEXT(); // Ignore this field.
        857       } else {
                    // Start from the field's declared value (type and default).
        858         Value val = field->value;
        859         ECHECK(ParseAnyValue(val, field, fieldn, &struct_def));
        860         // Hardcoded insertion-sort with error-check.
        861         // If fields are specified in order, then this loop exits immediately.
        862         auto elem = field_stack_.rbegin();
        863         for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
        864           auto existing_field = elem->second;
        865           if (existing_field == field)
        866             return Error("field set more than once: " + field->name);
        867           if (existing_field->value.offset < field->value.offset) break;
        868         }
        869         // Note: elem points to before the insertion point, thus .base() points
        870         // to the correct spot.
        871         field_stack_.insert(elem.base(), std::make_pair(val, field));
        872         fieldn++;
        873       }
        874     }
        875     if (Is('}')) { NEXT(); break; }
        876     EXPECT(',');
        877   }
        878 
              // Fixed structs have no defaults: every field must have been given.
        879   if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
        880     return Error("struct: wrong number of initializers: " + struct_def.name);
        881 
        882   auto start = struct_def.fixed
        883                  ? builder_.StartStruct(struct_def.minalign)
        884                  : builder_.StartTable();
        885 
              // With sortbysize, make one pass per scalar size, starting at
              // sizeof(largest_scalar_t) and halving each time, emitting only the
              // fields of that size. Otherwise make a single pass (size 1, then 0)
              // that emits every field.
        886   for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
        887        size;
        888        size /= 2) {
        889     // Go through elements in reverse, since we're building the data backwards.
        890     for (auto it = field_stack_.rbegin();
        891              it != field_stack_.rbegin() + fieldn; ++it) {
        892       auto &field_value = it->first;
        893       auto field = it->second;
        894       if (!struct_def.sortbysize ||
        895           size == SizeOf(field_value.type.base_type)) {
                    // One case per base type is generated below: scalars are pushed
                    // directly (structs) or added with their default (tables); pointer
                    // types are added as offsets, or serialized in place for structs.
        896         switch (field_value.type.base_type) {
        897           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
        898             PTYPE) \
        899             case BASE_TYPE_ ## ENUM: \
        900               builder_.Pad(field->padding); \
        901               if (struct_def.fixed) { \
        902                 CTYPE val; \
        903                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
        904                 builder_.PushElement(val); \
        905               } else { \
        906                 CTYPE val, valdef; \
        907                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
        908                 ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
        909                 builder_.AddElement(field_value.offset, val, valdef); \
        910               } \
        911               break;
        912             FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
        913           #undef FLATBUFFERS_TD
        914           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
        915             PTYPE) \
        916             case BASE_TYPE_ ## ENUM: \
        917               builder_.Pad(field->padding); \
        918               if (IsStruct(field->value.type)) { \
        919                 SerializeStruct(*field->value.type.struct_def, field_value); \
        920               } else { \
        921                 CTYPE val; \
        922                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
        923                 builder_.AddOffset(field_value.offset, val); \
        924               } \
        925               break;
        926             FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
        927           #undef FLATBUFFERS_TD
        928         }
        929       }
        930     }
        931   }
              // Drop this table's entries from the shared stack.
        932   for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
        933 
        934   if (struct_def.fixed) {
        935     builder_.ClearOffsets();
        936     builder_.EndStruct();
        937     assert(value);
        938     // Temporarily store this struct in the value string, since it is to
        939     // be serialized in-place elsewhere.
        940     value->assign(
        941           reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
        942           struct_def.bytesize);
        943     builder_.PopBytes(struct_def.bytesize);
        944     assert(!ovalue);
        945   } else {
        946     auto val = builder_.EndTable(start,
        947                           static_cast<voffset_t>(struct_def.fields.vec.size()));
        948     if (ovalue) *ovalue = val;
        949     if (value) *value = NumToString(val);
        950   }
        951   return NoError();
        952 }
    953 
    954 CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue) {
    955   int count = 0;
    956   for (;;) {
    957     if ((!opts.strict_json || !count) && Is(']')) { NEXT(); break; }
    958     Value val;
    959     val.type = type;
    960     ECHECK(ParseAnyValue(val, nullptr, 0, nullptr));
    961     field_stack_.push_back(std::make_pair(val, nullptr));
    962     count++;
    963     if (Is(']')) { NEXT(); break; }
    964     EXPECT(',');
    965   }
    966 
    967   builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
    968                        InlineAlignment(type));
    969   for (int i = 0; i < count; i++) {
    970     // start at the back, since we're building the data backwards.
    971     auto &val = field_stack_.back().first;
    972     switch (val.type.base_type) {
    973       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
    974         case BASE_TYPE_ ## ENUM: \
    975           if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
    976           else { \
    977              CTYPE elem; \
    978              ECHECK(atot(val.constant.c_str(), *this, &elem)); \
    979              builder_.PushElement(elem); \
    980           } \
    981           break;
    982         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
    983       #undef FLATBUFFERS_TD
    984     }
    985     field_stack_.pop_back();
    986   }
    987 
    988   builder_.ClearOffsets();
    989   *ovalue = builder_.EndVector(count);
    990   return NoError();
    991 }
    992 
    993 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
    994   if (Is('(')) {
    995     NEXT();
    996     for (;;) {
    997       auto name = attribute_;
    998       EXPECT(kTokenIdentifier);
    999       if (known_attributes_.find(name) == known_attributes_.end())
   1000         return Error("user define attributes must be declared before use: " +
   1001                      name);
   1002       auto e = new Value();
   1003       attributes->Add(name, e);
   1004       if (Is(':')) {
   1005         NEXT();
   1006         ECHECK(ParseSingleValue(*e));
   1007       }
   1008       if (Is(')')) { NEXT(); break; }
   1009       EXPECT(',');
   1010     }
   1011   }
   1012   return NoError();
   1013 }
   1014 
   1015 CheckedError Parser::TryTypedValue(int dtoken, bool check, Value &e,
   1016                                    BaseType req, bool *destmatch) {
   1017   bool match = dtoken == token_;
   1018   if (match) {
   1019     *destmatch = true;
   1020     e.constant = attribute_;
   1021     if (!check) {
   1022       if (e.type.base_type == BASE_TYPE_NONE) {
   1023         e.type.base_type = req;
   1024       } else {
   1025         return Error(std::string("type mismatch: expecting: ") +
   1026                      kTypeNames[e.type.base_type] +
   1027                      ", found: " +
   1028                      kTypeNames[req]);
   1029       }
   1030     }
   1031     NEXT();
   1032   }
   1033   return NoError();
   1034 }
   1035 
   1036 CheckedError Parser::ParseEnumFromString(Type &type, int64_t *result) {
   1037   *result = 0;
   1038   // Parse one or more enum identifiers, separated by spaces.
   1039   const char *next = attribute_.c_str();
   1040   do {
   1041     const char *divider = strchr(next, ' ');
   1042     std::string word;
   1043     if (divider) {
   1044       word = std::string(next, divider);
   1045       next = divider + strspn(divider, " ");
   1046     } else {
   1047       word = next;
   1048       next += word.length();
   1049     }
   1050     if (type.enum_def) {  // The field has an enum type
   1051       auto enum_val = type.enum_def->vals.Lookup(word);
   1052       if (!enum_val)
   1053         return Error("unknown enum value: " + word +
   1054               ", for enum: " + type.enum_def->name);
   1055       *result |= enum_val->value;
   1056     } else {  // No enum type, probably integral field.
   1057       if (!IsInteger(type.base_type))
   1058         return Error("not a valid value for this field: " + word);
   1059       // TODO: could check if its a valid number constant here.
   1060       const char *dot = strrchr(word.c_str(), '.');
   1061       if (!dot)
   1062         return Error("enum values need to be qualified by an enum type");
   1063       std::string enum_def_str(word.c_str(), dot);
   1064       std::string enum_val_str(dot + 1, word.c_str() + word.length());
   1065       auto enum_def = LookupEnum(enum_def_str);
   1066       if (!enum_def) return Error("unknown enum: " + enum_def_str);
   1067       auto enum_val = enum_def->vals.Lookup(enum_val_str);
   1068       if (!enum_val) return Error("unknown enum value: " + enum_val_str);
   1069       *result |= enum_val->value;
   1070     }
   1071   } while(*next);
   1072   return NoError();
   1073 }
   1074 
   1075 
   1076 CheckedError Parser::ParseHash(Value &e, FieldDef* field) {
   1077   assert(field);
   1078   Value *hash_name = field->attributes.Lookup("hash");
   1079   switch (e.type.base_type) {
   1080     case BASE_TYPE_INT:
   1081     case BASE_TYPE_UINT: {
   1082       auto hash = FindHashFunction32(hash_name->constant.c_str());
   1083       uint32_t hashed_value = hash(attribute_.c_str());
   1084       e.constant = NumToString(hashed_value);
   1085       break;
   1086     }
   1087     case BASE_TYPE_LONG:
   1088     case BASE_TYPE_ULONG: {
   1089       auto hash = FindHashFunction64(hash_name->constant.c_str());
   1090       uint64_t hashed_value = hash(attribute_.c_str());
   1091       e.constant = NumToString(hashed_value);
   1092       break;
   1093     }
   1094     default:
   1095       assert(0);
   1096   }
   1097   NEXT();
   1098   return NoError();
   1099 }
   1100 
   1101 CheckedError Parser::ParseSingleValue(Value &e) {
   1102   // First see if this could be a conversion function:
   1103   if (token_ == kTokenIdentifier && *cursor_ == '(') {
   1104     auto functionname = attribute_;
   1105     NEXT();
   1106     EXPECT('(');
   1107     ECHECK(ParseSingleValue(e));
   1108     EXPECT(')');
   1109     #define FLATBUFFERS_FN_DOUBLE(name, op) \
   1110       if (functionname == name) { \
   1111         auto x = strtod(e.constant.c_str(), nullptr); \
   1112         e.constant = NumToString(op); \
   1113       }
   1114     FLATBUFFERS_FN_DOUBLE("deg", x / M_PI * 180);
   1115     FLATBUFFERS_FN_DOUBLE("rad", x * M_PI / 180);
   1116     FLATBUFFERS_FN_DOUBLE("sin", sin(x));
   1117     FLATBUFFERS_FN_DOUBLE("cos", cos(x));
   1118     FLATBUFFERS_FN_DOUBLE("tan", tan(x));
   1119     FLATBUFFERS_FN_DOUBLE("asin", asin(x));
   1120     FLATBUFFERS_FN_DOUBLE("acos", acos(x));
   1121     FLATBUFFERS_FN_DOUBLE("atan", atan(x));
   1122     // TODO(wvo): add more useful conversion functions here.
   1123     #undef FLATBUFFERS_FN_DOUBLE
   1124   // Then check if this could be a string/identifier enum value:
   1125   } else if (e.type.base_type != BASE_TYPE_STRING &&
   1126       e.type.base_type != BASE_TYPE_NONE &&
   1127       (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
   1128     if (IsIdentifierStart(attribute_[0])) {  // Enum value.
   1129       int64_t val;
   1130       ECHECK(ParseEnumFromString(e.type, &val));
   1131       e.constant = NumToString(val);
   1132       NEXT();
   1133     } else {  // Numeric constant in string.
   1134       if (IsInteger(e.type.base_type)) {
   1135         char *end;
   1136         e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
   1137         if (*end)
   1138           return Error("invalid integer: " + attribute_);
   1139       } else if (IsFloat(e.type.base_type)) {
   1140         char *end;
   1141         e.constant = NumToString(strtod(attribute_.c_str(), &end));
   1142         if (*end)
   1143           return Error("invalid float: " + attribute_);
   1144       } else {
   1145         assert(0);  // Shouldn't happen, we covered all types.
   1146         e.constant = "0";
   1147       }
   1148       NEXT();
   1149     }
   1150   } else {
   1151     bool match = false;
   1152     ECHECK(TryTypedValue(kTokenIntegerConstant,
   1153                          IsScalar(e.type.base_type),
   1154                          e,
   1155                          BASE_TYPE_INT,
   1156                          &match));
   1157     ECHECK(TryTypedValue(kTokenFloatConstant,
   1158                          IsFloat(e.type.base_type),
   1159                          e,
   1160                          BASE_TYPE_FLOAT,
   1161                          &match));
   1162     ECHECK(TryTypedValue(kTokenStringConstant,
   1163                          e.type.base_type == BASE_TYPE_STRING,
   1164                          e,
   1165                          BASE_TYPE_STRING,
   1166                          &match));
   1167     if (!match)
   1168       return Error("cannot parse value starting with: " +
   1169                    TokenToStringId(token_));
   1170   }
   1171   return NoError();
   1172 }
   1173 
   1174 StructDef *Parser::LookupCreateStruct(const std::string &name,
   1175                                       bool create_if_new, bool definition) {
   1176   std::string qualified_name = namespaces_.back()->GetFullyQualifiedName(name);
   1177   // See if it exists pre-declared by an unqualified use.
   1178   auto struct_def = structs_.Lookup(name);
   1179   if (struct_def && struct_def->predecl) {
   1180     if (definition) {
   1181       // Make sure it has the current namespace, and is registered under its
   1182       // qualified name.
   1183       struct_def->defined_namespace = namespaces_.back();
   1184       structs_.Move(name, qualified_name);
   1185     }
   1186     return struct_def;
   1187   }
   1188   // See if it exists pre-declared by an qualified use.
   1189   struct_def = structs_.Lookup(qualified_name);
   1190   if (struct_def && struct_def->predecl) {
   1191     if (definition) {
   1192       // Make sure it has the current namespace.
   1193       struct_def->defined_namespace = namespaces_.back();
   1194     }
   1195     return struct_def;
   1196   }
   1197   if (!definition) {
   1198     // Search thru parent namespaces.
   1199     for (size_t components = namespaces_.back()->components.size();
   1200          components && !struct_def; components--) {
   1201       struct_def = structs_.Lookup(
   1202           namespaces_.back()->GetFullyQualifiedName(name, components - 1));
   1203     }
   1204   }
   1205   if (!struct_def && create_if_new) {
   1206     struct_def = new StructDef();
   1207     if (definition) {
   1208       structs_.Add(qualified_name, struct_def);
   1209       struct_def->name = name;
   1210       struct_def->defined_namespace = namespaces_.back();
   1211     } else {
   1212       // Not a definition.
   1213       // Rather than failing, we create a "pre declared" StructDef, due to
   1214       // circular references, and check for errors at the end of parsing.
   1215       // It is defined in the root namespace, since we don't know what the
   1216       // final namespace will be.
   1217       // TODO: maybe safer to use special namespace?
   1218       structs_.Add(name, struct_def);
   1219       struct_def->name = name;
   1220       struct_def->defined_namespace = new Namespace();
   1221       namespaces_.insert(namespaces_.begin(), struct_def->defined_namespace);
   1222     }
   1223   }
   1224   return struct_def;
   1225 }
   1226 
   1227 CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
   1228   std::vector<std::string> enum_comment = doc_comment_;
   1229   NEXT();
   1230   std::string enum_name = attribute_;
   1231   EXPECT(kTokenIdentifier);
   1232   auto &enum_def = *new EnumDef();
   1233   enum_def.name = enum_name;
   1234   enum_def.file = file_being_parsed_;
   1235   enum_def.doc_comment = enum_comment;
   1236   enum_def.is_union = is_union;
   1237   enum_def.defined_namespace = namespaces_.back();
   1238   if (enums_.Add(namespaces_.back()->GetFullyQualifiedName(enum_name),
   1239                  &enum_def))
   1240     return Error("enum already exists: " + enum_name);
   1241   if (is_union) {
   1242     enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
   1243     enum_def.underlying_type.enum_def = &enum_def;
   1244   } else {
   1245     if (opts.proto_mode) {
   1246       enum_def.underlying_type.base_type = BASE_TYPE_INT;
   1247     } else {
   1248       // Give specialized error message, since this type spec used to
   1249       // be optional in the first FlatBuffers release.
   1250       if (!Is(':')) {
   1251         return Error("must specify the underlying integer type for this"
   1252               " enum (e.g. \': short\', which was the default).");
   1253       } else {
   1254         NEXT();
   1255       }
   1256       // Specify the integer type underlying this enum.
   1257       ECHECK(ParseType(enum_def.underlying_type));
   1258       if (!IsInteger(enum_def.underlying_type.base_type))
   1259         return Error("underlying enum type must be integral");
   1260     }
   1261     // Make this type refer back to the enum it was derived from.
   1262     enum_def.underlying_type.enum_def = &enum_def;
   1263   }
   1264   ECHECK(ParseMetaData(&enum_def.attributes));
   1265   EXPECT('{');
   1266   if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
   1267   for (;;) {
   1268     if (opts.proto_mode && attribute_ == "option") {
   1269       ECHECK(ParseProtoOption());
   1270     } else {
   1271       auto value_name = attribute_;
   1272       auto full_name = value_name;
   1273       std::vector<std::string> value_comment = doc_comment_;
   1274       EXPECT(kTokenIdentifier);
   1275       if (is_union) {
   1276         ECHECK(ParseNamespacing(&full_name, &value_name));
   1277         if (opts.union_value_namespacing) {
   1278           // Since we can't namespace the actual enum identifiers, turn
   1279           // namespace parts into part of the identifier.
   1280           value_name = full_name;
   1281           std::replace(value_name.begin(), value_name.end(), '.', '_');
   1282         }
   1283       }
   1284       auto prevsize = enum_def.vals.vec.size();
   1285       auto value = enum_def.vals.vec.size()
   1286         ? enum_def.vals.vec.back()->value + 1
   1287         : 0;
   1288       auto &ev = *new EnumVal(value_name, value);
   1289       if (enum_def.vals.Add(value_name, &ev))
   1290         return Error("enum value already exists: " + value_name);
   1291       ev.doc_comment = value_comment;
   1292       if (is_union) {
   1293         ev.struct_def = LookupCreateStruct(full_name);
   1294       }
   1295       if (Is('=')) {
   1296         NEXT();
   1297         ev.value = StringToInt(attribute_.c_str());
   1298         EXPECT(kTokenIntegerConstant);
   1299         if (!opts.proto_mode && prevsize &&
   1300             enum_def.vals.vec[prevsize - 1]->value >= ev.value)
   1301           return Error("enum values must be specified in ascending order");
   1302       }
   1303       if (opts.proto_mode && Is('[')) {
   1304         NEXT();
   1305         // ignore attributes on enums.
   1306         while (token_ != ']') NEXT();
   1307         NEXT();
   1308       }
   1309     }
   1310     if (!Is(opts.proto_mode ? ';' : ',')) break;
   1311     NEXT();
   1312     if (Is('}')) break;
   1313   }
   1314   EXPECT('}');
   1315   if (enum_def.attributes.Lookup("bit_flags")) {
   1316     for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
   1317          ++it) {
   1318       if (static_cast<size_t>((*it)->value) >=
   1319            SizeOf(enum_def.underlying_type.base_type) * 8)
   1320         return Error("bit flag out of range of underlying integral type");
   1321       (*it)->value = 1LL << (*it)->value;
   1322     }
   1323   }
   1324   if (dest) *dest = &enum_def;
   1325   types_.Add(namespaces_.back()->GetFullyQualifiedName(enum_def.name),
   1326              new Type(BASE_TYPE_UNION, nullptr, &enum_def));
   1327   return NoError();
   1328 }
   1329 
   1330 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
   1331   auto &struct_def = *LookupCreateStruct(name, true, true);
   1332   if (!struct_def.predecl) return Error("datatype already exists: " + name);
   1333   struct_def.predecl = false;
   1334   struct_def.name = name;
   1335   struct_def.file = file_being_parsed_;
   1336   // Move this struct to the back of the vector just in case it was predeclared,
   1337   // to preserve declaration order.
   1338   *remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) = &struct_def;
   1339   *dest = &struct_def;
   1340   return NoError();
   1341 }
   1342 
   1343 CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
   1344                                 StructDef *struct_def,
   1345                                 const char *suffix,
   1346                                 BaseType basetype) {
   1347   auto len = strlen(suffix);
   1348   for (auto it = fields.begin(); it != fields.end(); ++it) {
   1349     auto &fname = (*it)->name;
   1350     if (fname.length() > len &&
   1351         fname.compare(fname.length() - len, len, suffix) == 0 &&
   1352         (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
   1353       auto field = struct_def->fields.Lookup(
   1354                                              fname.substr(0, fname.length() - len));
   1355       if (field && field->value.type.base_type == basetype)
   1356         return Error("Field " + fname +
   1357                      " would clash with generated functions for field " +
   1358                      field->name);
   1359     }
   1360   }
   1361   return NoError();
   1362 }
   1363 
   1364 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
   1365   auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
   1366   auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
   1367   return a_id < b_id;
   1368 }
   1369 
   1370 CheckedError Parser::ParseDecl() {
   1371   std::vector<std::string> dc = doc_comment_;
   1372   bool fixed = Is(kTokenStruct);
   1373   if (fixed) NEXT() else EXPECT(kTokenTable);
   1374   std::string name = attribute_;
   1375   EXPECT(kTokenIdentifier);
   1376   StructDef *struct_def;
   1377   ECHECK(StartStruct(name, &struct_def));
   1378   struct_def->doc_comment = dc;
   1379   struct_def->fixed = fixed;
   1380   ECHECK(ParseMetaData(&struct_def->attributes));
   1381   struct_def->sortbysize =
   1382     struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
   1383   EXPECT('{');
   1384   while (token_ != '}') ECHECK(ParseField(*struct_def));
   1385   auto force_align = struct_def->attributes.Lookup("force_align");
   1386   if (fixed && force_align) {
   1387     auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
   1388     if (force_align->type.base_type != BASE_TYPE_INT ||
   1389         align < struct_def->minalign ||
   1390         align > FLATBUFFERS_MAX_ALIGNMENT ||
   1391         align & (align - 1))
   1392       return Error("force_align must be a power of two integer ranging from the"
   1393                    "struct\'s natural alignment to " +
   1394                    NumToString(FLATBUFFERS_MAX_ALIGNMENT));
   1395     struct_def->minalign = align;
   1396   }
   1397   struct_def->PadLastField(struct_def->minalign);
   1398   // Check if this is a table that has manual id assignments
   1399   auto &fields = struct_def->fields.vec;
   1400   if (!struct_def->fixed && fields.size()) {
   1401     size_t num_id_fields = 0;
   1402     for (auto it = fields.begin(); it != fields.end(); ++it) {
   1403       if ((*it)->attributes.Lookup("id")) num_id_fields++;
   1404     }
   1405     // If any fields have ids..
   1406     if (num_id_fields) {
   1407       // Then all fields must have them.
   1408       if (num_id_fields != fields.size())
   1409         return Error(
   1410               "either all fields or no fields must have an 'id' attribute");
   1411       // Simply sort by id, then the fields are the same as if no ids had
   1412       // been specified.
   1413       std::sort(fields.begin(), fields.end(), compareFieldDefs);
   1414       // Verify we have a contiguous set, and reassign vtable offsets.
   1415       for (int i = 0; i < static_cast<int>(fields.size()); i++) {
   1416         if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
   1417           return Error("field id\'s must be consecutive from 0, id " +
   1418                 NumToString(i) + " missing or set twice");
   1419         fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
   1420       }
   1421     }
   1422   }
   1423 
   1424   ECHECK(CheckClash(fields, struct_def, UnionTypeFieldSuffix(),
   1425                     BASE_TYPE_UNION));
   1426   ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
   1427   ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
   1428   ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
   1429   ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
   1430   ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
   1431   EXPECT('}');
   1432   types_.Add(namespaces_.back()->GetFullyQualifiedName(struct_def->name),
   1433              new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
   1434   return NoError();
   1435 }
   1436 
   1437 CheckedError Parser::ParseService() {
   1438   std::vector<std::string> service_comment = doc_comment_;
   1439   NEXT();
   1440   auto service_name = attribute_;
   1441   EXPECT(kTokenIdentifier);
   1442   auto &service_def = *new ServiceDef();
   1443   service_def.name = service_name;
   1444   service_def.file = file_being_parsed_;
   1445   service_def.doc_comment = service_comment;
   1446   service_def.defined_namespace = namespaces_.back();
   1447   if (services_.Add(namespaces_.back()->GetFullyQualifiedName(service_name),
   1448                     &service_def))
   1449     return Error("service already exists: " + service_name);
   1450   ECHECK(ParseMetaData(&service_def.attributes));
   1451   EXPECT('{');
   1452   do {
   1453     auto rpc_name = attribute_;
   1454     EXPECT(kTokenIdentifier);
   1455     EXPECT('(');
   1456     Type reqtype, resptype;
   1457     ECHECK(ParseTypeIdent(reqtype));
   1458     EXPECT(')');
   1459     EXPECT(':');
   1460     ECHECK(ParseTypeIdent(resptype));
   1461     if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
   1462         resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
   1463         return Error("rpc request and response types must be tables");
   1464     auto &rpc = *new RPCCall();
   1465     rpc.name = rpc_name;
   1466     rpc.request = reqtype.struct_def;
   1467     rpc.response = resptype.struct_def;
   1468     if (service_def.calls.Add(rpc_name, &rpc))
   1469       return Error("rpc already exists: " + rpc_name);
   1470     ECHECK(ParseMetaData(&rpc.attributes));
   1471     EXPECT(';');
   1472   } while (token_ != '}');
   1473   NEXT();
   1474   return NoError();
   1475 }
   1476 
   1477 bool Parser::SetRootType(const char *name) {
   1478   root_struct_def_ = structs_.Lookup(name);
   1479   if (!root_struct_def_)
   1480     root_struct_def_ = structs_.Lookup(
   1481                          namespaces_.back()->GetFullyQualifiedName(name));
   1482   return root_struct_def_ != nullptr;
   1483 }
   1484 
   1485 void Parser::MarkGenerated() {
   1486   // This function marks all existing definitions as having already
   1487   // been generated, which signals no code for included files should be
   1488   // generated.
   1489   for (auto it = enums_.vec.begin();
   1490            it != enums_.vec.end(); ++it) {
   1491     (*it)->generated = true;
   1492   }
   1493   for (auto it = structs_.vec.begin();
   1494            it != structs_.vec.end(); ++it) {
   1495     (*it)->generated = true;
   1496   }
   1497   for (auto it = services_.vec.begin();
   1498            it != services_.vec.end(); ++it) {
   1499     (*it)->generated = true;
   1500   }
   1501 }
   1502 
   1503 CheckedError Parser::ParseNamespace() {
   1504   NEXT();
   1505   auto ns = new Namespace();
   1506   namespaces_.push_back(ns);
   1507   if (token_ != ';') {
   1508     for (;;) {
   1509       ns->components.push_back(attribute_);
   1510       EXPECT(kTokenIdentifier);
   1511       if (Is('.')) NEXT() else break;
   1512     }
   1513   }
   1514   EXPECT(';');
   1515   return NoError();
   1516 }
   1517 
   1518 static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
   1519   return a->value < b->value;
   1520 }
   1521 
   1522 // Best effort parsing of .proto declarations, with the aim to turn them
   1523 // in the closest corresponding FlatBuffer equivalent.
   1524 // We parse everything as identifiers instead of keywords, since we don't
   1525 // want protobuf keywords to become invalid identifiers in FlatBuffers.
   1526 CheckedError Parser::ParseProtoDecl() {
   1527   bool isextend = attribute_ == "extend";
   1528   if (attribute_ == "package") {
   1529     // These are identical in syntax to FlatBuffer's namespace decl.
   1530     ECHECK(ParseNamespace());
   1531   } else if (attribute_ == "message" || isextend) {
   1532     std::vector<std::string> struct_comment = doc_comment_;
   1533     NEXT();
   1534     StructDef *struct_def = nullptr;
   1535     if (isextend) {
   1536       if (Is('.')) NEXT();  // qualified names may start with a . ?
   1537       auto id = attribute_;
   1538       EXPECT(kTokenIdentifier);
   1539       ECHECK(ParseNamespacing(&id, nullptr));
   1540       struct_def = LookupCreateStruct(id, false);
   1541       if (!struct_def)
   1542         return Error("cannot extend unknown message type: " + id);
   1543     } else {
   1544       std::string name = attribute_;
   1545       EXPECT(kTokenIdentifier);
   1546       ECHECK(StartStruct(name, &struct_def));
   1547       // Since message definitions can be nested, we create a new namespace.
   1548       auto ns = new Namespace();
   1549       // Copy of current namespace.
   1550       *ns = *namespaces_.back();
   1551       // But with current message name.
   1552       ns->components.push_back(name);
   1553       namespaces_.push_back(ns);
   1554     }
   1555     struct_def->doc_comment = struct_comment;
   1556     ECHECK(ParseProtoFields(struct_def, isextend, false));
   1557     if (!isextend) {
   1558       // We have to remove the nested namespace, but we can't just throw it
   1559       // away, so put it at the beginning of the vector.
   1560       auto ns = namespaces_.back();
   1561       namespaces_.pop_back();
   1562       namespaces_.insert(namespaces_.begin(), ns);
   1563     }
   1564     if (Is(';')) NEXT();
   1565   } else if (attribute_ == "enum") {
   1566     // These are almost the same, just with different terminator:
   1567     EnumDef *enum_def;
   1568     ECHECK(ParseEnum(false, &enum_def));
   1569     if (Is(';')) NEXT();
   1570     // Protobuf allows them to be specified in any order, so sort afterwards.
   1571     auto &v = enum_def->vals.vec;
   1572     std::sort(v.begin(), v.end(), compareEnumVals);
   1573 
   1574     // Temp: remove any duplicates, as .fbs files can't handle them.
   1575     for (auto it = v.begin(); it != v.end(); ) {
   1576       if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
   1577       else ++it;
   1578     }
   1579   } else if (attribute_ == "syntax") {  // Skip these.
   1580     NEXT();
   1581     EXPECT('=');
   1582     EXPECT(kTokenStringConstant);
   1583     EXPECT(';');
   1584   } else if (attribute_ == "option") {  // Skip these.
   1585     ECHECK(ParseProtoOption());
   1586     EXPECT(';');
   1587   } else if (attribute_ == "service") {  // Skip these.
   1588     NEXT();
   1589     EXPECT(kTokenIdentifier);
   1590     ECHECK(ParseProtoCurliesOrIdent());
   1591   } else {
   1592     return Error("don\'t know how to parse .proto declaration starting with " +
   1593           TokenToStringId(token_));
   1594   }
   1595   return NoError();
   1596 }
   1597 
// Parses the brace-enclosed body of a .proto message (or group / oneof) and
// adds the fields it declares to struct_def.
//   struct_def:   definition that receives the fields.
//   isextend:     when true, a field whose name already exists in struct_def
//                 is reused instead of being added again.
//   inside_oneof: when true, no field qualifier (optional/required/repeated/
//                 oneof) is parsed in front of each field.
// Nested message/extend/enum declarations are forwarded to ParseProtoDecl;
// "extensions", "option" and "reserved" statements are consumed and ignored.
// Consumes the closing '}' before returning.
CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
                                      bool inside_oneof) {
  EXPECT('{');
  while (token_ != '}') {
    if (attribute_ == "message" || attribute_ == "extend" ||
        attribute_ == "enum") {
      // Nested declarations.
      ECHECK(ParseProtoDecl());
    } else if (attribute_ == "extensions") {  // Skip these.
      NEXT();
      EXPECT(kTokenIntegerConstant);
      // An optional upper bound follows as two tokens: identifier + value.
      if (Is(kTokenIdentifier)) {
        NEXT();  // to
        NEXT();  // num
      }
      EXPECT(';');
    } else if (attribute_ == "option") {  // Skip these.
      ECHECK(ParseProtoOption());
      EXPECT(';');
    } else if (attribute_ == "reserved") {  // Skip these.
      // Only a comma separated list of integer constants is accepted here.
      NEXT();
      EXPECT(kTokenIntegerConstant);
      while (Is(',')) { NEXT(); EXPECT(kTokenIntegerConstant); }
      EXPECT(';');
    } else {
      // Anything else is a field declaration. Remember its doc comment now,
      // before further tokens are consumed.
      std::vector<std::string> field_comment = doc_comment_;
      // Parse the qualifier.
      bool required = false;
      bool repeated = false;
      bool oneof = false;
      if (!inside_oneof) {
        if (attribute_ == "optional") {
          // This is the default.
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "required") {
          required = true;
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "repeated") {
          repeated = true;
          EXPECT(kTokenIdentifier);
        } else if (attribute_ == "oneof") {
          oneof = true;
          EXPECT(kTokenIdentifier);
        } else {
          // can't error, proto3 allows decls without any of the above.
        }
      }
      // Groups and oneofs have no named type of their own: a fresh struct
      // with a generated "Anonymous<N>" name is created to hold their fields.
      StructDef *anonymous_struct = nullptr;
      Type type;
      if (attribute_ == "group" || oneof) {
        // For a oneof the keyword was already consumed above.
        if (!oneof) EXPECT(kTokenIdentifier);
        auto name = "Anonymous" + NumToString(anonymous_counter++);
        ECHECK(StartStruct(name, &anonymous_struct));
        type = Type(BASE_TYPE_STRUCT, anonymous_struct);
      } else {
        ECHECK(ParseTypeFromProtoType(&type));
      }
      // Repeated elements get mapped to a vector.
      if (repeated) {
        type.element = type.base_type;
        type.base_type = BASE_TYPE_VECTOR;
      }
      std::string name = attribute_;
      // Protos may use our keywords "attribute" & "namespace" as an identifier.
      if (Is(kTokenAttribute) || Is(kTokenNameSpace)) {
        NEXT();
        // TODO: simpler to just not make these keywords?
        name += "_";  // Have to make it not a keyword.
      } else {
        EXPECT(kTokenIdentifier);
      }
      if (!oneof) {
        // Parse the field id. Since we're just translating schemas, not
        // any kind of binary compatibility, we can safely ignore these, and
        // assign our own.
        EXPECT('=');
        EXPECT(kTokenIntegerConstant);
      }
      FieldDef *field = nullptr;
      if (isextend) {
        // We allow a field to be re-defined when extending.
        // TODO: are there situations where that is problematic?
        field = struct_def->fields.Lookup(name);
      }
      if (!field) ECHECK(AddField(*struct_def, name, type, &field));
      field->doc_comment = field_comment;
      // "required" is only recorded for non-scalar fields.
      if (!IsScalar(type.base_type)) field->required = required;
      // See if there's a default specified.
      if (Is('[')) {
        NEXT();
        // Comma separated key = value options; only "default" and
        // "deprecated" are interpreted, everything else is skipped.
        for (;;) {
          auto key = attribute_;
          ECHECK(ParseProtoKey());
          EXPECT('=');
          auto val = attribute_;
          ECHECK(ParseProtoCurliesOrIdent());
          if (key == "default") {
            // Temp: skip non-numeric defaults (enums).
            // The value counts as numeric when its first character is a
            // digit, sign or '.'.
            auto numeric = strpbrk(val.c_str(), "0123456789-+.");
            if (IsScalar(type.base_type) && numeric == val.c_str())
              field->value.constant = val;
          } else if (key == "deprecated") {
            field->deprecated = val == "true";
          }
          if (!Is(',')) break;
          NEXT();
        }
        EXPECT(']');
      }
      if (anonymous_struct) {
        // Group / oneof: the body follows; its fields go into the anonymous
        // struct, and a trailing ';' is optional.
        ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
        if (Is(';')) NEXT();
      } else {
        EXPECT(';');
      }
    }
  }
  NEXT();
  return NoError();
}
   1718 
   1719 CheckedError Parser::ParseProtoKey() {
   1720   if (token_ == '(') {
   1721     NEXT();
   1722     // Skip "(a.b)" style custom attributes.
   1723     while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
   1724     EXPECT(')');
   1725     while (Is('.')) { NEXT(); EXPECT(kTokenIdentifier); }
   1726   } else {
   1727     EXPECT(kTokenIdentifier);
   1728   }
   1729   return NoError();
   1730 }
   1731 
   1732 CheckedError Parser::ParseProtoCurliesOrIdent() {
   1733   if (Is('{')) {
   1734     NEXT();
   1735     for (int nesting = 1; nesting; ) {
   1736       if (token_ == '{') nesting++;
   1737       else if (token_ == '}') nesting--;
   1738       NEXT();
   1739     }
   1740   } else {
   1741     NEXT();  // Any single token.
   1742   }
   1743   return NoError();
   1744 }
   1745 
// Consumes a .proto option of the form `<keyword> key = value`, where the
// current token is the keyword. The terminating ';' is left for the caller.
// Nothing is stored; options are ignored.
CheckedError Parser::ParseProtoOption() {
  NEXT();  // Skip the keyword itself.
  ECHECK(ParseProtoKey());
  EXPECT('=');
  // The value is either a single token or a whole { } block.
  ECHECK(ParseProtoCurliesOrIdent());
  return NoError();
}
   1753 
   1754 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
   1755 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
   1756   struct type_lookup { const char *proto_type; BaseType fb_type; };
   1757   static type_lookup lookup[] = {
   1758     { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
   1759     { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
   1760     { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
   1761     { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
   1762     { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
   1763     { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
   1764     { "bool", BASE_TYPE_BOOL },
   1765     { "string", BASE_TYPE_STRING },
   1766     { "bytes", BASE_TYPE_STRING },
   1767     { nullptr, BASE_TYPE_NONE }
   1768   };
   1769   for (auto tl = lookup; tl->proto_type; tl++) {
   1770     if (attribute_ == tl->proto_type) {
   1771       type->base_type = tl->fb_type;
   1772       NEXT();
   1773       return NoError();
   1774     }
   1775   }
   1776   if (Is('.')) NEXT();  // qualified names may start with a . ?
   1777   ECHECK(ParseTypeIdent(*type));
   1778   return NoError();
   1779 }
   1780 
   1781 CheckedError Parser::SkipAnyJsonValue() {
   1782   switch (token_) {
   1783     case '{':
   1784       ECHECK(SkipJsonObject());
   1785       break;
   1786     case kTokenStringConstant:
   1787       ECHECK(SkipJsonString());
   1788       break;
   1789     case '[':
   1790       ECHECK(SkipJsonArray());
   1791       break;
   1792     case kTokenIntegerConstant:
   1793       EXPECT(kTokenIntegerConstant);
   1794       break;
   1795     case kTokenFloatConstant:
   1796       EXPECT(kTokenFloatConstant);
   1797       break;
   1798     default:
   1799       return Error(std::string("Unexpected token:") + std::string(1, static_cast<char>(token_)));
   1800   }
   1801   return NoError();
   1802 }
   1803 
   1804 CheckedError Parser::SkipJsonObject() {
   1805   EXPECT('{');
   1806   size_t fieldn = 0;
   1807 
   1808   for (;;) {
   1809     if ((!opts.strict_json || !fieldn) && Is('}')) break;
   1810 
   1811     if (!Is(kTokenStringConstant)) {
   1812       EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
   1813     }
   1814     else {
   1815       NEXT();
   1816     }
   1817 
   1818     EXPECT(':');
   1819     ECHECK(SkipAnyJsonValue());
   1820     fieldn++;
   1821 
   1822     if (Is('}')) break;
   1823     EXPECT(',');
   1824   }
   1825 
   1826   NEXT();
   1827   return NoError();
   1828 }
   1829 
   1830 CheckedError Parser::SkipJsonArray() {
   1831   EXPECT('[');
   1832 
   1833   for (;;) {
   1834     if (Is(']')) break;
   1835 
   1836     ECHECK(SkipAnyJsonValue());
   1837 
   1838     if (Is(']')) break;
   1839     EXPECT(',');
   1840   }
   1841 
   1842   NEXT();
   1843   return NoError();
   1844 }
   1845 
// Skips a single JSON string value: the current token must be a string
// constant, which is consumed.
CheckedError Parser::SkipJsonString() {
  EXPECT(kTokenStringConstant);
  return NoError();
}
   1850 
   1851 bool Parser::Parse(const char *source, const char **include_paths,
   1852                    const char *source_filename) {
   1853   return !DoParse(source, include_paths, source_filename).Check();
   1854 }
   1855 
   1856 CheckedError Parser::DoParse(const char *source, const char **include_paths,
   1857                              const char *source_filename) {
   1858   file_being_parsed_ = source_filename ? source_filename : "";
   1859   if (source_filename &&
   1860       included_files_.find(source_filename) == included_files_.end()) {
   1861     included_files_[source_filename] = true;
   1862     files_included_per_file_[source_filename] = std::set<std::string>();
   1863   }
   1864   if (!include_paths) {
   1865     static const char *current_directory[] = { "", nullptr };
   1866     include_paths = current_directory;
   1867   }
   1868   source_ = cursor_ = source;
   1869   line_ = 1;
   1870   error_.clear();
   1871   field_stack_.clear();
   1872   builder_.Clear();
   1873   // Start with a blank namespace just in case this file doesn't have one.
   1874   namespaces_.push_back(new Namespace());
   1875   ECHECK(SkipByteOrderMark());
   1876   NEXT();
   1877   // Includes must come before type declarations:
   1878   for (;;) {
   1879     // Parse pre-include proto statements if any:
   1880     if (opts.proto_mode &&
   1881         (attribute_ == "option" || attribute_ == "syntax" ||
   1882          attribute_ == "package")) {
   1883         ECHECK(ParseProtoDecl());
   1884     } else if (Is(kTokenNativeInclude)) {
   1885       NEXT();
   1886       native_included_files_.emplace_back(attribute_);
   1887       EXPECT(kTokenStringConstant);
   1888     } else if (Is(kTokenInclude) ||
   1889                (opts.proto_mode &&
   1890                 attribute_ == "import" &&
   1891                 Is(kTokenIdentifier))) {
   1892       NEXT();
   1893       if (opts.proto_mode && attribute_ == "public") NEXT();
   1894       auto name = attribute_;
   1895       EXPECT(kTokenStringConstant);
   1896       // Look for the file in include_paths.
   1897       std::string filepath;
   1898       for (auto paths = include_paths; paths && *paths; paths++) {
   1899         filepath = flatbuffers::ConCatPathFileName(*paths, name);
   1900         if(FileExists(filepath.c_str())) break;
   1901       }
   1902       if (filepath.empty())
   1903         return Error("unable to locate include file: " + name);
   1904       if (source_filename)
   1905         files_included_per_file_[source_filename].insert(filepath);
   1906       if (included_files_.find(filepath) == included_files_.end()) {
   1907         // We found an include file that we have not parsed yet.
   1908         // Load it and parse it.
   1909         std::string contents;
   1910         if (!LoadFile(filepath.c_str(), true, &contents))
   1911           return Error("unable to load include file: " + name);
   1912         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str()));
   1913         // We generally do not want to output code for any included files:
   1914         if (!opts.generate_all) MarkGenerated();
   1915         // This is the easiest way to continue this file after an include:
   1916         // instead of saving and restoring all the state, we simply start the
   1917         // file anew. This will cause it to encounter the same include
   1918         // statement again, but this time it will skip it, because it was
   1919         // entered into included_files_.
   1920         // This is recursive, but only go as deep as the number of include
   1921         // statements.
   1922         return DoParse(source, include_paths, source_filename);
   1923       }
   1924       EXPECT(';');
   1925     } else {
   1926       break;
   1927     }
   1928   }
   1929   // Now parse all other kinds of declarations:
   1930   while (token_ != kTokenEof) {
   1931     if (opts.proto_mode) {
   1932       ECHECK(ParseProtoDecl());
   1933     } else if (token_ == kTokenNameSpace) {
   1934       ECHECK(ParseNamespace());
   1935     } else if (token_ == '{') {
   1936       if (!root_struct_def_)
   1937         return Error("no root type set to parse json with");
   1938       if (builder_.GetSize()) {
   1939         return Error("cannot have more than one json object in a file");
   1940       }
   1941       uoffset_t toff;
   1942       ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
   1943       builder_.Finish(Offset<Table>(toff),
   1944                 file_identifier_.length() ? file_identifier_.c_str() : nullptr);
   1945     } else if (token_ == kTokenEnum) {
   1946       ECHECK(ParseEnum(false, nullptr));
   1947     } else if (token_ == kTokenUnion) {
   1948       ECHECK(ParseEnum(true, nullptr));
   1949     } else if (token_ == kTokenRootType) {
   1950       NEXT();
   1951       auto root_type = attribute_;
   1952       EXPECT(kTokenIdentifier);
   1953       ECHECK(ParseNamespacing(&root_type, nullptr));
   1954       if (!SetRootType(root_type.c_str()))
   1955         return Error("unknown root type: " + root_type);
   1956       if (root_struct_def_->fixed)
   1957         return Error("root type must be a table");
   1958       EXPECT(';');
   1959     } else if (token_ == kTokenFileIdentifier) {
   1960       NEXT();
   1961       file_identifier_ = attribute_;
   1962       EXPECT(kTokenStringConstant);
   1963       if (file_identifier_.length() !=
   1964           FlatBufferBuilder::kFileIdentifierLength)
   1965         return Error("file_identifier must be exactly " +
   1966               NumToString(FlatBufferBuilder::kFileIdentifierLength) +
   1967               " characters");
   1968       EXPECT(';');
   1969     } else if (token_ == kTokenFileExtension) {
   1970       NEXT();
   1971       file_extension_ = attribute_;
   1972       EXPECT(kTokenStringConstant);
   1973       EXPECT(';');
   1974     } else if(token_ == kTokenInclude) {
   1975       return Error("includes must come before declarations");
   1976     } else if(token_ == kTokenAttribute) {
   1977       NEXT();
   1978       auto name = attribute_;
   1979       EXPECT(kTokenStringConstant);
   1980       EXPECT(';');
   1981       known_attributes_[name] = false;
   1982     } else if (token_ == kTokenService) {
   1983       ECHECK(ParseService());
   1984     } else {
   1985       ECHECK(ParseDecl());
   1986     }
   1987   }
   1988   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
   1989     if ((*it)->predecl) {
   1990       return Error("type referenced but not defined: " + (*it)->name);
   1991     }
   1992   }
   1993   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
   1994     auto &enum_def = **it;
   1995     if (enum_def.is_union) {
   1996       for (auto val_it = enum_def.vals.vec.begin();
   1997            val_it != enum_def.vals.vec.end();
   1998            ++val_it) {
   1999         auto &val = **val_it;
   2000         if (val.struct_def && val.struct_def->fixed)
   2001           return Error("only tables can be union elements: " + val.name);
   2002       }
   2003     }
   2004   }
   2005   return NoError();
   2006 }
   2007 
   2008 std::set<std::string> Parser::GetIncludedFilesRecursive(
   2009     const std::string &file_name) const {
   2010   std::set<std::string> included_files;
   2011   std::list<std::string> to_process;
   2012 
   2013   if (file_name.empty()) return included_files;
   2014   to_process.push_back(file_name);
   2015 
   2016   while (!to_process.empty()) {
   2017     std::string current = to_process.front();
   2018     to_process.pop_front();
   2019     included_files.insert(current);
   2020 
   2021     auto new_files = files_included_per_file_.at(current);
   2022     for (auto it = new_files.begin(); it != new_files.end(); ++it) {
   2023       if (included_files.find(*it) == included_files.end())
   2024         to_process.push_back(*it);
   2025     }
   2026   }
   2027 
   2028   return included_files;
   2029 }
   2030 
   2031 // Schema serialization functionality:
   2032 
   2033 template<typename T> bool compareName(const T* a, const T* b) {
   2034     return a->defined_namespace->GetFullyQualifiedName(a->name)
   2035         < b->defined_namespace->GetFullyQualifiedName(b->name);
   2036 }
   2037 
   2038 template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
   2039   // Pre-sort these vectors, such that we can set the correct indices for them.
   2040   auto vec = defvec;
   2041   std::sort(vec.begin(), vec.end(), compareName<T>);
   2042   for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
   2043 }
   2044 
   2045 void Parser::Serialize() {
   2046   builder_.Clear();
   2047   AssignIndices(structs_.vec);
   2048   AssignIndices(enums_.vec);
   2049   std::vector<Offset<reflection::Object>> object_offsets;
   2050   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
   2051     auto offset = (*it)->Serialize(&builder_, *this);
   2052     object_offsets.push_back(offset);
   2053     (*it)->serialized_location = offset.o;
   2054   }
   2055   std::vector<Offset<reflection::Enum>> enum_offsets;
   2056   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
   2057     auto offset = (*it)->Serialize(&builder_, *this);
   2058     enum_offsets.push_back(offset);
   2059     (*it)->serialized_location = offset.o;
   2060   }
   2061   auto schema_offset = reflection::CreateSchema(
   2062                          builder_,
   2063                          builder_.CreateVectorOfSortedTables(&object_offsets),
   2064                          builder_.CreateVectorOfSortedTables(&enum_offsets),
   2065                          builder_.CreateString(file_identifier_),
   2066                          builder_.CreateString(file_extension_),
   2067                          root_struct_def_
   2068                            ? root_struct_def_->serialized_location
   2069                            : 0);
   2070   builder_.Finish(schema_offset, reflection::SchemaIdentifier());
   2071 }
   2072 
   2073 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
   2074                                                 const Parser &parser) const {
   2075   std::vector<Offset<reflection::Field>> field_offsets;
   2076   for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
   2077     field_offsets.push_back(
   2078       (*it)->Serialize(builder,
   2079                        static_cast<uint16_t>(it - fields.vec.begin()), parser));
   2080   }
   2081   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
   2082   return reflection::CreateObject(*builder,
   2083                                   builder->CreateString(qualified_name),
   2084                                   builder->CreateVectorOfSortedTables(
   2085                                     &field_offsets),
   2086                                   fixed,
   2087                                   static_cast<int>(minalign),
   2088                                   static_cast<int>(bytesize),
   2089                                   SerializeAttributes(builder, parser),
   2090                                   parser.opts.binary_schema_comments
   2091                                     ? builder->CreateVectorOfStrings(
   2092                                         doc_comment)
   2093                                     : 0);
   2094 }
   2095 
   2096 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
   2097                                               uint16_t id,
   2098                                               const Parser &parser) const {
   2099   return reflection::CreateField(*builder,
   2100                                  builder->CreateString(name),
   2101                                  value.type.Serialize(builder),
   2102                                  id,
   2103                                  value.offset,
   2104                                  IsInteger(value.type.base_type)
   2105                                    ? StringToInt(value.constant.c_str())
   2106                                    : 0,
   2107                                  IsFloat(value.type.base_type)
   2108                                    ? strtod(value.constant.c_str(), nullptr)
   2109                                    : 0.0,
   2110                                  deprecated,
   2111                                  required,
   2112                                  key,
   2113                                  SerializeAttributes(builder, parser),
   2114                                  parser.opts.binary_schema_comments
   2115                                    ? builder->CreateVectorOfStrings(doc_comment)
   2116                                    : 0);
   2117   // TODO: value.constant is almost always "0", we could save quite a bit of
   2118   // space by sharing it. Same for common values of value.type.
   2119 }
   2120 
   2121 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
   2122                                             const Parser &parser) const {
   2123   std::vector<Offset<reflection::EnumVal>> enumval_offsets;
   2124   for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
   2125     enumval_offsets.push_back((*it)->Serialize(builder));
   2126   }
   2127   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
   2128   return reflection::CreateEnum(*builder,
   2129                                 builder->CreateString(qualified_name),
   2130                                 builder->CreateVector(enumval_offsets),
   2131                                 is_union,
   2132                                 underlying_type.Serialize(builder),
   2133                                 SerializeAttributes(builder, parser),
   2134                                 parser.opts.binary_schema_comments
   2135                                   ? builder->CreateVectorOfStrings(doc_comment)
   2136                                   : 0);
   2137 }
   2138 
   2139 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder) const
   2140                                                                                {
   2141   return reflection::CreateEnumVal(*builder,
   2142                                    builder->CreateString(name),
   2143                                    value,
   2144                                    struct_def
   2145                                      ? struct_def->serialized_location
   2146                                      : 0);
   2147 }
   2148 
   2149 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
   2150   return reflection::CreateType(*builder,
   2151                                 static_cast<reflection::BaseType>(base_type),
   2152                                 static_cast<reflection::BaseType>(element),
   2153                                 struct_def ? struct_def->index :
   2154                                              (enum_def ? enum_def->index : -1));
   2155 }
   2156 
   2157 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<
   2158   reflection::KeyValue>>>
   2159     Definition::SerializeAttributes(FlatBufferBuilder *builder,
   2160                                     const Parser &parser) const {
   2161   std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
   2162   for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
   2163     auto it = parser.known_attributes_.find(kv->first);
   2164     assert(it != parser.known_attributes_.end());
   2165     if (!it->second) {  // Custom attribute.
   2166       attrs.push_back(
   2167           reflection::CreateKeyValue(*builder, builder->CreateString(kv->first),
   2168                                      builder->CreateString(
   2169                                          kv->second->constant)));
   2170     }
   2171   }
   2172   if (attrs.size()) {
   2173     return builder->CreateVectorOfSortedTables(&attrs);
   2174   } else {
   2175     return 0;
   2176   }
   2177 }
   2178 
   2179 std::string Parser::ConformTo(const Parser &base) {
   2180   for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
   2181     auto &struct_def = **sit;
   2182     auto qualified_name =
   2183         struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
   2184     auto struct_def_base = base.structs_.Lookup(qualified_name);
   2185     if (!struct_def_base) continue;
   2186     for (auto fit = struct_def.fields.vec.begin();
   2187              fit != struct_def.fields.vec.end(); ++fit) {
   2188       auto &field = **fit;
   2189       auto field_base = struct_def_base->fields.Lookup(field.name);
   2190       if (field_base) {
   2191         if (field.value.offset != field_base->value.offset)
   2192           return "offsets differ for field: " + field.name;
   2193         if (field.value.constant != field_base->value.constant)
   2194           return "defaults differ for field: " + field.name;
   2195         if (!EqualByName(field.value.type, field_base->value.type))
   2196           return "types differ for field: " + field.name;
   2197       } else {
   2198         // Doesn't have to exist, deleting fields is fine.
   2199         // But we should check if there is a field that has the same offset
   2200         // but is incompatible (in the case of field renaming).
   2201         for (auto fbit = struct_def_base->fields.vec.begin();
   2202                  fbit != struct_def_base->fields.vec.end(); ++fbit) {
   2203           field_base = *fbit;
   2204           if (field.value.offset == field_base->value.offset) {
   2205             if (!EqualByName(field.value.type, field_base->value.type))
   2206               return "field renamed to different type: " + field.name;
   2207             break;
   2208           }
   2209         }
   2210       }
   2211     }
   2212   }
   2213   for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
   2214     auto &enum_def = **eit;
   2215     auto qualified_name =
   2216         enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
   2217     auto enum_def_base = base.enums_.Lookup(qualified_name);
   2218     if (!enum_def_base) continue;
   2219     for (auto evit = enum_def.vals.vec.begin();
   2220              evit != enum_def.vals.vec.end(); ++evit) {
   2221       auto &enum_val = **evit;
   2222       auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
   2223       if (enum_val_base) {
   2224         if (enum_val.value != enum_val_base->value)
   2225           return "values differ for enum: " + enum_val.name;
   2226       }
   2227     }
   2228   }
   2229   return "";
   2230 }
   2231 
   2232 }  // namespace flatbuffers
   2233