Home | History | Annotate | Download | only in symbolize
      1 // Copyright (c) 2006, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 //
     30 // Author: Satoru Takabayashi
     31 //
     32 // For reference check out:
     33 // http://www.codesourcery.com/public/cxx-abi/abi.html#mangling
     34 //
     35 // Note that we only have partial C++0x support yet.
     36 
     37 #include <stdio.h>  // for NULL
     38 #include "demangle.h"
     39 
     40 _START_GOOGLE_NAMESPACE_
     41 
     42 typedef struct {
     43   const char *abbrev;
     44   const char *real_name;
     45 } AbbrevPair;
     46 
     47 // List of operators from Itanium C++ ABI.
     48 static const AbbrevPair kOperatorList[] = {
     49   { "nw", "new" },
     50   { "na", "new[]" },
     51   { "dl", "delete" },
     52   { "da", "delete[]" },
     53   { "ps", "+" },
     54   { "ng", "-" },
     55   { "ad", "&" },
     56   { "de", "*" },
     57   { "co", "~" },
     58   { "pl", "+" },
     59   { "mi", "-" },
     60   { "ml", "*" },
     61   { "dv", "/" },
     62   { "rm", "%" },
     63   { "an", "&" },
     64   { "or", "|" },
     65   { "eo", "^" },
     66   { "aS", "=" },
     67   { "pL", "+=" },
     68   { "mI", "-=" },
     69   { "mL", "*=" },
     70   { "dV", "/=" },
     71   { "rM", "%=" },
     72   { "aN", "&=" },
     73   { "oR", "|=" },
     74   { "eO", "^=" },
     75   { "ls", "<<" },
     76   { "rs", ">>" },
     77   { "lS", "<<=" },
     78   { "rS", ">>=" },
     79   { "eq", "==" },
     80   { "ne", "!=" },
     81   { "lt", "<" },
     82   { "gt", ">" },
     83   { "le", "<=" },
     84   { "ge", ">=" },
     85   { "nt", "!" },
     86   { "aa", "&&" },
     87   { "oo", "||" },
     88   { "pp", "++" },
     89   { "mm", "--" },
     90   { "cm", "," },
     91   { "pm", "->*" },
     92   { "pt", "->" },
     93   { "cl", "()" },
     94   { "ix", "[]" },
     95   { "qu", "?" },
     96   { "st", "sizeof" },
     97   { "sz", "sizeof" },
     98   { NULL, NULL },
     99 };
    100 
    101 // List of builtin types from Itanium C++ ABI.
    102 static const AbbrevPair kBuiltinTypeList[] = {
    103   { "v", "void" },
    104   { "w", "wchar_t" },
    105   { "b", "bool" },
    106   { "c", "char" },
    107   { "a", "signed char" },
    108   { "h", "unsigned char" },
    109   { "s", "short" },
    110   { "t", "unsigned short" },
    111   { "i", "int" },
    112   { "j", "unsigned int" },
    113   { "l", "long" },
    114   { "m", "unsigned long" },
    115   { "x", "long long" },
    116   { "y", "unsigned long long" },
    117   { "n", "__int128" },
    118   { "o", "unsigned __int128" },
    119   { "f", "float" },
    120   { "d", "double" },
    121   { "e", "long double" },
    122   { "g", "__float128" },
    123   { "z", "ellipsis" },
    124   { NULL, NULL }
    125 };
    126 
    127 // List of substitutions Itanium C++ ABI.
    128 static const AbbrevPair kSubstitutionList[] = {
    129   { "St", "" },
    130   { "Sa", "allocator" },
    131   { "Sb", "basic_string" },
    132   // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
    133   { "Ss", "string"},
    134   // std::basic_istream<char, std::char_traits<char> >
    135   { "Si", "istream" },
    136   // std::basic_ostream<char, std::char_traits<char> >
    137   { "So", "ostream" },
    138   // std::basic_iostream<char, std::char_traits<char> >
    139   { "Sd", "iostream" },
    140   { NULL, NULL }
    141 };
    142 
    143 // State needed for demangling.
    144 typedef struct {
    145   const char *mangled_cur;  // Cursor of mangled name.
    146   char *out_cur;            // Cursor of output string.
    147   const char *out_begin;    // Beginning of output string.
    148   const char *out_end;      // End of output string.
    149   const char *prev_name;    // For constructors/destructors.
    150   int prev_name_length;     // For constructors/destructors.
    151   short nest_level;         // For nested names.
    152   bool append;              // Append flag.
    153   bool overflowed;          // True if output gets overflowed.
    154 } State;
    155 
    156 // We don't use strlen() in libc since it's not guaranteed to be async
    157 // signal safe.
    158 static size_t StrLen(const char *str) {
    159   size_t len = 0;
    160   while (*str != '\0') {
    161     ++str;
    162     ++len;
    163   }
    164   return len;
    165 }
    166 
    167 // Returns true if "str" has at least "n" characters remaining.
    168 static bool AtLeastNumCharsRemaining(const char *str, int n) {
    169   for (int i = 0; i < n; ++i) {
    170     if (str[i] == '\0') {
    171       return false;
    172     }
    173   }
    174   return true;
    175 }
    176 
    177 // Returns true if "str" has "prefix" as a prefix.
    178 static bool StrPrefix(const char *str, const char *prefix) {
    179   size_t i = 0;
    180   while (str[i] != '\0' && prefix[i] != '\0' &&
    181          str[i] == prefix[i]) {
    182     ++i;
    183   }
    184   return prefix[i] == '\0';  // Consumed everything in "prefix".
    185 }
    186 
    187 static void InitState(State *state, const char *mangled,
    188                       char *out, int out_size) {
    189   state->mangled_cur = mangled;
    190   state->out_cur = out;
    191   state->out_begin = out;
    192   state->out_end = out + out_size;
    193   state->prev_name  = NULL;
    194   state->prev_name_length = -1;
    195   state->nest_level = -1;
    196   state->append = true;
    197   state->overflowed = false;
    198 }
    199 
    200 // Returns true and advances "mangled_cur" if we find "one_char_token"
    201 // at "mangled_cur" position.  It is assumed that "one_char_token" does
    202 // not contain '\0'.
    203 static bool ParseOneCharToken(State *state, const char one_char_token) {
    204   if (state->mangled_cur[0] == one_char_token) {
    205     ++state->mangled_cur;
    206     return true;
    207   }
    208   return false;
    209 }
    210 
    211 // Returns true and advances "mangled_cur" if we find "two_char_token"
    212 // at "mangled_cur" position.  It is assumed that "two_char_token" does
    213 // not contain '\0'.
    214 static bool ParseTwoCharToken(State *state, const char *two_char_token) {
    215   if (state->mangled_cur[0] == two_char_token[0] &&
    216       state->mangled_cur[1] == two_char_token[1]) {
    217     state->mangled_cur += 2;
    218     return true;
    219   }
    220   return false;
    221 }
    222 
    223 // Returns true and advances "mangled_cur" if we find any character in
    224 // "char_class" at "mangled_cur" position.
    225 static bool ParseCharClass(State *state, const char *char_class) {
    226   const char *p = char_class;
    227   for (; *p != '\0'; ++p) {
    228     if (state->mangled_cur[0] == *p) {
    229       ++state->mangled_cur;
    230       return true;
    231     }
    232   }
    233   return false;
    234 }
    235 
    236 // This function is used for handling an optional non-terminal.
    237 static bool Optional(bool) {
    238   return true;
    239 }
    240 
    241 // This function is used for handling <non-terminal>+ syntax.
    242 typedef bool (*ParseFunc)(State *);
    243 static bool OneOrMore(ParseFunc parse_func, State *state) {
    244   if (parse_func(state)) {
    245     while (parse_func(state)) {
    246     }
    247     return true;
    248   }
    249   return false;
    250 }
    251 
    252 // This function is used for handling <non-terminal>* syntax. The function
    253 // always returns true and must be followed by a termination token or a
    254 // terminating sequence not handled by parse_func (e.g.
    255 // ParseOneCharToken(state, 'E')).
    256 static bool ZeroOrMore(ParseFunc parse_func, State *state) {
    257   while (parse_func(state)) {
    258   }
    259   return true;
    260 }
    261 
    262 // Append "str" at "out_cur".  If there is an overflow, "overflowed"
    263 // is set to true for later use.  The output string is ensured to
    264 // always terminate with '\0' as long as there is no overflow.
    265 static void Append(State *state, const char * const str, const int length) {
    266   int i;
    267   for (i = 0; i < length; ++i) {
    268     if (state->out_cur + 1 < state->out_end) {  // +1 for '\0'
    269       *state->out_cur = str[i];
    270       ++state->out_cur;
    271     } else {
    272       state->overflowed = true;
    273       break;
    274     }
    275   }
    276   if (!state->overflowed) {
    277     *state->out_cur = '\0';  // Terminate it with '\0'
    278   }
    279 }
    280 
    281 // We don't use equivalents in libc to avoid locale issues.
    282 static bool IsLower(char c) {
    283   return c >= 'a' && c <= 'z';
    284 }
    285 
    286 static bool IsAlpha(char c) {
    287   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    288 }
    289 
    290 static bool IsDigit(char c) {
    291   return c >= '0' && c <= '9';
    292 }
    293 
    294 // Returns true if "str" is a function clone suffix.  These suffixes are used
    295 // by GCC 4.5.x and later versions to indicate functions which have been
    296 // cloned during optimization.  We treat any sequence (.<alpha>+.<digit>+)+ as
    297 // a function clone suffix.
    298 static bool IsFunctionCloneSuffix(const char *str) {
    299   size_t i = 0;
    300   while (str[i] != '\0') {
    301     // Consume a single .<alpha>+.<digit>+ sequence.
    302     if (str[i] != '.' || !IsAlpha(str[i + 1])) {
    303       return false;
    304     }
    305     i += 2;
    306     while (IsAlpha(str[i])) {
    307       ++i;
    308     }
    309     if (str[i] != '.' || !IsDigit(str[i + 1])) {
    310       return false;
    311     }
    312     i += 2;
    313     while (IsDigit(str[i])) {
    314       ++i;
    315     }
    316   }
    317   return true;  // Consumed everything in "str".
    318 }
    319 
    320 // Append "str" with some tweaks, iff "append" state is true.
    321 // Returns true so that it can be placed in "if" conditions.
    322 static void MaybeAppendWithLength(State *state, const char * const str,
    323                                   const int length) {
    324   if (state->append && length > 0) {
    325     // Append a space if the output buffer ends with '<' and "str"
    326     // starts with '<' to avoid <<<.
    327     if (str[0] == '<' && state->out_begin < state->out_cur  &&
    328         state->out_cur[-1] == '<') {
    329       Append(state, " ", 1);
    330     }
    331     // Remember the last identifier name for ctors/dtors.
    332     if (IsAlpha(str[0]) || str[0] == '_') {
    333       state->prev_name = state->out_cur;
    334       state->prev_name_length = length;
    335     }
    336     Append(state, str, length);
    337   }
    338 }
    339 
    340 // A convenient wrapper arount MaybeAppendWithLength().
    341 static bool MaybeAppend(State *state, const char * const str) {
    342   if (state->append) {
    343     int length = StrLen(str);
    344     MaybeAppendWithLength(state, str, length);
    345   }
    346   return true;
    347 }
    348 
    349 // This function is used for handling nested names.
    350 static bool EnterNestedName(State *state) {
    351   state->nest_level = 0;
    352   return true;
    353 }
    354 
    355 // This function is used for handling nested names.
    356 static bool LeaveNestedName(State *state, short prev_value) {
    357   state->nest_level = prev_value;
    358   return true;
    359 }
    360 
    361 // Disable the append mode not to print function parameters, etc.
    362 static bool DisableAppend(State *state) {
    363   state->append = false;
    364   return true;
    365 }
    366 
    367 // Restore the append mode to the previous state.
    368 static bool RestoreAppend(State *state, bool prev_value) {
    369   state->append = prev_value;
    370   return true;
    371 }
    372 
    373 // Increase the nest level for nested names.
    374 static void MaybeIncreaseNestLevel(State *state) {
    375   if (state->nest_level > -1) {
    376     ++state->nest_level;
    377   }
    378 }
    379 
    380 // Appends :: for nested names if necessary.
    381 static void MaybeAppendSeparator(State *state) {
    382   if (state->nest_level >= 1) {
    383     MaybeAppend(state, "::");
    384   }
    385 }
    386 
    387 // Cancel the last separator if necessary.
    388 static void MaybeCancelLastSeparator(State *state) {
    389   if (state->nest_level >= 1 && state->append &&
    390       state->out_begin <= state->out_cur - 2) {
    391     state->out_cur -= 2;
    392     *state->out_cur = '\0';
    393   }
    394 }
    395 
    396 // Returns true if the identifier of the given length pointed to by
    397 // "mangled_cur" is anonymous namespace.
    398 static bool IdentifierIsAnonymousNamespace(State *state, int length) {
    399   static const char anon_prefix[] = "_GLOBAL__N_";
    400   return (length > (int)sizeof(anon_prefix) - 1 &&  // Should be longer.
    401           StrPrefix(state->mangled_cur, anon_prefix));
    402 }
    403 
    404 // Forward declarations of our parsing functions.
    405 static bool ParseMangledName(State *state);
    406 static bool ParseEncoding(State *state);
    407 static bool ParseName(State *state);
    408 static bool ParseUnscopedName(State *state);
    409 static bool ParseUnscopedTemplateName(State *state);
    410 static bool ParseNestedName(State *state);
    411 static bool ParsePrefix(State *state);
    412 static bool ParseUnqualifiedName(State *state);
    413 static bool ParseSourceName(State *state);
    414 static bool ParseLocalSourceName(State *state);
    415 static bool ParseNumber(State *state, int *number_out);
    416 static bool ParseFloatNumber(State *state);
    417 static bool ParseSeqId(State *state);
    418 static bool ParseIdentifier(State *state, int length);
    419 static bool ParseOperatorName(State *state);
    420 static bool ParseSpecialName(State *state);
    421 static bool ParseCallOffset(State *state);
    422 static bool ParseNVOffset(State *state);
    423 static bool ParseVOffset(State *state);
    424 static bool ParseCtorDtorName(State *state);
    425 static bool ParseType(State *state);
    426 static bool ParseCVQualifiers(State *state);
    427 static bool ParseBuiltinType(State *state);
    428 static bool ParseFunctionType(State *state);
    429 static bool ParseBareFunctionType(State *state);
    430 static bool ParseClassEnumType(State *state);
    431 static bool ParseArrayType(State *state);
    432 static bool ParsePointerToMemberType(State *state);
    433 static bool ParseTemplateParam(State *state);
    434 static bool ParseTemplateTemplateParam(State *state);
    435 static bool ParseTemplateArgs(State *state);
    436 static bool ParseTemplateArg(State *state);
    437 static bool ParseExpression(State *state);
    438 static bool ParseExprPrimary(State *state);
    439 static bool ParseLocalName(State *state);
    440 static bool ParseDiscriminator(State *state);
    441 static bool ParseSubstitution(State *state);
    442 
    443 // Implementation note: the following code is a straightforward
    444 // translation of the Itanium C++ ABI defined in BNF with a couple of
    445 // exceptions.
    446 //
    447 // - Support GNU extensions not defined in the Itanium C++ ABI
    448 // - <prefix> and <template-prefix> are combined to avoid infinite loop
    449 // - Reorder patterns to shorten the code
    450 // - Reorder patterns to give greedier functions precedence
    451 //   We'll mark "Less greedy than" for these cases in the code
    452 //
    453 // Each parsing function changes the state and returns true on
    454 // success.  Otherwise, don't change the state and returns false.  To
    455 // ensure that the state isn't changed in the latter case, we save the
    456 // original state before we call more than one parsing functions
    457 // consecutively with &&, and restore the state if unsuccessful.  See
    458 // ParseEncoding() as an example of this convention.  We follow the
    459 // convention throughout the code.
    460 //
    461 // Originally we tried to do demangling without following the full ABI
    462 // syntax but it turned out we needed to follow the full syntax to
    463 // parse complicated cases like nested template arguments.  Note that
    464 // implementing a full-fledged demangler isn't trivial (libiberty's
    465 // cp-demangle.c has +4300 lines).
    466 //
    467 // Note that (foo) in <(foo) ...> is a modifier to be ignored.
    468 //
    469 // Reference:
    470 // - Itanium C++ ABI
    471 //   <http://www.codesourcery.com/cxx-abi/abi.html#mangling>
    472 
    473 // <mangled-name> ::= _Z <encoding>
    474 static bool ParseMangledName(State *state) {
    475   return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
    476 }
    477 
    478 // <encoding> ::= <(function) name> <bare-function-type>
    479 //            ::= <(data) name>
    480 //            ::= <special-name>
    481 static bool ParseEncoding(State *state) {
    482   State copy = *state;
    483   if (ParseName(state) && ParseBareFunctionType(state)) {
    484     return true;
    485   }
    486   *state = copy;
    487 
    488   if (ParseName(state) || ParseSpecialName(state)) {
    489     return true;
    490   }
    491   return false;
    492 }
    493 
    494 // <name> ::= <nested-name>
    495 //        ::= <unscoped-template-name> <template-args>
    496 //        ::= <unscoped-name>
    497 //        ::= <local-name>
    498 static bool ParseName(State *state) {
    499   if (ParseNestedName(state) || ParseLocalName(state)) {
    500     return true;
    501   }
    502 
    503   State copy = *state;
    504   if (ParseUnscopedTemplateName(state) &&
    505       ParseTemplateArgs(state)) {
    506     return true;
    507   }
    508   *state = copy;
    509 
    510   // Less greedy than <unscoped-template-name> <template-args>.
    511   if (ParseUnscopedName(state)) {
    512     return true;
    513   }
    514   return false;
    515 }
    516 
    517 // <unscoped-name> ::= <unqualified-name>
    518 //                 ::= St <unqualified-name>
    519 static bool ParseUnscopedName(State *state) {
    520   if (ParseUnqualifiedName(state)) {
    521     return true;
    522   }
    523 
    524   State copy = *state;
    525   if (ParseTwoCharToken(state, "St") &&
    526       MaybeAppend(state, "std::") &&
    527       ParseUnqualifiedName(state)) {
    528     return true;
    529   }
    530   *state = copy;
    531   return false;
    532 }
    533 
    534 // <unscoped-template-name> ::= <unscoped-name>
    535 //                          ::= <substitution>
    536 static bool ParseUnscopedTemplateName(State *state) {
    537   return ParseUnscopedName(state) || ParseSubstitution(state);
    538 }
    539 
    540 // <nested-name> ::= N [<CV-qualifiers>] <prefix> <unqualified-name> E
    541 //               ::= N [<CV-qualifiers>] <template-prefix> <template-args> E
    542 static bool ParseNestedName(State *state) {
    543   State copy = *state;
    544   if (ParseOneCharToken(state, 'N') &&
    545       EnterNestedName(state) &&
    546       Optional(ParseCVQualifiers(state)) &&
    547       ParsePrefix(state) &&
    548       LeaveNestedName(state, copy.nest_level) &&
    549       ParseOneCharToken(state, 'E')) {
    550     return true;
    551   }
    552   *state = copy;
    553   return false;
    554 }
    555 
    556 // This part is tricky.  If we literally translate them to code, we'll
    557 // end up infinite loop.  Hence we merge them to avoid the case.
    558 //
    559 // <prefix> ::= <prefix> <unqualified-name>
    560 //          ::= <template-prefix> <template-args>
    561 //          ::= <template-param>
    562 //          ::= <substitution>
    563 //          ::= # empty
    564 // <template-prefix> ::= <prefix> <(template) unqualified-name>
    565 //                   ::= <template-param>
    566 //                   ::= <substitution>
    567 static bool ParsePrefix(State *state) {
    568   bool has_something = false;
    569   while (true) {
    570     MaybeAppendSeparator(state);
    571     if (ParseTemplateParam(state) ||
    572         ParseSubstitution(state) ||
    573         ParseUnscopedName(state)) {
    574       has_something = true;
    575       MaybeIncreaseNestLevel(state);
    576       continue;
    577     }
    578     MaybeCancelLastSeparator(state);
    579     if (has_something && ParseTemplateArgs(state)) {
    580       return ParsePrefix(state);
    581     } else {
    582       break;
    583     }
    584   }
    585   return true;
    586 }
    587 
    588 // <unqualified-name> ::= <operator-name>
    589 //                    ::= <ctor-dtor-name>
    590 //                    ::= <source-name>
    591 //                    ::= <local-source-name>
    592 static bool ParseUnqualifiedName(State *state) {
    593   return (ParseOperatorName(state) ||
    594           ParseCtorDtorName(state) ||
    595           ParseSourceName(state) ||
    596           ParseLocalSourceName(state));
    597 }
    598 
    599 // <source-name> ::= <positive length number> <identifier>
    600 static bool ParseSourceName(State *state) {
    601   State copy = *state;
    602   int length = -1;
    603   if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
    604     return true;
    605   }
    606   *state = copy;
    607   return false;
    608 }
    609 
    610 // <local-source-name> ::= L <source-name> [<discriminator>]
    611 //
    612 // References:
    613 //   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
    614 //   http://gcc.gnu.org/viewcvs?view=rev&revision=124467
    615 static bool ParseLocalSourceName(State *state) {
    616   State copy = *state;
    617   if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
    618       Optional(ParseDiscriminator(state))) {
    619     return true;
    620   }
    621   *state = copy;
    622   return false;
    623 }
    624 
    625 // <number> ::= [n] <non-negative decimal integer>
    626 // If "number_out" is non-null, then *number_out is set to the value of the
    627 // parsed number on success.
    628 static bool ParseNumber(State *state, int *number_out) {
    629   int sign = 1;
    630   if (ParseOneCharToken(state, 'n')) {
    631     sign = -1;
    632   }
    633   const char *p = state->mangled_cur;
    634   int number = 0;
    635   for (;*p != '\0'; ++p) {
    636     if (IsDigit(*p)) {
    637       number = number * 10 + (*p - '0');
    638     } else {
    639       break;
    640     }
    641   }
    642   if (p != state->mangled_cur) {  // Conversion succeeded.
    643     state->mangled_cur = p;
    644     if (number_out != NULL) {
    645       *number_out = number * sign;
    646     }
    647     return true;
    648   }
    649   return false;
    650 }
    651 
    652 // Floating-point literals are encoded using a fixed-length lowercase
    653 // hexadecimal string.
    654 static bool ParseFloatNumber(State *state) {
    655   const char *p = state->mangled_cur;
    656   for (;*p != '\0'; ++p) {
    657     if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
    658       break;
    659     }
    660   }
    661   if (p != state->mangled_cur) {  // Conversion succeeded.
    662     state->mangled_cur = p;
    663     return true;
    664   }
    665   return false;
    666 }
    667 
    668 // The <seq-id> is a sequence number in base 36,
    669 // using digits and upper case letters
    670 static bool ParseSeqId(State *state) {
    671   const char *p = state->mangled_cur;
    672   for (;*p != '\0'; ++p) {
    673     if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
    674       break;
    675     }
    676   }
    677   if (p != state->mangled_cur) {  // Conversion succeeded.
    678     state->mangled_cur = p;
    679     return true;
    680   }
    681   return false;
    682 }
    683 
    684 // <identifier> ::= <unqualified source code identifier> (of given length)
    685 static bool ParseIdentifier(State *state, int length) {
    686   if (length == -1 ||
    687       !AtLeastNumCharsRemaining(state->mangled_cur, length)) {
    688     return false;
    689   }
    690   if (IdentifierIsAnonymousNamespace(state, length)) {
    691     MaybeAppend(state, "(anonymous namespace)");
    692   } else {
    693     MaybeAppendWithLength(state, state->mangled_cur, length);
    694   }
    695   state->mangled_cur += length;
    696   return true;
    697 }
    698 
    699 // <operator-name> ::= nw, and other two letters cases
    700 //                 ::= cv <type>  # (cast)
    701 //                 ::= v  <digit> <source-name> # vendor extended operator
    702 static bool ParseOperatorName(State *state) {
    703   if (!AtLeastNumCharsRemaining(state->mangled_cur, 2)) {
    704     return false;
    705   }
    706   // First check with "cv" (cast) case.
    707   State copy = *state;
    708   if (ParseTwoCharToken(state, "cv") &&
    709       MaybeAppend(state, "operator ") &&
    710       EnterNestedName(state) &&
    711       ParseType(state) &&
    712       LeaveNestedName(state, copy.nest_level)) {
    713     return true;
    714   }
    715   *state = copy;
    716 
    717   // Then vendor extended operators.
    718   if (ParseOneCharToken(state, 'v') && ParseCharClass(state, "0123456789") &&
    719       ParseSourceName(state)) {
    720     return true;
    721   }
    722   *state = copy;
    723 
    724   // Other operator names should start with a lower alphabet followed
    725   // by a lower/upper alphabet.
    726   if (!(IsLower(state->mangled_cur[0]) &&
    727         IsAlpha(state->mangled_cur[1]))) {
    728     return false;
    729   }
    730   // We may want to perform a binary search if we really need speed.
    731   const AbbrevPair *p;
    732   for (p = kOperatorList; p->abbrev != NULL; ++p) {
    733     if (state->mangled_cur[0] == p->abbrev[0] &&
    734         state->mangled_cur[1] == p->abbrev[1]) {
    735       MaybeAppend(state, "operator");
    736       if (IsLower(*p->real_name)) {  // new, delete, etc.
    737         MaybeAppend(state, " ");
    738       }
    739       MaybeAppend(state, p->real_name);
    740       state->mangled_cur += 2;
    741       return true;
    742     }
    743   }
    744   return false;
    745 }
    746 
    747 // <special-name> ::= TV <type>
    748 //                ::= TT <type>
    749 //                ::= TI <type>
    750 //                ::= TS <type>
    751 //                ::= Tc <call-offset> <call-offset> <(base) encoding>
    752 //                ::= GV <(object) name>
    753 //                ::= T <call-offset> <(base) encoding>
    754 // G++ extensions:
    755 //                ::= TC <type> <(offset) number> _ <(base) type>
    756 //                ::= TF <type>
    757 //                ::= TJ <type>
    758 //                ::= GR <name>
    759 //                ::= GA <encoding>
    760 //                ::= Th <call-offset> <(base) encoding>
    761 //                ::= Tv <call-offset> <(base) encoding>
    762 //
    763 // Note: we don't care much about them since they don't appear in
    764 // stack traces.  The are special data.
    765 static bool ParseSpecialName(State *state) {
    766   State copy = *state;
    767   if (ParseOneCharToken(state, 'T') &&
    768       ParseCharClass(state, "VTIS") &&
    769       ParseType(state)) {
    770     return true;
    771   }
    772   *state = copy;
    773 
    774   if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
    775       ParseCallOffset(state) && ParseEncoding(state)) {
    776     return true;
    777   }
    778   *state = copy;
    779 
    780   if (ParseTwoCharToken(state, "GV") &&
    781       ParseName(state)) {
    782     return true;
    783   }
    784   *state = copy;
    785 
    786   if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
    787       ParseEncoding(state)) {
    788     return true;
    789   }
    790   *state = copy;
    791 
    792   // G++ extensions
    793   if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
    794       ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
    795       DisableAppend(state) &&
    796       ParseType(state)) {
    797     RestoreAppend(state, copy.append);
    798     return true;
    799   }
    800   *state = copy;
    801 
    802   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
    803       ParseType(state)) {
    804     return true;
    805   }
    806   *state = copy;
    807 
    808   if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
    809     return true;
    810   }
    811   *state = copy;
    812 
    813   if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
    814     return true;
    815   }
    816   *state = copy;
    817 
    818   if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
    819       ParseCallOffset(state) && ParseEncoding(state)) {
    820     return true;
    821   }
    822   *state = copy;
    823   return false;
    824 }
    825 
    826 // <call-offset> ::= h <nv-offset> _
    827 //               ::= v <v-offset> _
    828 static bool ParseCallOffset(State *state) {
    829   State copy = *state;
    830   if (ParseOneCharToken(state, 'h') &&
    831       ParseNVOffset(state) && ParseOneCharToken(state, '_')) {
    832     return true;
    833   }
    834   *state = copy;
    835 
    836   if (ParseOneCharToken(state, 'v') &&
    837       ParseVOffset(state) && ParseOneCharToken(state, '_')) {
    838     return true;
    839   }
    840   *state = copy;
    841 
    842   return false;
    843 }
    844 
    845 // <nv-offset> ::= <(offset) number>
    846 static bool ParseNVOffset(State *state) {
    847   return ParseNumber(state, NULL);
    848 }
    849 
    850 // <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
    851 static bool ParseVOffset(State *state) {
    852   State copy = *state;
    853   if (ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
    854       ParseNumber(state, NULL)) {
    855     return true;
    856   }
    857   *state = copy;
    858   return false;
    859 }
    860 
    861 // <ctor-dtor-name> ::= C1 | C2 | C3
    862 //                  ::= D0 | D1 | D2
    863 static bool ParseCtorDtorName(State *state) {
    864   State copy = *state;
    865   if (ParseOneCharToken(state, 'C') &&
    866       ParseCharClass(state, "123")) {
    867     const char * const prev_name = state->prev_name;
    868     const int prev_name_length = state->prev_name_length;
    869     MaybeAppendWithLength(state, prev_name, prev_name_length);
    870     return true;
    871   }
    872   *state = copy;
    873 
    874   if (ParseOneCharToken(state, 'D') &&
    875       ParseCharClass(state, "012")) {
    876     const char * const prev_name = state->prev_name;
    877     const int prev_name_length = state->prev_name_length;
    878     MaybeAppend(state, "~");
    879     MaybeAppendWithLength(state, prev_name, prev_name_length);
    880     return true;
    881   }
    882   *state = copy;
    883   return false;
    884 }
    885 
    886 // <type> ::= <CV-qualifiers> <type>
    887 //        ::= P <type>   # pointer-to
    888 //        ::= R <type>   # reference-to
    889 //        ::= O <type>   # rvalue reference-to (C++0x)
    890 //        ::= C <type>   # complex pair (C 2000)
    891 //        ::= G <type>   # imaginary (C 2000)
    892 //        ::= U <source-name> <type>  # vendor extended type qualifier
    893 //        ::= <builtin-type>
    894 //        ::= <function-type>
    895 //        ::= <class-enum-type>
    896 //        ::= <array-type>
    897 //        ::= <pointer-to-member-type>
    898 //        ::= <template-template-param> <template-args>
    899 //        ::= <template-param>
    900 //        ::= <substitution>
    901 //        ::= Dp <type>          # pack expansion of (C++0x)
    902 //        ::= Dt <expression> E  # decltype of an id-expression or class
    903 //                               # member access (C++0x)
    904 //        ::= DT <expression> E  # decltype of an expression (C++0x)
    905 //
    906 static bool ParseType(State *state) {
    907   // We should check CV-qualifers, and PRGC things first.
    908   State copy = *state;
    909   if (ParseCVQualifiers(state) && ParseType(state)) {
    910     return true;
    911   }
    912   *state = copy;
    913 
    914   if (ParseCharClass(state, "OPRCG") && ParseType(state)) {
    915     return true;
    916   }
    917   *state = copy;
    918 
    919   if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
    920     return true;
    921   }
    922   *state = copy;
    923 
    924   if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
    925       ParseExpression(state) && ParseOneCharToken(state, 'E')) {
    926     return true;
    927   }
    928   *state = copy;
    929 
    930   if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
    931       ParseType(state)) {
    932     return true;
    933   }
    934   *state = copy;
    935 
    936   if (ParseBuiltinType(state) ||
    937       ParseFunctionType(state) ||
    938       ParseClassEnumType(state) ||
    939       ParseArrayType(state) ||
    940       ParsePointerToMemberType(state) ||
    941       ParseSubstitution(state)) {
    942     return true;
    943   }
    944 
    945   if (ParseTemplateTemplateParam(state) &&
    946       ParseTemplateArgs(state)) {
    947     return true;
    948   }
    949   *state = copy;
    950 
    951   // Less greedy than <template-template-param> <template-args>.
    952   if (ParseTemplateParam(state)) {
    953     return true;
    954   }
    955 
    956   return false;
    957 }
    958 
    959 // <CV-qualifiers> ::= [r] [V] [K]
    960 // We don't allow empty <CV-qualifiers> to avoid infinite loop in
    961 // ParseType().
    962 static bool ParseCVQualifiers(State *state) {
    963   int num_cv_qualifiers = 0;
    964   num_cv_qualifiers += ParseOneCharToken(state, 'r');
    965   num_cv_qualifiers += ParseOneCharToken(state, 'V');
    966   num_cv_qualifiers += ParseOneCharToken(state, 'K');
    967   return num_cv_qualifiers > 0;
    968 }
    969 
    970 // <builtin-type> ::= v, etc.
    971 //                ::= u <source-name>
    972 static bool ParseBuiltinType(State *state) {
    973   const AbbrevPair *p;
    974   for (p = kBuiltinTypeList; p->abbrev != NULL; ++p) {
    975     if (state->mangled_cur[0] == p->abbrev[0]) {
    976       MaybeAppend(state, p->real_name);
    977       ++state->mangled_cur;
    978       return true;
    979     }
    980   }
    981 
    982   State copy = *state;
    983   if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
    984     return true;
    985   }
    986   *state = copy;
    987   return false;
    988 }
    989 
    990 // <function-type> ::= F [Y] <bare-function-type> E
    991 static bool ParseFunctionType(State *state) {
    992   State copy = *state;
    993   if (ParseOneCharToken(state, 'F') &&
    994       Optional(ParseOneCharToken(state, 'Y')) &&
    995       ParseBareFunctionType(state) && ParseOneCharToken(state, 'E')) {
    996     return true;
    997   }
    998   *state = copy;
    999   return false;
   1000 }
   1001 
   1002 // <bare-function-type> ::= <(signature) type>+
   1003 static bool ParseBareFunctionType(State *state) {
   1004   State copy = *state;
   1005   DisableAppend(state);
   1006   if (OneOrMore(ParseType, state)) {
   1007     RestoreAppend(state, copy.append);
   1008     MaybeAppend(state, "()");
   1009     return true;
   1010   }
   1011   *state = copy;
   1012   return false;
   1013 }
   1014 
   1015 // <class-enum-type> ::= <name>
   1016 static bool ParseClassEnumType(State *state) {
   1017   return ParseName(state);
   1018 }
   1019 
   1020 // <array-type> ::= A <(positive dimension) number> _ <(element) type>
   1021 //              ::= A [<(dimension) expression>] _ <(element) type>
   1022 static bool ParseArrayType(State *state) {
   1023   State copy = *state;
   1024   if (ParseOneCharToken(state, 'A') && ParseNumber(state, NULL) &&
   1025       ParseOneCharToken(state, '_') && ParseType(state)) {
   1026     return true;
   1027   }
   1028   *state = copy;
   1029 
   1030   if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
   1031       ParseOneCharToken(state, '_') && ParseType(state)) {
   1032     return true;
   1033   }
   1034   *state = copy;
   1035   return false;
   1036 }
   1037 
   1038 // <pointer-to-member-type> ::= M <(class) type> <(member) type>
   1039 static bool ParsePointerToMemberType(State *state) {
   1040   State copy = *state;
   1041   if (ParseOneCharToken(state, 'M') && ParseType(state) &&
   1042       ParseType(state)) {
   1043     return true;
   1044   }
   1045   *state = copy;
   1046   return false;
   1047 }
   1048 
   1049 // <template-param> ::= T_
   1050 //                  ::= T <parameter-2 non-negative number> _
   1051 static bool ParseTemplateParam(State *state) {
   1052   if (ParseTwoCharToken(state, "T_")) {
   1053     MaybeAppend(state, "?");  // We don't support template substitutions.
   1054     return true;
   1055   }
   1056 
   1057   State copy = *state;
   1058   if (ParseOneCharToken(state, 'T') && ParseNumber(state, NULL) &&
   1059       ParseOneCharToken(state, '_')) {
   1060     MaybeAppend(state, "?");  // We don't support template substitutions.
   1061     return true;
   1062   }
   1063   *state = copy;
   1064   return false;
   1065 }
   1066 
   1067 
   1068 // <template-template-param> ::= <template-param>
   1069 //                           ::= <substitution>
   1070 static bool ParseTemplateTemplateParam(State *state) {
   1071   return (ParseTemplateParam(state) ||
   1072           ParseSubstitution(state));
   1073 }
   1074 
   1075 // <template-args> ::= I <template-arg>+ E
   1076 static bool ParseTemplateArgs(State *state) {
   1077   State copy = *state;
   1078   DisableAppend(state);
   1079   if (ParseOneCharToken(state, 'I') &&
   1080       OneOrMore(ParseTemplateArg, state) &&
   1081       ParseOneCharToken(state, 'E')) {
   1082     RestoreAppend(state, copy.append);
   1083     MaybeAppend(state, "<>");
   1084     return true;
   1085   }
   1086   *state = copy;
   1087   return false;
   1088 }
   1089 
   1090 // <template-arg>  ::= <type>
   1091 //                 ::= <expr-primary>
   1092 //                 ::= I <template-arg>* E        # argument pack
   1093 //                 ::= X <expression> E
   1094 static bool ParseTemplateArg(State *state) {
   1095   State copy = *state;
   1096   if (ParseOneCharToken(state, 'I') &&
   1097       ZeroOrMore(ParseTemplateArg, state) &&
   1098       ParseOneCharToken(state, 'E')) {
   1099     return true;
   1100   }
   1101   *state = copy;
   1102 
   1103   if (ParseType(state) ||
   1104       ParseExprPrimary(state)) {
   1105     return true;
   1106   }
   1107   *state = copy;
   1108 
   1109   if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
   1110       ParseOneCharToken(state, 'E')) {
   1111     return true;
   1112   }
   1113   *state = copy;
   1114   return false;
   1115 }
   1116 
   1117 // <expression> ::= <template-param>
   1118 //              ::= <expr-primary>
   1119 //              ::= <unary operator-name> <expression>
   1120 //              ::= <binary operator-name> <expression> <expression>
   1121 //              ::= <trinary operator-name> <expression> <expression>
   1122 //                  <expression>
   1123 //              ::= st <type>
   1124 //              ::= sr <type> <unqualified-name> <template-args>
   1125 //              ::= sr <type> <unqualified-name>
   1126 static bool ParseExpression(State *state) {
   1127   if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
   1128     return true;
   1129   }
   1130 
   1131   State copy = *state;
   1132   if (ParseOperatorName(state) &&
   1133       ParseExpression(state) &&
   1134       ParseExpression(state) &&
   1135       ParseExpression(state)) {
   1136     return true;
   1137   }
   1138   *state = copy;
   1139 
   1140   if (ParseOperatorName(state) &&
   1141       ParseExpression(state) &&
   1142       ParseExpression(state)) {
   1143     return true;
   1144   }
   1145   *state = copy;
   1146 
   1147   if (ParseOperatorName(state) &&
   1148       ParseExpression(state)) {
   1149     return true;
   1150   }
   1151   *state = copy;
   1152 
   1153   if (ParseTwoCharToken(state, "st") && ParseType(state)) {
   1154     return true;
   1155   }
   1156   *state = copy;
   1157 
   1158   if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
   1159       ParseUnqualifiedName(state) &&
   1160       ParseTemplateArgs(state)) {
   1161     return true;
   1162   }
   1163   *state = copy;
   1164 
   1165   if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
   1166       ParseUnqualifiedName(state)) {
   1167     return true;
   1168   }
   1169   *state = copy;
   1170   return false;
   1171 }
   1172 
   1173 // <expr-primary> ::= L <type> <(value) number> E
   1174 //                ::= L <type> <(value) float> E
   1175 //                ::= L <mangled-name> E
   1176 //                // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
   1177 //                ::= LZ <encoding> E
   1178 static bool ParseExprPrimary(State *state) {
   1179   State copy = *state;
   1180   if (ParseOneCharToken(state, 'L') && ParseType(state) &&
   1181       ParseNumber(state, NULL) &&
   1182       ParseOneCharToken(state, 'E')) {
   1183     return true;
   1184   }
   1185   *state = copy;
   1186 
   1187   if (ParseOneCharToken(state, 'L') && ParseType(state) &&
   1188       ParseFloatNumber(state) &&
   1189       ParseOneCharToken(state, 'E')) {
   1190     return true;
   1191   }
   1192   *state = copy;
   1193 
   1194   if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
   1195       ParseOneCharToken(state, 'E')) {
   1196     return true;
   1197   }
   1198   *state = copy;
   1199 
   1200   if (ParseTwoCharToken(state, "LZ") && ParseEncoding(state) &&
   1201       ParseOneCharToken(state, 'E')) {
   1202     return true;
   1203   }
   1204   *state = copy;
   1205 
   1206   return false;
   1207 }
   1208 
   1209 // <local-name> := Z <(function) encoding> E <(entity) name>
   1210 //                 [<discriminator>]
   1211 //              := Z <(function) encoding> E s [<discriminator>]
   1212 static bool ParseLocalName(State *state) {
   1213   State copy = *state;
   1214   if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
   1215       ParseOneCharToken(state, 'E') && MaybeAppend(state, "::") &&
   1216       ParseName(state) && Optional(ParseDiscriminator(state))) {
   1217     return true;
   1218   }
   1219   *state = copy;
   1220 
   1221   if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
   1222       ParseTwoCharToken(state, "Es") && Optional(ParseDiscriminator(state))) {
   1223     return true;
   1224   }
   1225   *state = copy;
   1226   return false;
   1227 }
   1228 
   1229 // <discriminator> := _ <(non-negative) number>
   1230 static bool ParseDiscriminator(State *state) {
   1231   State copy = *state;
   1232   if (ParseOneCharToken(state, '_') && ParseNumber(state, NULL)) {
   1233     return true;
   1234   }
   1235   *state = copy;
   1236   return false;
   1237 }
   1238 
   1239 // <substitution> ::= S_
   1240 //                ::= S <seq-id> _
   1241 //                ::= St, etc.
   1242 static bool ParseSubstitution(State *state) {
   1243   if (ParseTwoCharToken(state, "S_")) {
   1244     MaybeAppend(state, "?");  // We don't support substitutions.
   1245     return true;
   1246   }
   1247 
   1248   State copy = *state;
   1249   if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
   1250       ParseOneCharToken(state, '_')) {
   1251     MaybeAppend(state, "?");  // We don't support substitutions.
   1252     return true;
   1253   }
   1254   *state = copy;
   1255 
   1256   // Expand abbreviations like "St" => "std".
   1257   if (ParseOneCharToken(state, 'S')) {
   1258     const AbbrevPair *p;
   1259     for (p = kSubstitutionList; p->abbrev != NULL; ++p) {
   1260       if (state->mangled_cur[0] == p->abbrev[1]) {
   1261         MaybeAppend(state, "std");
   1262         if (p->real_name[0] != '\0') {
   1263           MaybeAppend(state, "::");
   1264           MaybeAppend(state, p->real_name);
   1265         }
   1266         ++state->mangled_cur;
   1267         return true;
   1268       }
   1269     }
   1270   }
   1271   *state = copy;
   1272   return false;
   1273 }
   1274 
   1275 // Parse <mangled-name>, optionally followed by either a function-clone suffix
   1276 // or version suffix.  Returns true only if all of "mangled_cur" was consumed.
   1277 static bool ParseTopLevelMangledName(State *state) {
   1278   if (ParseMangledName(state)) {
   1279     if (state->mangled_cur[0] != '\0') {
   1280       // Drop trailing function clone suffix, if any.
   1281       if (IsFunctionCloneSuffix(state->mangled_cur)) {
   1282         return true;
   1283       }
   1284       // Append trailing version suffix if any.
   1285       // ex. _Z3foo@@GLIBCXX_3.4
   1286       if (state->mangled_cur[0] == '@') {
   1287         MaybeAppend(state, state->mangled_cur);
   1288         return true;
   1289       }
   1290       return false;  // Unconsumed suffix.
   1291     }
   1292     return true;
   1293   }
   1294   return false;
   1295 }
   1296 
   1297 // The demangler entry point.
   1298 bool Demangle(const char *mangled, char *out, int out_size) {
   1299   State state;
   1300   InitState(&state, mangled, out, out_size);
   1301   return ParseTopLevelMangledName(&state) && !state.overflowed;
   1302 }
   1303 
   1304 _END_GOOGLE_NAMESPACE_
   1305