Home | History | Annotate | Download | only in bookmaker
      1 /*
      2  * Copyright 2018 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef textParser_DEFINED
      9 #define textParser_DEFINED
     10 
     11 #include <functional>
     12 
     13 #include "bookmaker.h"
     14 
     15 class BmhParser;
     16 class Definition;
     17 
     18 class TextParser : public NonAssignable {
     19     TextParser() {}  // only for ParserCommon, TextParserSave
     20     friend class ParserCommon;
     21     friend class TextParserSave;
     22 public:
     23     virtual ~TextParser() {}
     24 
     25     TextParser(string fileName, const char* start, const char* end, int lineCount)
     26         : fFileName(fileName)
     27         , fStart(start)
     28         , fLine(start)
     29         , fChar(start)
     30         , fEnd(end)
     31         , fLineCount(lineCount)
     32     {
     33     }
     34 
     35     TextParser(const Definition* );
     36 
     37     const char* anyOf(const char* str) const {
     38         const char* ptr = fChar;
     39         while (ptr < fEnd) {
     40             if (strchr(str, ptr[0])) {
     41                 return ptr;
     42             }
     43             ++ptr;
     44         }
     45         return nullptr;
     46     }
     47 
     48     const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const {
     49         const char** wordPtr = wordList;
     50         const char** wordEnd = wordPtr + wordListCount;
     51         const size_t matchLen = fChar - wordStart;
     52         while (wordPtr < wordEnd) {
     53             const char* word = *wordPtr++;
     54             if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) {
     55                 return word;
     56             }
     57         }
     58         return nullptr;
     59     }
     60 
     61     // words must be alpha only
     62     string anyWord(const vector<string>& wordList, int spaces) const {
     63         const char* matchStart = fChar;
     64         do {
     65             int count = spaces;
     66             while (matchStart < fEnd && !isalpha(matchStart[0])) {
     67                 ++matchStart;
     68             }
     69             const char* matchEnd = matchStart;
     70             const char* nextWord = nullptr;
     71             while (matchEnd < fEnd) {
     72                 if (isalpha(matchEnd[0])) {
     73                 } else if (' ' == matchEnd[0] && --count >= 0) {
     74                     if (!nextWord) {
     75                         nextWord = matchEnd;
     76                     }
     77                 } else {
     78                     break;
     79                 }
     80                 ++matchEnd;
     81             }
     82             size_t matchLen = matchEnd - matchStart;
     83             for (auto word : wordList) {
     84                 if (word.length() != matchLen) {
     85                     continue;
     86                 }
     87                 for (unsigned index = 0; index < matchLen; ++index) {
     88                     if (tolower(matchStart[index]) != word[index]) {
     89                         goto nextWord;
     90                     }
     91                 }
     92                 return word;
     93         nextWord: ;
     94             }
     95             matchStart = nextWord ? nextWord : matchEnd;
     96         } while (matchStart < fEnd);
     97         return "";
     98     }
     99 
    100     bool back(const char* pattern) {
    101         size_t len = strlen(pattern);
    102         const char* start = fChar - len;
    103         if (start <= fStart) {
    104             return false;
    105         }
    106         if (strncmp(start, pattern, len)) {
    107             return false;
    108         }
    109         fChar = start;
    110         return true;
    111     }
    112 
    113     char backup(const char* pattern) const {
    114         size_t len = strlen(pattern);
    115         const char* start = fChar - len;
    116         if (start <= fStart) {
    117             return '\0';
    118         }
    119         if (strncmp(start, pattern, len)) {
    120             return '\0';
    121         }
    122         return start[-1];
    123     }
    124 
    125     void backupWord() {
    126         while (fChar > fStart && isalpha(fChar[-1])) {
    127             --fChar;
    128         }
    129     }
    130 
    131     bool contains(const char* match, const char* lineEnd, const char** loc) const {
    132         const char* result = this->strnstr(match, lineEnd);
    133         if (loc) {
    134             *loc = result;
    135         }
    136         return result;
    137     }
    138 
    139     bool containsWord(const char* match, const char* lineEnd, const char** loc) {
    140         size_t len = strlen(match);
    141         do {
    142             const char* result = this->strnstr(match, lineEnd);
    143             if (!result) {
    144                 return false;
    145             }
    146             if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd
    147                     && isalnum(result[len]))) {
    148                 fChar = result + len;
    149                 continue;
    150             }
    151             if (loc) {
    152                 *loc = result;
    153             }
    154             return true;
    155         } while (true);
    156     }
    157 
    158     // either /n/n or /n# will stop parsing a typedef
    159     const char* doubleLF() const {
    160         const char* ptr = fChar - 1;
    161         const char* doubleStart = nullptr;
    162         while (++ptr < fEnd) {
    163             if (!doubleStart) {
    164                 if ('\n' == ptr[0]) {
    165                     doubleStart = ptr;
    166                 }
    167                 continue;
    168             }
    169             if ('\n' == ptr[0] || '#' == ptr[0]) {
    170                 return doubleStart;
    171             }
    172             if (' ' < ptr[0]) {
    173                 doubleStart = nullptr;
    174             }
    175         }
    176         return nullptr;
    177     }
    178 
    179     bool endsWith(const char* match) {
    180         int matchLen = strlen(match);
    181         if (matchLen > fChar - fLine) {
    182             return false;
    183         }
    184         return !strncmp(fChar - matchLen, match, matchLen);
    185     }
    186 
    187     bool eof() const { return fChar >= fEnd; }
    188 
    189     const char* lineEnd() const {
    190         const char* ptr = fChar;
    191         do {
    192             if (ptr >= fEnd) {
    193                 return ptr;
    194             }
    195             char test = *ptr++;
    196             if (test == '\n' || test == '\0') {
    197                 break;
    198             }
    199         } while (true);
    200         return ptr;
    201     }
    202 
    203     ptrdiff_t lineLength() const {
    204         return this->lineEnd() - fLine;
    205     }
    206 
    207     bool match(TextParser* );
    208 
    209     char next() {
    210         SkASSERT(fChar < fEnd);
    211         char result = *fChar++;
    212         if ('\n' == result) {
    213             ++fLineCount;
    214             fLine = fChar;
    215         }
    216         return result;
    217     }
    218 
    219     char peek() const { SkASSERT(fChar < fEnd); return *fChar; }
    220 
    221     void restorePlace(const TextParser& save) {
    222         fChar = save.fChar;
    223         fLine = save.fLine;
    224         fLineCount = save.fLineCount;
    225     }
    226 
    227     void savePlace(TextParser* save) {
    228         save->fChar = fChar;
    229         save->fLine = fLine;
    230         save->fLineCount = fLineCount;
    231     }
    232 
    233     void reportError(const char* errorStr) const;
    234     static string ReportFilename(string file);
    235     void reportWarning(const char* errorStr) const;
    236 
    237     template <typename T> T reportError(const char* errorStr) const {
    238         this->reportError(errorStr);
    239         return T();
    240     }
    241 
    242     bool sentenceEnd(const char* check) const {
    243         while (check > fStart) {
    244             --check;
    245             if (' ' < check[0] && '.' != check[0]) {
    246                 return false;
    247             }
    248             if ('.' == check[0]) {
    249                 return ' ' >= check[1];
    250             }
    251             if ('\n' == check[0] && '\n' == check[1]) {
    252                 return true;
    253             }
    254         }
    255         return true;
    256     }
    257 
    258     void setForErrorReporting(const Definition* , const char* );
    259 
    260     bool skipToBalancedEndBracket(char startB, char endB) {
    261         SkASSERT(fChar < fEnd);
    262         SkASSERT(startB == fChar[0]);
    263         int startCount = 0;
    264         do {
    265             char test = this->next();
    266             startCount += startB == test;
    267             startCount -= endB  == test;
    268         } while (startCount && fChar < fEnd);
    269         return !startCount;
    270     }
    271 
    272     bool skipToEndBracket(char endBracket, const char* end = nullptr) {
    273         if (nullptr == end) {
    274             end = fEnd;
    275         }
    276         while (fChar[0] != endBracket) {
    277             if (fChar >= end) {
    278                 return false;
    279             }
    280             (void) this->next();
    281         }
    282         return true;
    283     }
    284 
    285     bool skipToEndBracket(const char* endBracket) {
    286         size_t len = strlen(endBracket);
    287         while (strncmp(fChar, endBracket, len)) {
    288             if (fChar >= fEnd) {
    289                 return false;
    290             }
    291             (void) this->next();
    292         }
    293         return true;
    294     }
    295 
    296     bool skipLine() {
    297         return skipToEndBracket('\n');
    298     }
    299 
    300     void skipTo(const char* skip) {
    301        while (fChar < skip) {
    302            this->next();
    303        }
    304     }
    305 
    306     void skipToAlpha() {
    307         while (fChar < fEnd && !isalpha(fChar[0])) {
    308             fChar++;
    309         }
    310     }
    311 
    312     // returns true if saw close brace
    313     bool skipToAlphaNum() {
    314         bool sawCloseBrace = false;
    315         while (fChar < fEnd && !isalnum(fChar[0])) {
    316             sawCloseBrace |= '}' == *fChar++;
    317         }
    318         return sawCloseBrace;
    319     }
    320 
    321     bool skipExact(const char* pattern) {
    322         if (!this->startsWith(pattern)) {
    323             return false;
    324         }
    325         this->skipName(pattern);
    326         return true;
    327     }
    328 
    329     // differs from skipToNonAlphaNum in that a.b isn't considered a full name,
    330     // since a.b can't be found as a named definition
    331     void skipFullName() {
    332         do {
    333             char last = '\0';
    334             while (fChar < fEnd && (isalnum(fChar[0])
    335                     || '_' == fChar[0]  /* || '-' == fChar[0] */
    336                     || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) {
    337                 if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) {
    338                     fChar++;
    339                 }
    340                 last = fChar[0];
    341                 fChar++;
    342             }
    343             if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) {
    344                 break;  // stop unless pattern is xxx/xxx as in I/O
    345             }
    346             fChar++; // skip slash
    347         } while (true);
    348     }
    349 
    350     int skipToLineBalance(char open, char close) {
    351         int match = 0;
    352         while (!this->eof() && '\n' != fChar[0]) {
    353             match += open == this->peek();
    354             match -= close == this->next();
    355         }
    356         return match;
    357     }
    358 
    359     bool skipToLineStart() {
    360         if (!this->skipLine()) {
    361             return false;
    362         }
    363         if (!this->eof()) {
    364             return this->skipWhiteSpace();
    365         }
    366         return true;
    367     }
    368 
    369     void skipToLineStart(int* indent, bool* sawReturn) {
    370         SkAssertResult(this->skipLine());
    371         this->skipWhiteSpace(indent, sawReturn);
    372     }
    373 
    374     void skipLower() {
    375         while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
    376             fChar++;
    377         }
    378     }
    379 
    380     void skipToNonAlphaNum() {
    381         while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) {
    382             fChar++;
    383         }
    384     }
    385 
    386     void skipToNonName() {
    387         while (fChar < fEnd && (isalnum(fChar[0])
    388                 || '_' == fChar[0] || '-' == fChar[0]
    389                 || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1])
    390                 || ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) {
    391             if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) {
    392                 fChar++;
    393             }
    394             fChar++;
    395         }
    396     }
    397 
    398     void skipPhraseName() {
    399         while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
    400             fChar++;
    401         }
    402     }
    403 
    404     void skipToSpace() {
    405         while (fChar < fEnd && ' ' != fChar[0]) {
    406             fChar++;
    407         }
    408     }
    409 
    410     void skipToWhiteSpace() {
    411         while (fChar < fEnd && ' ' < fChar[0]) {
    412             fChar++;
    413         }
    414     }
    415 
    416     bool skipName(const char* word) {
    417         size_t len = strlen(word);
    418         if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) {
    419             for (size_t i = 0; i < len; ++i) {
    420                 this->next();
    421             }
    422         }
    423         return this->eof() || ' ' >= fChar[0];
    424     }
    425 
    426     bool skipSpace() {
    427         while (' ' == this->peek()) {
    428             (void) this->next();
    429             if (fChar >= fEnd) {
    430                 return false;
    431             }
    432         }
    433         return true;
    434     }
    435 
    436     bool skipWord(const char* word) {
    437         if (!this->skipWhiteSpace()) {
    438             return false;
    439         }
    440         const char* save = fChar;
    441         if (!this->skipName(word)) {
    442             fChar = save;
    443             return false;
    444         }
    445         if (!this->skipWhiteSpace()) {
    446             return false;
    447         }
    448         return true;
    449     }
    450 
    451     bool skipWhiteSpace() {
    452         while (' ' >= this->peek()) {
    453             (void) this->next();
    454             if (fChar >= fEnd) {
    455                 return false;
    456             }
    457         }
    458         return true;
    459     }
    460 
    461     bool skipWhiteSpace(int* indent, bool* skippedReturn) {
    462         while (' ' >= this->peek()) {
    463             *indent = *skippedReturn ? *indent + 1 : 1;
    464             if ('\n' == this->peek()) {
    465                 *skippedReturn |= true;
    466                 *indent = 0;
    467             }
    468             (void) this->next();
    469             if (fChar >= fEnd) {
    470                 return false;
    471             }
    472         }
    473         return true;
    474     }
    475 
    476     bool startsWith(const char* str) const {
    477         size_t len = strlen(str);
    478         ptrdiff_t lineLen = fEnd - fChar;
    479         return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len);
    480     }
    481 
    482     // ignores minor white space differences
    483     bool startsWith(const char* str, size_t oLen) const {
    484         size_t tIndex = 0;
    485         size_t tLen = fEnd - fChar;
    486         size_t oIndex = 0;
    487         while (oIndex < oLen && tIndex < tLen) {
    488             bool tSpace = ' ' >= fChar[tIndex];
    489             bool oSpace = ' ' >= str[oIndex];
    490             if (tSpace != oSpace) {
    491                 break;
    492             }
    493             if (tSpace) {
    494                 do {
    495                     ++tIndex;
    496                 } while (tIndex < tLen && ' ' >= fChar[tIndex]);
    497                 do {
    498                     ++oIndex;
    499                 } while (oIndex < oLen && ' ' >= str[oIndex]);
    500                 continue;
    501             }
    502             if (fChar[tIndex] != str[oIndex]) {
    503                 break;
    504             }
    505             ++tIndex;
    506             ++oIndex;
    507         }
    508         return oIndex >= oLen;
    509     }
    510 
    511     const char* strnchr(char ch, const char* end) const {
    512         const char* ptr = fChar;
    513         while (ptr < end) {
    514             if (ptr[0] == ch) {
    515                 return ptr;
    516             }
    517             ++ptr;
    518         }
    519         return nullptr;
    520     }
    521 
    522     const char* strnstr(const char *match, const char* end) const {
    523         size_t matchLen = strlen(match);
    524         SkASSERT(matchLen > 0);
    525         ptrdiff_t len = end - fChar;
    526         SkASSERT(len >= 0);
    527         if ((size_t) len < matchLen ) {
    528             return nullptr;
    529         }
    530         size_t count = len - matchLen;
    531         for (size_t index = 0; index <= count; index++) {
    532             if (0 == strncmp(&fChar[index], match, matchLen)) {
    533                 return &fChar[index];
    534             }
    535         }
    536         return nullptr;
    537     }
    538 
    539     const char* trimmedBracketEnd(const char bracket) const {
    540         int max = (int) (this->lineLength());
    541         int index = 0;
    542         while (index < max && bracket != fChar[index]) {
    543             ++index;
    544         }
    545         SkASSERT(index < max);
    546         while (index > 0 && ' ' >= fChar[index - 1]) {
    547             --index;
    548         }
    549         return fChar + index;
    550     }
    551 
    552     const char* trimmedBracketEnd(string bracket) const {
    553         size_t max = (size_t) (this->lineLength());
    554         string line(fChar, max);
    555         size_t index = line.find(bracket);
    556         SkASSERT(index < max);
    557         while (index > 0 && ' ' >= fChar[index - 1]) {
    558             --index;
    559         }
    560         return fChar + index;
    561     }
    562 
    563     const char* trimmedBracketNoEnd(const char bracket) const {
    564         int max = (int) (fEnd - fChar);
    565         int index = 0;
    566         while (index < max && bracket != fChar[index]) {
    567             ++index;
    568         }
    569         SkASSERT(index < max);
    570         while (index > 0 && ' ' >= fChar[index - 1]) {
    571             --index;
    572         }
    573         return fChar + index;
    574     }
    575 
    576     const char* trimmedLineEnd() const {
    577         const char* result = this->lineEnd();
    578         while (result > fChar && ' ' >= result[-1]) {
    579             --result;
    580         }
    581         return result;
    582     }
    583 
    584     void trimEnd() {
    585         while (fEnd > fStart && ' ' >= fEnd[-1]) {
    586             --fEnd;
    587         }
    588     }
    589 
    590     // FIXME: nothing else in TextParser knows from C++ --
    591     // there could be a class between TextParser and ParserCommon
    592     virtual string typedefName();
    593 
    594     const char* wordEnd() const {
    595         const char* end = fChar;
    596         while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) {
    597             ++end;
    598         }
    599         return end;
    600     }
    601 
    602     string fFileName;
    603     const char* fStart;
    604     const char* fLine;
    605     const char* fChar;
    606     const char* fEnd;
    607     size_t fLineCount;
    608 };
    609 
    610 class TextParserSave {
    611 public:
    612     TextParserSave(TextParser* parser) {
    613         fParser = parser;
    614         fSave.fFileName = parser->fFileName;
    615         fSave.fStart = parser->fStart;
    616         fSave.fLine = parser->fLine;
    617         fSave.fChar = parser->fChar;
    618         fSave.fEnd = parser->fEnd;
    619         fSave.fLineCount = parser->fLineCount;
    620     }
    621 
    622     void restore() const {
    623         fParser->fFileName = fSave.fFileName;
    624         fParser->fStart = fSave.fStart;
    625         fParser->fLine = fSave.fLine;
    626         fParser->fChar = fSave.fChar;
    627         fParser->fEnd = fSave.fEnd;
    628         fParser->fLineCount = fSave.fLineCount;
    629     }
    630 
    631 private:
    632     TextParser* fParser;
    633     TextParser fSave;
    634 };
    635 
    636 static inline bool has_nonwhitespace(string s) {
    637     bool nonwhite = false;
    638     for (const char& c : s) {
    639         if (' ' < c) {
    640             nonwhite = true;
    641             break;
    642         }
    643     }
    644     return nonwhite;
    645 }
    646 
    647 static inline void trim_end(string &s) {
    648     s.erase(std::find_if(s.rbegin(), s.rend(),
    649             std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
    650 }
    651 
    652 static inline void trim_end_spaces(string &s) {
    653     while (s.length() > 0 && ' ' == s.back()) {
    654         s.pop_back();
    655     }
    656 }
    657 
    658 static inline void trim_start(string &s) {
    659     s.erase(s.begin(), std::find_if(s.begin(), s.end(),
    660             std::not1(std::ptr_fun<int, int>(std::isspace))));
    661 }
    662 
    663 static inline void trim_start_end(string& s) {
    664     trim_start(s);
    665     trim_end(s);
    666 }
    667 
    668 static inline string trim_inline_spaces(string s) {
    669     bool lastSpace = false;
    670     string trimmed;
    671     for (const char* ptr = &s.front(); ptr <= &s.back(); ++ptr) {
    672         char c = *ptr;
    673         if (' ' >= c) {
    674             if (!lastSpace) {
    675                 trimmed += ' ';
    676             }
    677             lastSpace = true;
    678             continue;
    679         }
    680         lastSpace = false;
    681         trimmed += c;
    682     }
    683     return trimmed;
    684 }
    685 
    686 class EscapeParser : public TextParser {
    687 public:
    688     EscapeParser(const char* start, const char* end) :
    689             TextParser("", start, end, 0) {
    690         const char* reader = fStart;
    691         fStorage = new char[end - start];
    692         char* writer = fStorage;
    693         while (reader < fEnd) {
    694             char ch = *reader++;
    695             if (ch != '\\') {
    696                 *writer++ = ch;
    697             } else {
    698                 char ctrl = *reader++;
    699                 if (ctrl == 'u') {
    700                     unsigned unicode = 0;
    701                     for (int i = 0; i < 4; ++i) {
    702                         unicode <<= 4;
    703                         SkASSERT((reader[0] >= '0' && reader[0] <= '9') ||
    704                             (reader[0] >= 'A' && reader[0] <= 'F') ||
    705                             (reader[0] >= 'a' && reader[0] <= 'f'));
    706                         int nibble = *reader++ - '0';
    707                         if (nibble > 9) {
    708                             nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1;
    709                         }
    710                         unicode |= nibble;
    711                     }
    712                     SkASSERT(unicode < 256);
    713                     *writer++ = (unsigned char) unicode;
    714                 } else {
    715                     SkASSERT(ctrl == 'n');
    716                     *writer++ = '\n';
    717                 }
    718             }
    719         }
    720         fStart = fLine = fChar = fStorage;
    721         fEnd = writer;
    722     }
    723 
    724     ~EscapeParser() override {
    725         delete fStorage;
    726     }
    727 private:
    728     char* fStorage;
    729 };
    730 
    731 // some methods cannot be trivially parsed; look for class-name / ~ / operator
    732 class MethodParser : public TextParser {
    733 public:
    734     MethodParser(string className, string fileName,
    735             const char* start, const char* end, int lineCount)
    736         : TextParser(fileName, start, end, lineCount)
    737         , fClassName(className) {
    738         size_t doubleColons = className.find_last_of("::");
    739         if (string::npos != doubleColons) {
    740             fLocalName = className.substr(doubleColons + 1);
    741             SkASSERT(fLocalName.length() > 0);
    742         }
    743     }
    744 
    745     ~MethodParser() override {}
    746 
    747     string localName() const {
    748         return fLocalName;
    749     }
    750 
    751     void setLocalName(string name) {
    752         if (name == fClassName) {
    753             fLocalName = "";
    754         } else {
    755             fLocalName = name;
    756         }
    757     }
    758 
    759     // returns true if close brace was skipped
    760     int skipToMethodStart() {
    761         if (!fClassName.length()) {
    762             return this->skipToAlphaNum();
    763         }
    764         int braceCount = 0;
    765         while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) {
    766             braceCount += '{' == this->peek();
    767             braceCount -= '}' == this->peek();
    768             this->next();
    769         }
    770         return braceCount;
    771     }
    772 
    773     void skipToMethodEnd(Resolvable resolvable);
    774 
    775     bool wordEndsWith(const char* str) const {
    776         const char* space = this->strnchr(' ', fEnd);
    777         if (!space) {
    778             return false;
    779         }
    780         size_t len = strlen(str);
    781         if (space < fChar + len) {
    782             return false;
    783         }
    784         return !strncmp(str, space - len, len);
    785     }
    786 
    787 private:
    788     string fClassName;
    789     string fLocalName;
    790     typedef TextParser INHERITED;
    791 };
    792 
    793 #endif
    794