1 /* 2 * Copyright 2018 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef textParser_DEFINED 9 #define textParser_DEFINED 10 11 #include <functional> 12 13 #include "bookmaker.h" 14 15 class BmhParser; 16 class Definition; 17 18 class TextParser : public NonAssignable { 19 TextParser() {} // only for ParserCommon, TextParserSave 20 friend class ParserCommon; 21 friend class TextParserSave; 22 public: 23 virtual ~TextParser() {} 24 25 TextParser(string fileName, const char* start, const char* end, int lineCount) 26 : fFileName(fileName) 27 , fStart(start) 28 , fLine(start) 29 , fChar(start) 30 , fEnd(end) 31 , fLineCount(lineCount) 32 { 33 } 34 35 TextParser(const Definition* ); 36 37 const char* anyOf(const char* str) const { 38 const char* ptr = fChar; 39 while (ptr < fEnd) { 40 if (strchr(str, ptr[0])) { 41 return ptr; 42 } 43 ++ptr; 44 } 45 return nullptr; 46 } 47 48 const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const { 49 const char** wordPtr = wordList; 50 const char** wordEnd = wordPtr + wordListCount; 51 const size_t matchLen = fChar - wordStart; 52 while (wordPtr < wordEnd) { 53 const char* word = *wordPtr++; 54 if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) { 55 return word; 56 } 57 } 58 return nullptr; 59 } 60 61 // words must be alpha only 62 string anyWord(const vector<string>& wordList, int spaces) const { 63 const char* matchStart = fChar; 64 do { 65 int count = spaces; 66 while (matchStart < fEnd && !isalpha(matchStart[0])) { 67 ++matchStart; 68 } 69 const char* matchEnd = matchStart; 70 const char* nextWord = nullptr; 71 while (matchEnd < fEnd) { 72 if (isalpha(matchEnd[0])) { 73 } else if (' ' == matchEnd[0] && --count >= 0) { 74 if (!nextWord) { 75 nextWord = matchEnd; 76 } 77 } else { 78 break; 79 } 80 ++matchEnd; 81 } 82 size_t matchLen = matchEnd - matchStart; 83 for (auto word : wordList) { 84 if (word.length() != matchLen) { 85 continue; 86 } 87 for (unsigned index = 0; index < matchLen; ++index) { 88 if (tolower(matchStart[index]) != word[index]) { 89 goto nextWord; 90 } 91 } 92 return word; 93 nextWord: ; 94 } 95 matchStart = nextWord ? nextWord : matchEnd; 96 } while (matchStart < fEnd); 97 return ""; 98 } 99 100 bool back(const char* pattern) { 101 size_t len = strlen(pattern); 102 const char* start = fChar - len; 103 if (start <= fStart) { 104 return false; 105 } 106 if (strncmp(start, pattern, len)) { 107 return false; 108 } 109 fChar = start; 110 return true; 111 } 112 113 char backup(const char* pattern) const { 114 size_t len = strlen(pattern); 115 const char* start = fChar - len; 116 if (start <= fStart) { 117 return '\0'; 118 } 119 if (strncmp(start, pattern, len)) { 120 return '\0'; 121 } 122 return start[-1]; 123 } 124 125 void backupWord() { 126 while (fChar > fStart && isalpha(fChar[-1])) { 127 --fChar; 128 } 129 } 130 131 bool contains(const char* match, const char* lineEnd, const char** loc) const { 132 const char* result = this->strnstr(match, lineEnd); 133 if (loc) { 134 *loc = result; 135 } 136 return result; 137 } 138 139 bool containsWord(const char* match, const char* lineEnd, const char** loc) { 140 size_t len = strlen(match); 141 do { 142 const char* result = this->strnstr(match, lineEnd); 143 if (!result) { 144 return false; 145 } 146 if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd 147 && isalnum(result[len]))) { 148 fChar = result + len; 149 continue; 150 } 151 if (loc) { 152 *loc = result; 153 } 154 return true; 155 } while (true); 156 } 157 158 // either /n/n or /n# will stop parsing a typedef 159 const char* doubleLF() const { 160 const char* ptr = fChar - 1; 161 const char* doubleStart = nullptr; 162 while (++ptr < fEnd) { 163 if (!doubleStart) { 164 if ('\n' == ptr[0]) { 165 doubleStart = ptr; 166 } 167 continue; 168 } 169 if ('\n' == ptr[0] || '#' == ptr[0]) { 170 return doubleStart; 171 } 172 if (' ' < ptr[0]) { 173 doubleStart = nullptr; 174 } 175 } 176 return nullptr; 177 } 178 179 bool endsWith(const char* match) { 180 int matchLen = strlen(match); 181 if (matchLen > fChar - fLine) { 182 return false; 183 } 184 return !strncmp(fChar - matchLen, match, matchLen); 185 } 186 187 bool eof() const { return fChar >= fEnd; } 188 189 const char* lineEnd() const { 190 const char* ptr = fChar; 191 do { 192 if (ptr >= fEnd) { 193 return ptr; 194 } 195 char test = *ptr++; 196 if (test == '\n' || test == '\0') { 197 break; 198 } 199 } while (true); 200 return ptr; 201 } 202 203 ptrdiff_t lineLength() const { 204 return this->lineEnd() - fLine; 205 } 206 207 bool match(TextParser* ); 208 209 char next() { 210 SkASSERT(fChar < fEnd); 211 char result = *fChar++; 212 if ('\n' == result) { 213 ++fLineCount; 214 fLine = fChar; 215 } 216 return result; 217 } 218 219 char peek() const { SkASSERT(fChar < fEnd); return *fChar; } 220 221 void restorePlace(const TextParser& save) { 222 fChar = save.fChar; 223 fLine = save.fLine; 224 fLineCount = save.fLineCount; 225 } 226 227 void savePlace(TextParser* save) { 228 save->fChar = fChar; 229 save->fLine = fLine; 230 save->fLineCount = fLineCount; 231 } 232 233 void reportError(const char* errorStr) const; 234 static string ReportFilename(string file); 235 void reportWarning(const char* errorStr) const; 236 237 template <typename T> T reportError(const char* errorStr) const { 238 this->reportError(errorStr); 239 return T(); 240 } 241 242 bool sentenceEnd(const char* check) const { 243 while (check > fStart) { 244 --check; 245 if (' ' < check[0] && '.' != check[0]) { 246 return false; 247 } 248 if ('.' == check[0]) { 249 return ' ' >= check[1]; 250 } 251 if ('\n' == check[0] && '\n' == check[1]) { 252 return true; 253 } 254 } 255 return true; 256 } 257 258 void setForErrorReporting(const Definition* , const char* ); 259 260 bool skipToBalancedEndBracket(char startB, char endB) { 261 SkASSERT(fChar < fEnd); 262 SkASSERT(startB == fChar[0]); 263 int startCount = 0; 264 do { 265 char test = this->next(); 266 startCount += startB == test; 267 startCount -= endB == test; 268 } while (startCount && fChar < fEnd); 269 return !startCount; 270 } 271 272 bool skipToEndBracket(char endBracket, const char* end = nullptr) { 273 if (nullptr == end) { 274 end = fEnd; 275 } 276 while (fChar[0] != endBracket) { 277 if (fChar >= end) { 278 return false; 279 } 280 (void) this->next(); 281 } 282 return true; 283 } 284 285 bool skipToEndBracket(const char* endBracket) { 286 size_t len = strlen(endBracket); 287 while (strncmp(fChar, endBracket, len)) { 288 if (fChar >= fEnd) { 289 return false; 290 } 291 (void) this->next(); 292 } 293 return true; 294 } 295 296 bool skipLine() { 297 return skipToEndBracket('\n'); 298 } 299 300 void skipTo(const char* skip) { 301 while (fChar < skip) { 302 this->next(); 303 } 304 } 305 306 void skipToAlpha() { 307 while (fChar < fEnd && !isalpha(fChar[0])) { 308 fChar++; 309 } 310 } 311 312 // returns true if saw close brace 313 bool skipToAlphaNum() { 314 bool sawCloseBrace = false; 315 while (fChar < fEnd && !isalnum(fChar[0])) { 316 sawCloseBrace |= '}' == *fChar++; 317 } 318 return sawCloseBrace; 319 } 320 321 bool skipExact(const char* pattern) { 322 if (!this->startsWith(pattern)) { 323 return false; 324 } 325 this->skipName(pattern); 326 return true; 327 } 328 329 // differs from skipToNonAlphaNum in that a.b isn't considered a full name, 330 // since a.b can't be found as a named definition 331 void skipFullName() { 332 do { 333 char last = '\0'; 334 while (fChar < fEnd && (isalnum(fChar[0]) 335 || '_' == fChar[0] /* || '-' == fChar[0] */ 336 || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) { 337 if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) { 338 fChar++; 339 } 340 last = fChar[0]; 341 fChar++; 342 } 343 if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) { 344 break; // stop unless pattern is xxx/xxx as in I/O 345 } 346 fChar++; // skip slash 347 } while (true); 348 } 349 350 int skipToLineBalance(char open, char close) { 351 int match = 0; 352 while (!this->eof() && '\n' != fChar[0]) { 353 match += open == this->peek(); 354 match -= close == this->next(); 355 } 356 return match; 357 } 358 359 bool skipToLineStart() { 360 if (!this->skipLine()) { 361 return false; 362 } 363 if (!this->eof()) { 364 return this->skipWhiteSpace(); 365 } 366 return true; 367 } 368 369 void skipToLineStart(int* indent, bool* sawReturn) { 370 SkAssertResult(this->skipLine()); 371 this->skipWhiteSpace(indent, sawReturn); 372 } 373 374 void skipLower() { 375 while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { 376 fChar++; 377 } 378 } 379 380 void skipToNonAlphaNum() { 381 while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) { 382 fChar++; 383 } 384 } 385 386 void skipToNonName() { 387 while (fChar < fEnd && (isalnum(fChar[0]) 388 || '_' == fChar[0] || '-' == fChar[0] 389 || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) 390 || ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) { 391 if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) { 392 fChar++; 393 } 394 fChar++; 395 } 396 } 397 398 void skipPhraseName() { 399 while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { 400 fChar++; 401 } 402 } 403 404 void skipToSpace() { 405 while (fChar < fEnd && ' ' != fChar[0]) { 406 fChar++; 407 } 408 } 409 410 void skipToWhiteSpace() { 411 while (fChar < fEnd && ' ' < fChar[0]) { 412 fChar++; 413 } 414 } 415 416 bool skipName(const char* word) { 417 size_t len = strlen(word); 418 if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) { 419 for (size_t i = 0; i < len; ++i) { 420 this->next(); 421 } 422 } 423 return this->eof() || ' ' >= fChar[0]; 424 } 425 426 bool skipSpace() { 427 while (' ' == this->peek()) { 428 (void) this->next(); 429 if (fChar >= fEnd) { 430 return false; 431 } 432 } 433 return true; 434 } 435 436 bool skipWord(const char* word) { 437 if (!this->skipWhiteSpace()) { 438 return false; 439 } 440 const char* save = fChar; 441 if (!this->skipName(word)) { 442 fChar = save; 443 return false; 444 } 445 if (!this->skipWhiteSpace()) { 446 return false; 447 } 448 return true; 449 } 450 451 bool skipWhiteSpace() { 452 while (' ' >= this->peek()) { 453 (void) this->next(); 454 if (fChar >= fEnd) { 455 return false; 456 } 457 } 458 return true; 459 } 460 461 bool skipWhiteSpace(int* indent, bool* skippedReturn) { 462 while (' ' >= this->peek()) { 463 *indent = *skippedReturn ? *indent + 1 : 1; 464 if ('\n' == this->peek()) { 465 *skippedReturn |= true; 466 *indent = 0; 467 } 468 (void) this->next(); 469 if (fChar >= fEnd) { 470 return false; 471 } 472 } 473 return true; 474 } 475 476 bool startsWith(const char* str) const { 477 size_t len = strlen(str); 478 ptrdiff_t lineLen = fEnd - fChar; 479 return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len); 480 } 481 482 // ignores minor white space differences 483 bool startsWith(const char* str, size_t oLen) const { 484 size_t tIndex = 0; 485 size_t tLen = fEnd - fChar; 486 size_t oIndex = 0; 487 while (oIndex < oLen && tIndex < tLen) { 488 bool tSpace = ' ' >= fChar[tIndex]; 489 bool oSpace = ' ' >= str[oIndex]; 490 if (tSpace != oSpace) { 491 break; 492 } 493 if (tSpace) { 494 do { 495 ++tIndex; 496 } while (tIndex < tLen && ' ' >= fChar[tIndex]); 497 do { 498 ++oIndex; 499 } while (oIndex < oLen && ' ' >= str[oIndex]); 500 continue; 501 } 502 if (fChar[tIndex] != str[oIndex]) { 503 break; 504 } 505 ++tIndex; 506 ++oIndex; 507 } 508 return oIndex >= oLen; 509 } 510 511 const char* strnchr(char ch, const char* end) const { 512 const char* ptr = fChar; 513 while (ptr < end) { 514 if (ptr[0] == ch) { 515 return ptr; 516 } 517 ++ptr; 518 } 519 return nullptr; 520 } 521 522 const char* strnstr(const char *match, const char* end) const { 523 size_t matchLen = strlen(match); 524 SkASSERT(matchLen > 0); 525 ptrdiff_t len = end - fChar; 526 SkASSERT(len >= 0); 527 if ((size_t) len < matchLen ) { 528 return nullptr; 529 } 530 size_t count = len - matchLen; 531 for (size_t index = 0; index <= count; index++) { 532 if (0 == strncmp(&fChar[index], match, matchLen)) { 533 return &fChar[index]; 534 } 535 } 536 return nullptr; 537 } 538 539 const char* trimmedBracketEnd(const char bracket) const { 540 int max = (int) (this->lineLength()); 541 int index = 0; 542 while (index < max && bracket != fChar[index]) { 543 ++index; 544 } 545 SkASSERT(index < max); 546 while (index > 0 && ' ' >= fChar[index - 1]) { 547 --index; 548 } 549 return fChar + index; 550 } 551 552 const char* trimmedBracketEnd(string bracket) const { 553 size_t max = (size_t) (this->lineLength()); 554 string line(fChar, max); 555 size_t index = line.find(bracket); 556 SkASSERT(index < max); 557 while (index > 0 && ' ' >= fChar[index - 1]) { 558 --index; 559 } 560 return fChar + index; 561 } 562 563 const char* trimmedBracketNoEnd(const char bracket) const { 564 int max = (int) (fEnd - fChar); 565 int index = 0; 566 while (index < max && bracket != fChar[index]) { 567 ++index; 568 } 569 SkASSERT(index < max); 570 while (index > 0 && ' ' >= fChar[index - 1]) { 571 --index; 572 } 573 return fChar + index; 574 } 575 576 const char* trimmedLineEnd() const { 577 const char* result = this->lineEnd(); 578 while (result > fChar && ' ' >= result[-1]) { 579 --result; 580 } 581 return result; 582 } 583 584 void trimEnd() { 585 while (fEnd > fStart && ' ' >= fEnd[-1]) { 586 --fEnd; 587 } 588 } 589 590 // FIXME: nothing else in TextParser knows from C++ -- 591 // there could be a class between TextParser and ParserCommon 592 virtual string typedefName(); 593 594 const char* wordEnd() const { 595 const char* end = fChar; 596 while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) { 597 ++end; 598 } 599 return end; 600 } 601 602 string fFileName; 603 const char* fStart; 604 const char* fLine; 605 const char* fChar; 606 const char* fEnd; 607 size_t fLineCount; 608 }; 609 610 class TextParserSave { 611 public: 612 TextParserSave(TextParser* parser) { 613 fParser = parser; 614 fSave.fFileName = parser->fFileName; 615 fSave.fStart = parser->fStart; 616 fSave.fLine = parser->fLine; 617 fSave.fChar = parser->fChar; 618 fSave.fEnd = parser->fEnd; 619 fSave.fLineCount = parser->fLineCount; 620 } 621 622 void restore() const { 623 fParser->fFileName = fSave.fFileName; 624 fParser->fStart = fSave.fStart; 625 fParser->fLine = fSave.fLine; 626 fParser->fChar = fSave.fChar; 627 fParser->fEnd = fSave.fEnd; 628 fParser->fLineCount = fSave.fLineCount; 629 } 630 631 private: 632 TextParser* fParser; 633 TextParser fSave; 634 }; 635 636 static inline bool has_nonwhitespace(string s) { 637 bool nonwhite = false; 638 for (const char& c : s) { 639 if (' ' < c) { 640 nonwhite = true; 641 break; 642 } 643 } 644 return nonwhite; 645 } 646 647 static inline void trim_end(string &s) { 648 s.erase(std::find_if(s.rbegin(), s.rend(), 649 std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end()); 650 } 651 652 static inline void trim_end_spaces(string &s) { 653 while (s.length() > 0 && ' ' == s.back()) { 654 s.pop_back(); 655 } 656 } 657 658 static inline void trim_start(string &s) { 659 s.erase(s.begin(), std::find_if(s.begin(), s.end(), 660 std::not1(std::ptr_fun<int, int>(std::isspace)))); 661 } 662 663 static inline void trim_start_end(string& s) { 664 trim_start(s); 665 trim_end(s); 666 } 667 668 static inline string trim_inline_spaces(string s) { 669 bool lastSpace = false; 670 string trimmed; 671 for (const char* ptr = &s.front(); ptr <= &s.back(); ++ptr) { 672 char c = *ptr; 673 if (' ' >= c) { 674 if (!lastSpace) { 675 trimmed += ' '; 676 } 677 lastSpace = true; 678 continue; 679 } 680 lastSpace = false; 681 trimmed += c; 682 } 683 return trimmed; 684 } 685 686 class EscapeParser : public TextParser { 687 public: 688 EscapeParser(const char* start, const char* end) : 689 TextParser("", start, end, 0) { 690 const char* reader = fStart; 691 fStorage = new char[end - start]; 692 char* writer = fStorage; 693 while (reader < fEnd) { 694 char ch = *reader++; 695 if (ch != '\\') { 696 *writer++ = ch; 697 } else { 698 char ctrl = *reader++; 699 if (ctrl == 'u') { 700 unsigned unicode = 0; 701 for (int i = 0; i < 4; ++i) { 702 unicode <<= 4; 703 SkASSERT((reader[0] >= '0' && reader[0] <= '9') || 704 (reader[0] >= 'A' && reader[0] <= 'F') || 705 (reader[0] >= 'a' && reader[0] <= 'f')); 706 int nibble = *reader++ - '0'; 707 if (nibble > 9) { 708 nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1; 709 } 710 unicode |= nibble; 711 } 712 SkASSERT(unicode < 256); 713 *writer++ = (unsigned char) unicode; 714 } else { 715 SkASSERT(ctrl == 'n'); 716 *writer++ = '\n'; 717 } 718 } 719 } 720 fStart = fLine = fChar = fStorage; 721 fEnd = writer; 722 } 723 724 ~EscapeParser() override { 725 delete fStorage; 726 } 727 private: 728 char* fStorage; 729 }; 730 731 // some methods cannot be trivially parsed; look for class-name / ~ / operator 732 class MethodParser : public TextParser { 733 public: 734 MethodParser(string className, string fileName, 735 const char* start, const char* end, int lineCount) 736 : TextParser(fileName, start, end, lineCount) 737 , fClassName(className) { 738 size_t doubleColons = className.find_last_of("::"); 739 if (string::npos != doubleColons) { 740 fLocalName = className.substr(doubleColons + 1); 741 SkASSERT(fLocalName.length() > 0); 742 } 743 } 744 745 ~MethodParser() override {} 746 747 string localName() const { 748 return fLocalName; 749 } 750 751 void setLocalName(string name) { 752 if (name == fClassName) { 753 fLocalName = ""; 754 } else { 755 fLocalName = name; 756 } 757 } 758 759 // returns true if close brace was skipped 760 int skipToMethodStart() { 761 if (!fClassName.length()) { 762 return this->skipToAlphaNum(); 763 } 764 int braceCount = 0; 765 while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) { 766 braceCount += '{' == this->peek(); 767 braceCount -= '}' == this->peek(); 768 this->next(); 769 } 770 return braceCount; 771 } 772 773 void skipToMethodEnd(Resolvable resolvable); 774 775 bool wordEndsWith(const char* str) const { 776 const char* space = this->strnchr(' ', fEnd); 777 if (!space) { 778 return false; 779 } 780 size_t len = strlen(str); 781 if (space < fChar + len) { 782 return false; 783 } 784 return !strncmp(str, space - len, len); 785 } 786 787 private: 788 string fClassName; 789 string fLocalName; 790 typedef TextParser INHERITED; 791 }; 792 793 #endif 794