Home | History | Annotate | Download | only in bookmaker
      1 /*
      2  * Copyright 2017 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "bookmaker.h"
      9 
     10 #include "SkOSFile.h"
     11 #include "SkOSPath.h"
     12 
     13 /*
     14 things to do
     15 if cap word is beginning of sentence, add it to table as lower-case
     16    word must have only a single initial capital
     17 
     18 if word is camel cased, look for :: matches on suffix
     19 
     20 when function crosses lines, whole thing isn't seen as a 'word' e.g., search for largeArc in path
     21 
     22 words in external not seen
     23  */
     24 struct CheckEntry {
     25     string fFile;
     26     int fLine;
     27     int fCount;
     28 };
     29 
     30 class SpellCheck : public ParserCommon {
     31 public:
     32     SpellCheck(const BmhParser& bmh) : ParserCommon()
     33         , fBmhParser(bmh) {
     34         this->reset();
     35     }
     36     bool check(const char* match);
     37     void report(SkCommandLineFlags::StringArray report);
     38 private:
     39     enum class TableState {
     40         kNone,
     41         kRow,
     42         kColumn,
     43     };
     44 
     45     bool check(Definition* );
     46     bool checkable(MarkType markType);
     47     void childCheck(const Definition* def, const char* start);
     48     void leafCheck(const char* start, const char* end);
     49     bool parseFromFile(const char* path) override { return true; }
     50     void printCheck(const string& str);
     51 
     52     void reset() override {
     53         INHERITED::resetCommon();
     54         fMethod = nullptr;
     55         fRoot = nullptr;
     56         fTableState = TableState::kNone;
     57         fInCode = false;
     58         fInConst = false;
     59         fInFormula = false;
     60         fInDescription = false;
     61         fInStdOut = false;
     62     }
     63 
     64     void wordCheck(const string& str);
     65     void wordCheck(ptrdiff_t len, const char* ch);
     66 
     67     unordered_map<string, CheckEntry> fCode;
     68     unordered_map<string, CheckEntry> fColons;
     69     unordered_map<string, CheckEntry> fDigits;
     70     unordered_map<string, CheckEntry> fDots;
     71     unordered_map<string, CheckEntry> fParens;  // also hold destructors, operators
     72     unordered_map<string, CheckEntry> fUnderscores;
     73     unordered_map<string, CheckEntry> fWords;
     74     const BmhParser& fBmhParser;
     75     Definition* fMethod;
     76     RootDefinition* fRoot;
     77     TableState fTableState;
     78     bool fInCode;
     79     bool fInConst;
     80     bool fInDescription;
     81     bool fInFormula;
     82     bool fInStdOut;
     83     typedef ParserCommon INHERITED;
     84 };
     85 
/* This doesn't perform a traditional spell or grammar check, although
   maybe it should. Instead it looks for words used uncommonly and lower
   case words that match capitalized words that are not sentence starters.
   It also looks for articles preceding capitalized words and their
   modifiers to try to maintain a consistent voice.
   Maybe also look for passive verbs (e.g. 'is') and suggest active ones?
 */
     93 void BmhParser::spellCheck(const char* match, SkCommandLineFlags::StringArray report) const {
     94     SpellCheck checker(*this);
     95     checker.check(match);
     96     checker.report(report);
     97 }
     98 
     99 void BmhParser::spellStatus(const char* statusFile, SkCommandLineFlags::StringArray report) const {
    100     SpellCheck checker(*this);
    101     StatusIter iter(statusFile, ".bmh", StatusFilter::kInProgress);
    102     string match = iter.baseDir();
    103     checker.check(match.c_str());
    104     checker.report(report);
    105 }
    106 
    107 bool SpellCheck::check(const char* match) {
    108     for (const auto& topic : fBmhParser.fTopicMap) {
    109         Definition* topicDef = topic.second;
    110         if (topicDef->fParent) {
    111             continue;
    112         }
    113         if (!topicDef->isRoot()) {
    114             return this->reportError<bool>("expected root topic");
    115         }
    116         fRoot = topicDef->asRoot();
    117         if (string::npos == fRoot->fFileName.rfind(match)) {
    118             continue;
    119         }
    120        this->check(topicDef);
    121     }
    122     return true;
    123 }
    124 
    125 static bool all_lower(const string& str) {
    126     for (auto c : str) {
    127         if (!islower(c)) {
    128             return false;
    129         }
    130     }
    131     return true;
    132 }
    133 
    134 bool SpellCheck::check(Definition* def) {
    135     fFileName = def->fFileName;
    136     fLineCount = def->fLineCount;
    137     string printable = def->printableName();
    138     const char* textStart = def->fContentStart;
    139     if (MarkType::kParam != def->fMarkType && MarkType::kConst != def->fMarkType &&
    140             MarkType::kPrivate != def->fMarkType && TableState::kNone != fTableState) {
    141         fTableState = TableState::kNone;
    142     }
    143     switch (def->fMarkType) {
    144         case MarkType::kAlias:
    145             break;
    146         case MarkType::kAnchor:
    147             break;
    148         case MarkType::kBug:
    149             break;
    150         case MarkType::kClass:
    151             this->wordCheck(def->fName);
    152             break;
    153         case MarkType::kCode:
    154             fInCode = true;
    155             break;
    156         case MarkType::kColumn:
    157             break;
    158         case MarkType::kComment:
    159             break;
    160         case MarkType::kConst: {
    161             fInConst = true;
    162             if (TableState::kNone == fTableState) {
    163                 fTableState = TableState::kRow;
    164             }
    165             if (TableState::kRow == fTableState) {
    166                 fTableState = TableState::kColumn;
    167             }
    168             this->wordCheck(def->fName);
    169             const char* lineEnd = strchr(textStart, '\n');
    170             this->wordCheck(lineEnd - textStart, textStart);
    171             textStart = lineEnd;
    172         } break;
    173         case MarkType::kDefine:
    174             break;
    175         case MarkType::kDefinedBy:
    176             break;
    177         case MarkType::kDeprecated:
    178             break;
    179         case MarkType::kDescription:
    180             fInDescription = true;
    181             break;
    182         case MarkType::kDoxygen:
    183             break;
    184         case MarkType::kDuration:
    185             break;
    186         case MarkType::kEnum:
    187         case MarkType::kEnumClass:
    188             this->wordCheck(def->fName);
    189             break;
    190         case MarkType::kExample:
    191             break;
    192         case MarkType::kExperimental:
    193             break;
    194         case MarkType::kExternal:
    195             break;
    196         case MarkType::kFile:
    197             break;
    198         case MarkType::kFormula:
    199             fInFormula = true;
    200             break;
    201         case MarkType::kFunction:
    202             break;
    203         case MarkType::kHeight:
    204             break;
    205         case MarkType::kImage:
    206             break;
    207         case MarkType::kLegend:
    208             break;
    209         case MarkType::kLink:
    210             break;
    211         case MarkType::kList:
    212             break;
    213         case MarkType::kLiteral:
    214             break;
    215         case MarkType::kMarkChar:
    216             break;
    217         case MarkType::kMember:
    218             break;
    219         case MarkType::kMethod: {
    220             string method_name = def->methodName();
    221             if (all_lower(method_name)) {
    222                 method_name += "()";
    223             }
    224             string formattedStr = def->formatFunction();
    225             if (!def->isClone() && Definition::MethodType::kOperator != def->fMethodType) {
    226                 this->wordCheck(method_name);
    227             }
    228             fTableState = TableState::kNone;
    229             fMethod = def;
    230             } break;
    231         case MarkType::kNoExample:
    232             break;
    233         case MarkType::kOutdent:
    234             break;
    235         case MarkType::kParam: {
    236             if (TableState::kNone == fTableState) {
    237                 fTableState = TableState::kRow;
    238             }
    239             if (TableState::kRow == fTableState) {
    240                 fTableState = TableState::kColumn;
    241             }
    242             TextParser paramParser(def->fFileName, def->fStart, def->fContentStart,
    243                     def->fLineCount);
    244             paramParser.skipWhiteSpace();
    245             SkASSERT(paramParser.startsWith("#Param"));
    246             paramParser.next(); // skip hash
    247             paramParser.skipToNonAlphaNum(); // skip Param
    248             paramParser.skipSpace();
    249             const char* paramName = paramParser.fChar;
    250             paramParser.skipToSpace();
    251             fInCode = true;
    252             this->wordCheck(paramParser.fChar - paramName, paramName);
    253             fInCode = false;
    254        } break;
    255         case MarkType::kPlatform:
    256             break;
    257         case MarkType::kPrivate:
    258             break;
    259         case MarkType::kReturn:
    260             break;
    261         case MarkType::kRow:
    262             break;
    263         case MarkType::kSeeAlso:
    264             break;
    265         case MarkType::kStdOut: {
    266             fInStdOut = true;
    267             TextParser code(def);
    268             code.skipSpace();
    269             while (!code.eof()) {
    270                 const char* end = code.trimmedLineEnd();
    271                 this->wordCheck(end - code.fChar, code.fChar);
    272                 code.skipToLineStart();
    273             }
    274             fInStdOut = false;
    275             } break;
    276         case MarkType::kStruct:
    277             fRoot = def->asRoot();
    278             this->wordCheck(def->fName);
    279             break;
    280         case MarkType::kSubstitute:
    281             break;
    282         case MarkType::kSubtopic:
    283             this->printCheck(printable);
    284             break;
    285         case MarkType::kTable:
    286             break;
    287         case MarkType::kTemplate:
    288             break;
    289         case MarkType::kText:
    290             break;
    291         case MarkType::kTime:
    292             break;
    293         case MarkType::kToDo:
    294             break;
    295         case MarkType::kTopic:
    296             this->printCheck(printable);
    297             break;
    298         case MarkType::kTrack:
    299             // don't output children
    300             return true;
    301         case MarkType::kTypedef:
    302             break;
    303         case MarkType::kUnion:
    304             break;
    305         case MarkType::kVolatile:
    306             break;
    307         case MarkType::kWidth:
    308             break;
    309         default:
    310             SkASSERT(0); // handle everything
    311             break;
    312     }
    313     this->childCheck(def, textStart);
    314     switch (def->fMarkType) {  // post child work, at least for tables
    315         case MarkType::kCode:
    316             fInCode = false;
    317             break;
    318         case MarkType::kColumn:
    319             break;
    320         case MarkType::kDescription:
    321             fInDescription = false;
    322             break;
    323         case MarkType::kEnum:
    324         case MarkType::kEnumClass:
    325             break;
    326         case MarkType::kExample:
    327             break;
    328         case MarkType::kFormula:
    329             fInFormula = false;
    330             break;
    331         case MarkType::kLegend:
    332             break;
    333         case MarkType::kMethod:
    334             fMethod = nullptr;
    335             break;
    336         case MarkType::kConst:
    337             fInConst = false;
    338         case MarkType::kParam:
    339             SkASSERT(TableState::kColumn == fTableState);
    340             fTableState = TableState::kRow;
    341             break;
    342         case MarkType::kReturn:
    343         case MarkType::kSeeAlso:
    344             break;
    345         case MarkType::kRow:
    346             break;
    347         case MarkType::kStruct:
    348             fRoot = fRoot->rootParent();
    349             break;
    350         case MarkType::kTable:
    351             break;
    352         default:
    353             break;
    354     }
    355     return true;
    356 }
    357 
    358 bool SpellCheck::checkable(MarkType markType) {
    359     return BmhParser::Resolvable::kYes == fBmhParser.fMaps[(int) markType].fResolve;
    360 }
    361 
    362 void SpellCheck::childCheck(const Definition* def, const char* start) {
    363     const char* end;
    364     fLineCount = def->fLineCount;
    365     if (def->isRoot()) {
    366         fRoot = const_cast<RootDefinition*>(def->asRoot());
    367     }
    368     for (auto& child : def->fChildren) {
    369         end = child->fStart;
    370         if (this->checkable(def->fMarkType)) {
    371             this->leafCheck(start, end);
    372         }
    373         this->check(child);
    374         start = child->fTerminator;
    375     }
    376     if (this->checkable(def->fMarkType)) {
    377         end = def->fContentEnd;
    378         this->leafCheck(start, end);
    379     }
    380 }
    381 
    382 void SpellCheck::leafCheck(const char* start, const char* end) {
    383     const char* chPtr = start;
    384     int inAngles = 0;
    385     int inParens = 0;
    386     bool inQuotes = false;
    387     bool allLower = true;
    388     char priorCh = 0;
    389     char lastCh = 0;
    390     const char* wordStart = nullptr;
    391     const char* wordEnd = nullptr;
    392     const char* possibleEnd = nullptr;
    393     do {
    394         if (wordStart && wordEnd) {
    395             if (!allLower || (!inQuotes && '\"' != lastCh && !inParens
    396                     && ')' != lastCh && !inAngles && '>' != lastCh)) {
    397                 string word(wordStart, (possibleEnd ? possibleEnd : wordEnd) - wordStart);
    398                 wordCheck(word);
    399             }
    400             wordStart = nullptr;
    401         }
    402         if (chPtr == end) {
    403             break;
    404         }
    405         switch (*chPtr) {
    406             case '>':
    407                 if (isalpha(lastCh)) {
    408                     --inAngles;
    409                     SkASSERT(inAngles >= 0);
    410                 }
    411                 wordEnd = chPtr;
    412                 break;
    413             case '(':
    414                 ++inParens;
    415                 possibleEnd = chPtr;
    416                 break;
    417             case ')':
    418                 --inParens;
    419                 if ('(' == lastCh) {
    420                     wordEnd = chPtr + 1;
    421                 } else {
    422                     wordEnd = chPtr;
    423                 }
    424                 SkASSERT(inParens >= 0 || fInStdOut);
    425                 break;
    426             case '\"':
    427                 inQuotes = !inQuotes;
    428                 wordEnd = chPtr;
    429                 SkASSERT(inQuotes == !wordStart);
    430                 break;
    431             case 'A': case 'B': case 'C': case 'D': case 'E':
    432             case 'F': case 'G': case 'H': case 'I': case 'J':
    433             case 'K': case 'L': case 'M': case 'N': case 'O':
    434             case 'P': case 'Q': case 'R': case 'S': case 'T':
    435             case 'U': case 'V': case 'W': case 'X': case 'Y':
    436             case 'Z':
    437                 allLower = false;
    438             case 'a': case 'b': case 'c': case 'd': case 'e':
    439             case 'f': case 'g': case 'h': case 'i': case 'j':
    440             case 'k': case 'l': case 'm': case 'n': case 'o':
    441             case 'p': case 'q': case 'r': case 's': case 't':
    442             case 'u': case 'v': case 'w': case 'x': case 'y':
    443             case 'z':
    444                 if (!wordStart) {
    445                     wordStart = chPtr;
    446                     wordEnd = nullptr;
    447                     possibleEnd = nullptr;
    448                     allLower = 'a' <= *chPtr;
    449                     if ('<' == lastCh || ('<' == priorCh && '/' == lastCh)) {
    450                         ++inAngles;
    451                     }
    452                 }
    453                 break;
    454             case '0': case '1': case '2': case '3': case '4':
    455             case '5': case '6': case '7': case '8': case '9':
    456             case '_':
    457                 allLower = false;
    458             case '-':  // note that dash doesn't clear allLower
    459                 break;
    460             default:
    461                 wordEnd = chPtr;
    462                 break;
    463         }
    464         priorCh = lastCh;
    465         lastCh = *chPtr;
    466     } while (++chPtr <= end);
    467 }
    468 
    469 void SpellCheck::printCheck(const string& str) {
    470     string word;
    471     for (std::stringstream stream(str); stream >> word; ) {
    472         wordCheck(word);
    473     }
    474 }
    475 
    476 static bool stringCompare(const std::pair<string, CheckEntry>& i, const std::pair<string, CheckEntry>& j) {
    477     return i.first.compare(j.first) < 0;
    478 }
    479 
    480 void SpellCheck::report(SkCommandLineFlags::StringArray report) {
    481     vector<std::pair<string, CheckEntry>> elems(fWords.begin(), fWords.end());
    482     std::sort(elems.begin(), elems.end(), stringCompare);
    483     if (report.contains("once")) {
    484         for (auto iter : elems) {
    485             if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
    486                 continue;
    487             }
    488             if (string::npos != iter.second.fFile.find("markup.bmh")) {
    489                 continue;
    490             }
    491             if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
    492                 continue;
    493             }
    494             if (iter.second.fCount == 1) {
    495                 SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine,
    496                         iter.first.c_str());
    497             }
    498         }
    499         SkDebugf("\n");
    500         return;
    501     }
    502     if (report.contains("all")) {
    503         int column = 0;
    504         char lastInitial = 'a';
    505         int count = 0;
    506         for (auto iter : elems) {
    507             if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
    508                 continue;
    509             }
    510             if (string::npos != iter.second.fFile.find("markup.bmh")) {
    511                 continue;
    512             }
    513             if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
    514                 continue;
    515             }
    516             string check = iter.first.c_str();
    517             bool allLower = true;
    518             for (auto c : check) {
    519                 if (isupper(c)) {
    520                     allLower = false;
    521                     break;
    522                 }
    523             }
    524             if (!allLower) {
    525                 continue;
    526             }
    527             if (column + check.length() > 100 || check[0] != lastInitial) {
    528                 SkDebugf("\n");
    529                 column = 0;
    530             }
    531             if (check[0] != lastInitial) {
    532                 SkDebugf("\n");
    533                 lastInitial = check[0];
    534             }
    535             SkDebugf("%s ", check.c_str());
    536             column += check.length();
    537             ++count;
    538         }
    539         SkDebugf("\n\ncount = %d\n", count);
    540         return;
    541     }
    542     int index = 0;
    543     const char* mispelled = report[0];
    544     for (auto iter : elems) {
    545         if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
    546             continue;
    547         }
    548         if (string::npos != iter.second.fFile.find("markup.bmh")) {
    549             continue;
    550         }
    551         if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
    552             continue;
    553         }
    554         string check = iter.first.c_str();
    555         while (check.compare(mispelled) > 0) {
    556             SkDebugf("%s not found\n", mispelled);
    557             if (report.count() == ++index) {
    558                 break;
    559             }
    560         }
    561         if (report.count() == index) {
    562             break;
    563         }
    564         if (check.compare(mispelled) == 0) {
    565             SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine,
    566                     iter.first.c_str());
    567             if (report.count() == ++index) {
    568                 break;
    569             }
    570         }
    571     }
    572 }
    573 
    574 void SpellCheck::wordCheck(const string& str) {
    575     if ("nullptr" == str) {
    576         return;  // doesn't seem worth it, treating nullptr as a word in need of correction
    577     }
    578     bool hasColon = false;
    579     bool hasDot = false;
    580     bool hasParen = false;
    581     bool hasUnderscore = false;
    582     bool sawDash = false;
    583     bool sawDigit = false;
    584     bool sawSpecial = false;
    585     SkASSERT(str.length() > 0);
    586     SkASSERT(isalpha(str[0]) || '~' == str[0]);
    587     for (char ch : str) {
    588         if (isalpha(ch) || '-' == ch) {
    589             sawDash |= '-' == ch;
    590             continue;
    591         }
    592         bool isColon = ':' == ch;
    593         hasColon |= isColon;
    594         bool isDot = '.' == ch;
    595         hasDot |= isDot;
    596         bool isParen = '(' == ch || ')' == ch || '~' == ch || '=' == ch || '!' == ch ||
    597                 '[' == ch || ']' == ch;
    598         hasParen |= isParen;
    599         bool isUnderscore = '_' == ch;
    600         hasUnderscore |= isUnderscore;
    601         if (isColon || isDot || isUnderscore || isParen) {
    602             continue;
    603         }
    604         if (isdigit(ch)) {
    605             sawDigit = true;
    606             continue;
    607         }
    608         if ('&' == ch || ',' == ch || ' ' == ch) {
    609             sawSpecial = true;
    610             continue;
    611         }
    612         SkASSERT(0);
    613     }
    614     if (sawSpecial && !hasParen) {
    615         SkASSERT(0);
    616     }
    617     bool inCode = fInCode;
    618     if (hasUnderscore && isupper(str[0]) && ('S' != str[0] || 'K' != str[1])
    619             && !hasColon && !hasDot && !hasParen && !fInStdOut && !inCode && !fInConst
    620             && !sawDigit && !sawSpecial && !sawDash) {
    621         std::istringstream ss(str);
    622         string token;
    623         while (std::getline(ss, token, '_')) {
    624             if (token.length()) {
    625                 this->wordCheck(token);
    626             }
    627         }
    628         return;
    629     }
    630     if (!hasColon && !hasDot && !hasParen && !hasUnderscore
    631             && !fInStdOut && !inCode && !fInConst && !sawDigit
    632             && islower(str[0]) && isupper(str[1])) {
    633         inCode = true;
    634     }
    635     bool methodParam = false;
    636     if (fMethod) {
    637         for (auto child : fMethod->fChildren) {
    638             if (MarkType::kParam == child->fMarkType && str == child->fName) {
    639                 methodParam = true;
    640                 break;
    641             }
    642         }
    643     }
    644     auto& mappy = hasColon ? fColons :
    645                   hasDot ? fDots :
    646                   hasParen ? fParens :
    647                   hasUnderscore ? fUnderscores :
    648                   fInStdOut || fInFormula || inCode || fInConst || methodParam ? fCode :
    649                   sawDigit ? fDigits : fWords;
    650     auto iter = mappy.find(str);
    651     if (mappy.end() != iter) {
    652         iter->second.fCount += 1;
    653     } else {
    654         CheckEntry* entry = &mappy[str];
    655         entry->fFile = fFileName;
    656         entry->fLine = fLineCount;
    657         entry->fCount = 1;
    658     }
    659 }
    660 
    661 void SpellCheck::wordCheck(ptrdiff_t len, const char* ch) {
    662     leafCheck(ch, ch + len);
    663 }
    664