1 /* 2 * Copyright 2017 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "RegexNode.h" 9 10 #include "NFA.h" 11 12 std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const { 13 std::vector<int> result; 14 switch (fKind) { 15 case kChar_Kind: 16 result.push_back(nfa->addState(NFAState(fPayload.fChar, accept))); 17 break; 18 case kCharset_Kind: { 19 std::vector<bool> chars; 20 for (const RegexNode& child : fChildren) { 21 if (child.fKind == kChar_Kind) { 22 while (chars.size() <= (size_t) child.fPayload.fChar) { 23 chars.push_back(false); 24 } 25 chars[child.fPayload.fChar] = true; 26 } else { 27 ASSERT(child.fKind == kRange_Kind); 28 while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) { 29 chars.push_back(false); 30 } 31 for (char c = child.fChildren[0].fPayload.fChar; 32 c <= child.fChildren[1].fPayload.fChar; 33 ++c) { 34 chars[c] = true; 35 } 36 } 37 } 38 result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept))); 39 break; 40 } 41 case kConcat_Kind: { 42 std::vector<int> right = fChildren[1].createStates(nfa, accept); 43 result = fChildren[0].createStates(nfa, right); 44 break; 45 } 46 case kDot_Kind: 47 result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept))); 48 break; 49 case kOr_Kind: { 50 std::vector<int> states = fChildren[0].createStates(nfa, accept); 51 result.insert(result.end(), states.begin(), states.end()); 52 states = fChildren[1].createStates(nfa, accept); 53 result.insert(result.end(), states.begin(), states.end()); 54 break; 55 } 56 case kPlus_Kind: { 57 std::vector<int> next = accept; 58 std::vector<int> placeholder; 59 int id = nfa->addState(NFAState(placeholder)); 60 next.push_back(id); 61 result = fChildren[0].createStates(nfa, next); 62 nfa->fStates[id] = NFAState(result); 63 break; 64 } 65 case kQuestion_Kind: 66 result = fChildren[0].createStates(nfa, accept); 67 result.insert(result.end(), accept.begin(), accept.end()); 68 break; 69 case kRange_Kind: 70 ABORT("unreachable"); 71 case kStar_Kind: { 72 std::vector<int> next = accept; 73 std::vector<int> placeholder; 74 int id = nfa->addState(NFAState(placeholder)); 75 next.push_back(id); 76 result = fChildren[0].createStates(nfa, next); 77 result.insert(result.end(), accept.begin(), accept.end()); 78 nfa->fStates[id] = NFAState(result); 79 break; 80 } 81 } 82 return result; 83 } 84 85 std::string RegexNode::description() const { 86 switch (fKind) { 87 case kChar_Kind: 88 return std::string(1, fPayload.fChar); 89 case kCharset_Kind: { 90 std::string result("["); 91 if (fPayload.fBool) { 92 result += "^"; 93 } 94 for (const RegexNode& c : fChildren) { 95 result += c.description(); 96 } 97 result += "]"; 98 return result; 99 } 100 case kConcat_Kind: 101 return fChildren[0].description() + fChildren[1].description(); 102 case kDot_Kind: 103 return "."; 104 case kOr_Kind: 105 return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")"; 106 case kPlus_Kind: 107 return fChildren[0].description() + "+"; 108 case kQuestion_Kind: 109 return fChildren[0].description() + "?"; 110 case kRange_Kind: 111 return fChildren[0].description() + "-" + fChildren[1].description(); 112 case kStar_Kind: 113 return fChildren[0].description() + "*"; 114 default: 115 return "<" + std::to_string(fKind) + ">"; 116 } 117 } 118