Lines Matching refs:DFA
5 // A DFA (deterministic finite automaton)-based regular expression search.
7 // The DFA search has two main parts: the construction of the automaton,
20 // the definition of class DFA.
31 "Whether the RE2 DFA should bail out early "
47 // Changing this to true compiles in prints that trace execution of the DFA.
51 // A DFA implementation of a regular expression program.
54 // the comments in the sections that follow the DFA definition.
55 class DFA {
57 DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem);
58 ~DFA();
70 // If "run_forward" is true, the DFA runs from text.begin() to text.end().
71 // If it is false, the DFA runs from text.end() to text.begin(),
73 // If the DFA cannot complete the search (for example, if it is out of
79 // Builds out all states for the entire DFA. FOR TESTING ONLY
93 // A single DFA state. The DFA is represented as a graph of these
198 // Resets the DFA State cache, flushing all saved State* information.
342 Prog::MatchKind kind_; // The kind of DFA.
379 // Internally, the DFA uses a sparse array of
384 class DFA::Workq : public SparseSet {
434 DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
453 // Account for space needed for DFA, q0, q1, astack.
454 mem_budget_ -= sizeof(DFA);
459 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
474 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
485 DFA::~DFA() {
492 // In the DFA state graph, s->next[c] == NULL means that the
507 string DFA::DumpWorkq(Workq* q) {
510 for (DFA::Workq::iterator it = q->begin(); it != q->end(); ++it) {
523 string DFA::DumpState(State* state) {
548 // DFA state graph construction.
550 // The DFA state graph is a heavily-linked collection of State* structures.
560 // textbook DFA implementation, which would use an unordered set.
562 // the DFA matches, not where it matches in the text. To decide where the
563 // DFA matches, we need to mimic the behavior of the dominant backtracking
566 // The DFA execution tries these many executions in parallel, representing
576 // The flag word also contains two DFA-specific bits: kFlagMatch if the state
589 // NOTE(rsc): The choice of State construction determines whether the DFA
591 // traditional DFA implementations (so-called leftmost longest matching as
604 DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) {
737 DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
781 void DFA::ClearCache() {
795 void DFA::StateToWorkq(State* s, Workq* q) {
807 void DFA::AddToQueue(Workq* q, int id, uint flag) {
851 case kInstCapture: // DFA treats captures as no-ops.
895 void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) {
909 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
959 DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
961 // even if the DFA is being run by multiple threads.
967 DFA::State* DFA::RunStateOnByte(State* state, int c) {
1076 // DFA cache reset.
1080 // The DFA uses a reader-writer mutex to protect the state graph itself.
1093 class DFA::RWLocker {
1116 DFA::RWLocker::RWLocker(Mutex* mu)
1124 void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
1132 DFA::RWLocker::~RWLocker() {
1140 // When the DFA's State cache fills, we discard all the states in the
1151 void DFA::ResetCache(RWLocker* cache_lock) {
1162 LOG(INFO) << "DFA memory cache could be too small: "
1181 // in the new cache. For example, in a DFA method ("this" is a DFA):
1189 // is known to have room for at least a couple states (otherwise the DFA
1192 class DFA::StateSaver {
1194 explicit StateSaver(DFA* dfa, State* state);
1200 // since the DFA guarantees to have room in the cache
1206 DFA* dfa_; // the DFA to use
1216 DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
1217 dfa_ = dfa;
1234 DFA::StateSaver::~StateSaver() {
1239 DFA::State* DFA::StateSaver::Restore() {
1252 // DFA execution.
1264 // that the DFA has reached a so-called "dead state", in which any match
1269 // quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[]
1279 // Third, it is common for a DFA for an unanchored match to begin in a
1280 // state in which only one particular byte value can take the DFA to a
1282 // situation, the DFA can do better than executing the simple loop.
1291 // match is found, it must be noted, but the DFA must continue on in
1297 // Fifth, one algorithm that uses the DFA needs it to run over the
1299 // Passing false for the "run_forward" flag causes the DFA to run backward.
1318 inline bool DFA::InlinedSearchLoop(SearchParams* params,
1378 // The alternative would be either one DFA per thread
1398 // so if resetp != NULL, it means we filled the DFA state
1447 // The DFA notices the match one byte late,
1528 bool DFA::SearchFFF(SearchParams* params) {
1531 bool DFA::SearchFFT(SearchParams* params) {
1534 bool DFA::SearchFTF(SearchParams* params) {
1537 bool DFA::SearchFTT(SearchParams* params) {
1540 bool DFA::SearchTFF(SearchParams* params) {
1543 bool DFA::SearchTFT(SearchParams* params) {
1546 bool DFA::SearchTTF(SearchParams* params) {
1549 bool DFA::SearchTTT(SearchParams* params) {
1554 bool DFA::SlowSearchLoop(SearchParams* params) {
1563 bool DFA::FastSearchLoop(SearchParams* params) {
1566 static bool (DFA::*Searches[])(SearchParams*) = {
1567 &DFA::SearchFFF,
1568 &DFA::SearchFFT,
1569 &DFA::SearchFTF,
1570 &DFA::SearchFTT,
1571 &DFA::SearchTFF,
1572 &DFA::SearchTFT,
1573 &DFA::SearchTTF,
1574 &DFA::SearchTTT,
1585 // The discussion of DFA execution above ignored the question of how
1603 // combine to make the eight cases recorded in the DFA's begin_text_[2],
1609 // state for the DFA search loop. Fills in params and returns true on success.
1611 bool DFA::AnalyzeSearch(SearchParams* params) {
1682 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
1746 // The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
1747 bool DFA::Search(const StringPiece& text,
1800 // Deletes dfa.
1804 // class DFA out of this file. If you set
1805 // prog->dfa_ = dfa;
1808 // so that ~Prog can delete the dfa.
1809 static void DeleteDFA(DFA* dfa) {
1810 delete dfa;
1813 DFA* Prog::GetDFA(MatchKind kind) {
1814 DFA*volatile* pdfa;
1823 DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa);
1824 if (dfa != NULL) {
1825 ANNOTATE_HAPPENS_AFTER(dfa);
1826 return dfa;
1830 dfa = *pdfa;
1831 if (dfa != NULL) {
1832 ANNOTATE_HAPPENS_AFTER(dfa);
1833 return dfa;
1836 // For a forward DFA, half the memory goes to each DFA.
1837 // For a reverse DFA, all the memory goes to the
1838 // "longest match" DFA, because RE2 never does reverse
1847 dfa = new DFA(this, kind, m);
1851 ANNOTATE_HAPPENS_BEFORE(dfa);
1853 *pdfa = dfa;
1855 return dfa;
1864 // and sets match0->begin() to text.begin(), since the DFA can't track
1867 // This is the only external interface (class DFA only exists in this file).
1909 DFA* dfa = GetDFA(kind);
1911 bool matched = dfa->Search(text, context, anchored,
1933 // Build out all states in DFA. Returns number of states.
1934 int DFA::BuildAllStates() {
1967 // Build out all states in DFA for kind. Returns number of states.
1975 bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
2008 // The DFA is essentially a big graph rooted at params.start,
2025 // The RunStateOnByteUnlocked call asks the DFA to build out the graph.
2026 // It returns NULL only if the DFA has run out of memory,
2046 if (ns == NULL) // DFA out of memory
2055 if (ns == NULL) // DFA out of memory
2118 DFA* dfa = NULL;
2124 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
2127 dfa = dfa_longest_;
2129 return dfa->PossibleMatchRange(min, max, maxlen);