Home | History | Annotate | Download | only in text
      1 /*
      2     Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3 
      4     This library is free software; you can redistribute it and/or
      5     modify it under the terms of the GNU Library General Public
      6     License as published by the Free Software Foundation; either
      7     version 2 of the License, or (at your option) any later version.
      8 
      9     This library is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12     Library General Public License for more details.
     13 
     14     You should have received a copy of the GNU Library General Public License
     15     along with this library; see the file COPYING.LIB.  If not, write to
     16     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     17     Boston, MA 02110-1301, USA.
     18 */
     19 
     20 #ifndef SegmentedString_h
     21 #define SegmentedString_h
     22 
     23 #include "PlatformString.h"
     24 #include <wtf/Deque.h>
     25 #include <wtf/text/TextPosition.h>
     26 
     27 namespace WebCore {
     28 
     29 class SegmentedString;
     30 
     31 class SegmentedSubstring {
     32 public:
     33     SegmentedSubstring()
     34         : m_length(0)
     35         , m_current(0)
     36         , m_doNotExcludeLineNumbers(true)
     37     {
     38     }
     39 
     40     SegmentedSubstring(const String& str)
     41         : m_length(str.length())
     42         , m_current(str.isEmpty() ? 0 : str.characters())
     43         , m_string(str)
     44         , m_doNotExcludeLineNumbers(true)
     45     {
     46     }
     47 
     48     void clear() { m_length = 0; m_current = 0; }
     49 
     50     bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
     51     bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
     52 
     53     void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
     54 
     55     int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
     56 
     57     void appendTo(String& str) const
     58     {
     59         if (m_string.characters() == m_current) {
     60             if (str.isEmpty())
     61                 str = m_string;
     62             else
     63                 str.append(m_string);
     64         } else
     65             str.append(String(m_current, m_length));
     66     }
     67 
     68 public:
     69     int m_length;
     70     const UChar* m_current;
     71 
     72 private:
     73     String m_string;
     74     bool m_doNotExcludeLineNumbers;
     75 };
     76 
     77 class SegmentedString {
     78 public:
     79     SegmentedString()
     80         : m_pushedChar1(0)
     81         , m_pushedChar2(0)
     82         , m_currentChar(0)
     83         , m_numberOfCharactersConsumedPriorToCurrentString(0)
     84         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
     85         , m_currentLine(0)
     86         , m_closed(false)
     87     {
     88     }
     89 
     90     SegmentedString(const String& str)
     91         : m_pushedChar1(0)
     92         , m_pushedChar2(0)
     93         , m_currentString(str)
     94         , m_currentChar(m_currentString.m_current)
     95         , m_numberOfCharactersConsumedPriorToCurrentString(0)
     96         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
     97         , m_currentLine(0)
     98         , m_closed(false)
     99     {
    100     }
    101 
    102     SegmentedString(const SegmentedString&);
    103 
    104     const SegmentedString& operator=(const SegmentedString&);
    105 
    106     void clear();
    107     void close();
    108 
    109     void append(const SegmentedString&);
    110     void prepend(const SegmentedString&);
    111 
    112     bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
    113     void setExcludeLineNumbers();
    114 
    115     void push(UChar c)
    116     {
    117         if (!m_pushedChar1) {
    118             m_pushedChar1 = c;
    119             m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
    120         } else {
    121             ASSERT(!m_pushedChar2);
    122             m_pushedChar2 = c;
    123         }
    124     }
    125 
    126     bool isEmpty() const { return !current(); }
    127     unsigned length() const;
    128 
    129     bool isClosed() const { return m_closed; }
    130 
    131     enum LookAheadResult {
    132         DidNotMatch,
    133         DidMatch,
    134         NotEnoughCharacters,
    135     };
    136 
    137     LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); }
    138     LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); }
    139 
    140     void advance()
    141     {
    142         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    143             --m_currentString.m_length;
    144             m_currentChar = ++m_currentString.m_current;
    145             return;
    146         }
    147         advanceSlowCase();
    148     }
    149 
    150     void advanceAndASSERT(UChar expectedCharacter)
    151     {
    152         ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter);
    153         advance();
    154     }
    155 
    156     void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
    157     {
    158         ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter));
    159         advance();
    160     }
    161 
    162     void advancePastNewline(int& lineNumber)
    163     {
    164         ASSERT(*current() == '\n');
    165         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    166             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
    167             lineNumber += newLineFlag;
    168             m_currentLine += newLineFlag;
    169             if (newLineFlag)
    170                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
    171             --m_currentString.m_length;
    172             m_currentChar = ++m_currentString.m_current;
    173             return;
    174         }
    175         advanceSlowCase(lineNumber);
    176     }
    177 
    178     void advancePastNonNewline()
    179     {
    180         ASSERT(*current() != '\n');
    181         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    182             --m_currentString.m_length;
    183             m_currentChar = ++m_currentString.m_current;
    184             return;
    185         }
    186         advanceSlowCase();
    187     }
    188 
    189     void advance(int& lineNumber)
    190     {
    191         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    192             int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
    193             lineNumber += newLineFlag;
    194             m_currentLine += newLineFlag;
    195             if (newLineFlag)
    196                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
    197             --m_currentString.m_length;
    198             m_currentChar = ++m_currentString.m_current;
    199             return;
    200         }
    201         advanceSlowCase(lineNumber);
    202     }
    203 
    204     // Writes the consumed characters into consumedCharacters, which must
    205     // have space for at least |count| characters.
    206     void advance(unsigned count, UChar* consumedCharacters);
    207 
    208     bool escaped() const { return m_pushedChar1; }
    209 
    210     int numberOfCharactersConsumed() const
    211     {
    212         int numberOfPushedCharacters = 0;
    213         if (m_pushedChar1) {
    214             ++numberOfPushedCharacters;
    215             if (m_pushedChar2)
    216                 ++numberOfPushedCharacters;
    217         }
    218         return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
    219     }
    220 
    221     String toString() const;
    222 
    223     const UChar& operator*() const { return *current(); }
    224     const UChar* operator->() const { return current(); }
    225 
    226 
    227     // The method is moderately slow, comparing to currentLine method.
    228     WTF::ZeroBasedNumber currentColumn() const;
    229     WTF::ZeroBasedNumber currentLine() const;
    230     // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
    231     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    232     void setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength);
    233 
    234 private:
    235     void append(const SegmentedSubstring&);
    236     void prepend(const SegmentedSubstring&);
    237 
    238     void advanceSlowCase();
    239     void advanceSlowCase(int& lineNumber);
    240     void advanceSubstring();
    241     const UChar* current() const { return m_currentChar; }
    242 
    243     static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); }
    244     static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); }
    245 
    246     template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
    247     inline LookAheadResult lookAheadInline(const String& string)
    248     {
    249         if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
    250             if (equals(string.characters(), m_currentString.m_current, string.length()))
    251                 return DidMatch;
    252             return DidNotMatch;
    253         }
    254         return lookAheadSlowCase<equals>(string);
    255     }
    256 
    257     template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
    258     LookAheadResult lookAheadSlowCase(const String& string)
    259     {
    260         unsigned count = string.length();
    261         if (count > length())
    262             return NotEnoughCharacters;
    263         UChar* consumedCharacters;
    264         String consumedString = String::createUninitialized(count, consumedCharacters);
    265         advance(count, consumedCharacters);
    266         LookAheadResult result = DidNotMatch;
    267         if (equals(string.characters(), consumedCharacters, count))
    268             result = DidMatch;
    269         prepend(SegmentedString(consumedString));
    270         return result;
    271     }
    272 
    273     bool isComposite() const { return !m_substrings.isEmpty(); }
    274 
    275     UChar m_pushedChar1;
    276     UChar m_pushedChar2;
    277     SegmentedSubstring m_currentString;
    278     const UChar* m_currentChar;
    279     int m_numberOfCharactersConsumedPriorToCurrentString;
    280     int m_numberOfCharactersConsumedPriorToCurrentLine;
    281     int m_currentLine;
    282     Deque<SegmentedSubstring> m_substrings;
    283     bool m_closed;
    284 };
    285 
    286 }
    287 
    288 #endif
    289