Home | History | Annotate | Download | only in text
      1 /*
      2     Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3 
      4     This library is free software; you can redistribute it and/or
      5     modify it under the terms of the GNU Library General Public
      6     License as published by the Free Software Foundation; either
      7     version 2 of the License, or (at your option) any later version.
      8 
      9     This library is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12     Library General Public License for more details.
     13 
     14     You should have received a copy of the GNU Library General Public License
     15     along with this library; see the file COPYING.LIB.  If not, write to
     16     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     17     Boston, MA 02110-1301, USA.
     18 */
     19 
     20 #ifndef SegmentedString_h
     21 #define SegmentedString_h
     22 
     23 #include "wtf/Deque.h"
     24 #include "wtf/text/StringBuilder.h"
     25 #include "wtf/text/TextPosition.h"
     26 #include "wtf/text/WTFString.h"
     27 
     28 namespace WebCore {
     29 
     30 class SegmentedString;
     31 
     32 class SegmentedSubstring {
     33 public:
     34     SegmentedSubstring()
     35         : m_length(0)
     36         , m_doNotExcludeLineNumbers(true)
     37         , m_is8Bit(false)
     38     {
     39         m_data.string16Ptr = 0;
     40     }
     41 
     42     SegmentedSubstring(const String& str)
     43         : m_length(str.length())
     44         , m_doNotExcludeLineNumbers(true)
     45         , m_string(str)
     46     {
     47         if (m_length) {
     48             if (m_string.is8Bit()) {
     49                 m_is8Bit = true;
     50                 m_data.string8Ptr = m_string.characters8();
     51             } else {
     52                 m_is8Bit = false;
     53                 m_data.string16Ptr = m_string.characters16();
     54             }
     55         } else
     56             m_is8Bit = false;
     57     }
     58 
     59     void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
     60 
     61     bool is8Bit() { return m_is8Bit; }
     62 
     63     bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
     64     bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
     65 
     66     void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
     67 
     68     int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
     69 
     70     void appendTo(StringBuilder& builder) const
     71     {
     72         int offset = m_string.length() - m_length;
     73 
     74         if (!offset) {
     75             if (m_length)
     76                 builder.append(m_string);
     77         } else
     78             builder.append(m_string.substring(offset, m_length));
     79     }
     80 
     81     UChar getCurrentChar8()
     82     {
     83         return *m_data.string8Ptr;
     84     }
     85 
     86     UChar getCurrentChar16()
     87     {
     88         return m_data.string16Ptr ? *m_data.string16Ptr : 0;
     89     }
     90 
     91     UChar incrementAndGetCurrentChar8()
     92     {
     93         ASSERT(m_data.string8Ptr);
     94         return *++m_data.string8Ptr;
     95     }
     96 
     97     UChar incrementAndGetCurrentChar16()
     98     {
     99         ASSERT(m_data.string16Ptr);
    100         return *++m_data.string16Ptr;
    101     }
    102 
    103     String currentSubString(unsigned length)
    104     {
    105         int offset = m_string.length() - m_length;
    106         return m_string.substring(offset, length);
    107     }
    108 
    109     ALWAYS_INLINE UChar getCurrentChar()
    110     {
    111         ASSERT(m_length);
    112         if (is8Bit())
    113             return getCurrentChar8();
    114         return getCurrentChar16();
    115     }
    116 
    117     ALWAYS_INLINE UChar incrementAndGetCurrentChar()
    118     {
    119         ASSERT(m_length);
    120         if (is8Bit())
    121             return incrementAndGetCurrentChar8();
    122         return incrementAndGetCurrentChar16();
    123     }
    124 
    125 public:
    126     union {
    127         const LChar* string8Ptr;
    128         const UChar* string16Ptr;
    129     } m_data;
    130     int m_length;
    131 
    132 private:
    133     bool m_doNotExcludeLineNumbers;
    134     bool m_is8Bit;
    135     String m_string;
    136 };
    137 
    138 class SegmentedString {
    139 public:
    140     SegmentedString()
    141         : m_pushedChar1(0)
    142         , m_pushedChar2(0)
    143         , m_currentChar(0)
    144         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    145         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    146         , m_currentLine(0)
    147         , m_closed(false)
    148         , m_empty(true)
    149         , m_fastPathFlags(NoFastPath)
    150         , m_advanceFunc(&SegmentedString::advanceEmpty)
    151         , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
    152     {
    153     }
    154 
    155     SegmentedString(const String& str)
    156         : m_pushedChar1(0)
    157         , m_pushedChar2(0)
    158         , m_currentString(str)
    159         , m_currentChar(0)
    160         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    161         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    162         , m_currentLine(0)
    163         , m_closed(false)
    164         , m_empty(!str.length())
    165         , m_fastPathFlags(NoFastPath)
    166     {
    167         if (m_currentString.m_length)
    168             m_currentChar = m_currentString.getCurrentChar();
    169         updateAdvanceFunctionPointers();
    170     }
    171 
    172     SegmentedString(const SegmentedString&);
    173 
    174     const SegmentedString& operator=(const SegmentedString&);
    175 
    176     void clear();
    177     void close();
    178 
    179     void append(const SegmentedString&);
    180     void prepend(const SegmentedString&);
    181 
    182     bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
    183     void setExcludeLineNumbers();
    184 
    185     void push(UChar c)
    186     {
    187         if (!m_pushedChar1) {
    188             m_pushedChar1 = c;
    189             m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
    190             updateSlowCaseFunctionPointers();
    191         } else {
    192             ASSERT(!m_pushedChar2);
    193             m_pushedChar2 = c;
    194         }
    195     }
    196 
    197     bool isEmpty() const { return m_empty; }
    198     unsigned length() const;
    199 
    200     bool isClosed() const { return m_closed; }
    201 
    202     enum LookAheadResult {
    203         DidNotMatch,
    204         DidMatch,
    205         NotEnoughCharacters,
    206     };
    207 
    208     LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
    209     LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
    210 
    211     void advance()
    212     {
    213         if (m_fastPathFlags & Use8BitAdvance) {
    214             ASSERT(!m_pushedChar1);
    215             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    216             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    217 
    218             if (!haveOneCharacterLeft)
    219                 return;
    220 
    221             updateSlowCaseFunctionPointers();
    222 
    223             return;
    224         }
    225 
    226         (this->*m_advanceFunc)();
    227     }
    228 
    229     inline void advanceAndUpdateLineNumber()
    230     {
    231         if (m_fastPathFlags & Use8BitAdvance) {
    232             ASSERT(!m_pushedChar1);
    233 
    234             bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
    235             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    236 
    237             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    238 
    239             if (!(haveNewLine | haveOneCharacterLeft))
    240                 return;
    241 
    242             if (haveNewLine) {
    243                 ++m_currentLine;
    244                 m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
    245             }
    246 
    247             if (haveOneCharacterLeft)
    248                 updateSlowCaseFunctionPointers();
    249 
    250             return;
    251         }
    252 
    253         (this->*m_advanceAndUpdateLineNumberFunc)();
    254     }
    255 
    256     void advanceAndASSERT(UChar expectedCharacter)
    257     {
    258         ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
    259         advance();
    260     }
    261 
    262     void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
    263     {
    264         ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
    265         advance();
    266     }
    267 
    268     void advancePastNonNewline()
    269     {
    270         ASSERT(currentChar() != '\n');
    271         advance();
    272     }
    273 
    274     void advancePastNewlineAndUpdateLineNumber()
    275     {
    276         ASSERT(currentChar() == '\n');
    277         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    278             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
    279             m_currentLine += newLineFlag;
    280             if (newLineFlag)
    281                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
    282             decrementAndCheckLength();
    283             m_currentChar = m_currentString.incrementAndGetCurrentChar();
    284             return;
    285         }
    286         advanceAndUpdateLineNumberSlowCase();
    287     }
    288 
    289     // Writes the consumed characters into consumedCharacters, which must
    290     // have space for at least |count| characters.
    291     void advance(unsigned count, UChar* consumedCharacters);
    292 
    293     bool escaped() const { return m_pushedChar1; }
    294 
    295     int numberOfCharactersConsumed() const
    296     {
    297         int numberOfPushedCharacters = 0;
    298         if (m_pushedChar1) {
    299             ++numberOfPushedCharacters;
    300             if (m_pushedChar2)
    301                 ++numberOfPushedCharacters;
    302         }
    303         return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
    304     }
    305 
    306     String toString() const;
    307 
    308     UChar currentChar() const { return m_currentChar; }
    309 
    310     // The method is moderately slow, comparing to currentLine method.
    311     OrdinalNumber currentColumn() const;
    312     OrdinalNumber currentLine() const;
    313     // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
    314     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    315     void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
    316 
    317 private:
    318     enum FastPathFlags {
    319         NoFastPath = 0,
    320         Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
    321         Use8BitAdvance = 1 << 1,
    322     };
    323 
    324     void append(const SegmentedSubstring&);
    325     void prepend(const SegmentedSubstring&);
    326 
    327     void advance8();
    328     void advance16();
    329     void advanceAndUpdateLineNumber8();
    330     void advanceAndUpdateLineNumber16();
    331     void advanceSlowCase();
    332     void advanceAndUpdateLineNumberSlowCase();
    333     void advanceEmpty();
    334     void advanceSubstring();
    335 
    336     void updateSlowCaseFunctionPointers();
    337 
    338     void decrementAndCheckLength()
    339     {
    340         ASSERT(m_currentString.m_length > 1);
    341         if (--m_currentString.m_length == 1)
    342             updateSlowCaseFunctionPointers();
    343     }
    344 
    345     void updateAdvanceFunctionPointers()
    346     {
    347         if ((m_currentString.m_length > 1) && !m_pushedChar1) {
    348             if (m_currentString.is8Bit()) {
    349                 m_advanceFunc = &SegmentedString::advance8;
    350                 m_fastPathFlags = Use8BitAdvance;
    351                 if (m_currentString.doNotExcludeLineNumbers()) {
    352                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
    353                     m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
    354                 } else
    355                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
    356                 return;
    357             }
    358 
    359             m_advanceFunc = &SegmentedString::advance16;
    360             m_fastPathFlags = NoFastPath;
    361             if (m_currentString.doNotExcludeLineNumbers())
    362                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
    363             else
    364                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
    365             return;
    366         }
    367 
    368         if (!m_currentString.m_length && !isComposite()) {
    369             m_advanceFunc = &SegmentedString::advanceEmpty;
    370             m_fastPathFlags = NoFastPath;
    371             m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
    372         }
    373 
    374         updateSlowCaseFunctionPointers();
    375     }
    376 
    377     inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
    378     {
    379         if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
    380             String currentSubstring = m_currentString.currentSubString(string.length());
    381             if (currentSubstring.startsWith(string, caseSensitive))
    382                 return DidMatch;
    383             return DidNotMatch;
    384         }
    385         return lookAheadSlowCase(string, caseSensitive);
    386     }
    387 
    388     LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
    389     {
    390         unsigned count = string.length();
    391         if (count > length())
    392             return NotEnoughCharacters;
    393         UChar* consumedCharacters;
    394         String consumedString = String::createUninitialized(count, consumedCharacters);
    395         advance(count, consumedCharacters);
    396         LookAheadResult result = DidNotMatch;
    397         if (consumedString.startsWith(string, caseSensitive))
    398             result = DidMatch;
    399         prepend(SegmentedString(consumedString));
    400         return result;
    401     }
    402 
    403     bool isComposite() const { return !m_substrings.isEmpty(); }
    404 
    405     UChar m_pushedChar1;
    406     UChar m_pushedChar2;
    407     SegmentedSubstring m_currentString;
    408     UChar m_currentChar;
    409     int m_numberOfCharactersConsumedPriorToCurrentString;
    410     int m_numberOfCharactersConsumedPriorToCurrentLine;
    411     int m_currentLine;
    412     Deque<SegmentedSubstring> m_substrings;
    413     bool m_closed;
    414     bool m_empty;
    415     unsigned char m_fastPathFlags;
    416     void (SegmentedString::*m_advanceFunc)();
    417     void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
    418 };
    419 
    420 }
    421 
    422 #endif
    423