Home | History | Annotate | Download | only in text
      1 /*
      2     Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3 
      4     This library is free software; you can redistribute it and/or
      5     modify it under the terms of the GNU Library General Public
      6     License as published by the Free Software Foundation; either
      7     version 2 of the License, or (at your option) any later version.
      8 
      9     This library is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12     Library General Public License for more details.
     13 
     14     You should have received a copy of the GNU Library General Public License
     15     along with this library; see the file COPYING.LIB.  If not, write to
     16     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     17     Boston, MA 02110-1301, USA.
     18 */
     19 
     20 #ifndef SegmentedString_h
     21 #define SegmentedString_h
     22 
     23 #include "platform/PlatformExport.h"
     24 #include "wtf/Deque.h"
     25 #include "wtf/text/StringBuilder.h"
     26 #include "wtf/text/TextPosition.h"
     27 #include "wtf/text/WTFString.h"
     28 
     29 namespace WebCore {
     30 
     31 class SegmentedString;
     32 
     33 class PLATFORM_EXPORT SegmentedSubstring {
     34 public:
     35     SegmentedSubstring()
     36         : m_length(0)
     37         , m_doNotExcludeLineNumbers(true)
     38         , m_is8Bit(false)
     39     {
     40         m_data.string16Ptr = 0;
     41     }
     42 
     43     SegmentedSubstring(const String& str)
     44         : m_length(str.length())
     45         , m_doNotExcludeLineNumbers(true)
     46         , m_string(str)
     47     {
     48         if (m_length) {
     49             if (m_string.is8Bit()) {
     50                 m_is8Bit = true;
     51                 m_data.string8Ptr = m_string.characters8();
     52             } else {
     53                 m_is8Bit = false;
     54                 m_data.string16Ptr = m_string.characters16();
     55             }
     56         } else {
     57             m_is8Bit = false;
     58         }
     59     }
     60 
     61     void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
     62 
     63     bool is8Bit() { return m_is8Bit; }
     64 
     65     bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
     66     bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
     67 
     68     void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
     69 
     70     int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
     71 
     72     void appendTo(StringBuilder& builder) const
     73     {
     74         int offset = m_string.length() - m_length;
     75 
     76         if (!offset) {
     77             if (m_length)
     78                 builder.append(m_string);
     79         } else {
     80             builder.append(m_string.substring(offset, m_length));
     81         }
     82     }
     83 
     84     UChar getCurrentChar8()
     85     {
     86         return *m_data.string8Ptr;
     87     }
     88 
     89     UChar getCurrentChar16()
     90     {
     91         return m_data.string16Ptr ? *m_data.string16Ptr : 0;
     92     }
     93 
     94     UChar incrementAndGetCurrentChar8()
     95     {
     96         ASSERT(m_data.string8Ptr);
     97         return *++m_data.string8Ptr;
     98     }
     99 
    100     UChar incrementAndGetCurrentChar16()
    101     {
    102         ASSERT(m_data.string16Ptr);
    103         return *++m_data.string16Ptr;
    104     }
    105 
    106     String currentSubString(unsigned length)
    107     {
    108         int offset = m_string.length() - m_length;
    109         return m_string.substring(offset, length);
    110     }
    111 
    112     ALWAYS_INLINE UChar getCurrentChar()
    113     {
    114         ASSERT(m_length);
    115         if (is8Bit())
    116             return getCurrentChar8();
    117         return getCurrentChar16();
    118     }
    119 
    120     ALWAYS_INLINE UChar incrementAndGetCurrentChar()
    121     {
    122         ASSERT(m_length);
    123         if (is8Bit())
    124             return incrementAndGetCurrentChar8();
    125         return incrementAndGetCurrentChar16();
    126     }
    127 
    128 public:
    129     union {
    130         const LChar* string8Ptr;
    131         const UChar* string16Ptr;
    132     } m_data;
    133     int m_length;
    134 
    135 private:
    136     bool m_doNotExcludeLineNumbers;
    137     bool m_is8Bit;
    138     String m_string;
    139 };
    140 
    141 class PLATFORM_EXPORT SegmentedString {
    142 public:
    143     SegmentedString()
    144         : m_pushedChar1(0)
    145         , m_pushedChar2(0)
    146         , m_currentChar(0)
    147         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    148         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    149         , m_currentLine(0)
    150         , m_closed(false)
    151         , m_empty(true)
    152         , m_fastPathFlags(NoFastPath)
    153         , m_advanceFunc(&SegmentedString::advanceEmpty)
    154         , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
    155     {
    156     }
    157 
    158     SegmentedString(const String& str)
    159         : m_pushedChar1(0)
    160         , m_pushedChar2(0)
    161         , m_currentString(str)
    162         , m_currentChar(0)
    163         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    164         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    165         , m_currentLine(0)
    166         , m_closed(false)
    167         , m_empty(!str.length())
    168         , m_fastPathFlags(NoFastPath)
    169     {
    170         if (m_currentString.m_length)
    171             m_currentChar = m_currentString.getCurrentChar();
    172         updateAdvanceFunctionPointers();
    173     }
    174 
    175     void clear();
    176     void close();
    177 
    178     void append(const SegmentedString&);
    179     void prepend(const SegmentedString&);
    180 
    181     bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
    182     void setExcludeLineNumbers();
    183 
    184     void push(UChar c)
    185     {
    186         if (!m_pushedChar1) {
    187             m_pushedChar1 = c;
    188             m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
    189             updateSlowCaseFunctionPointers();
    190         } else {
    191             ASSERT(!m_pushedChar2);
    192             m_pushedChar2 = c;
    193         }
    194     }
    195 
    196     bool isEmpty() const { return m_empty; }
    197     unsigned length() const;
    198 
    199     bool isClosed() const { return m_closed; }
    200 
    201     enum LookAheadResult {
    202         DidNotMatch,
    203         DidMatch,
    204         NotEnoughCharacters,
    205     };
    206 
    207     LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
    208     LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
    209 
    210     void advance()
    211     {
    212         if (m_fastPathFlags & Use8BitAdvance) {
    213             ASSERT(!m_pushedChar1);
    214             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    215             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    216 
    217             if (!haveOneCharacterLeft)
    218                 return;
    219 
    220             updateSlowCaseFunctionPointers();
    221 
    222             return;
    223         }
    224 
    225         (this->*m_advanceFunc)();
    226     }
    227 
    228     inline void advanceAndUpdateLineNumber()
    229     {
    230         if (m_fastPathFlags & Use8BitAdvance) {
    231             ASSERT(!m_pushedChar1);
    232 
    233             bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
    234             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    235 
    236             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    237 
    238             if (!(haveNewLine | haveOneCharacterLeft))
    239                 return;
    240 
    241             if (haveNewLine) {
    242                 ++m_currentLine;
    243                 m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
    244             }
    245 
    246             if (haveOneCharacterLeft)
    247                 updateSlowCaseFunctionPointers();
    248 
    249             return;
    250         }
    251 
    252         (this->*m_advanceAndUpdateLineNumberFunc)();
    253     }
    254 
    255     void advanceAndASSERT(UChar expectedCharacter)
    256     {
    257         ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
    258         advance();
    259     }
    260 
    261     void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
    262     {
    263         ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
    264         advance();
    265     }
    266 
    267     void advancePastNonNewline()
    268     {
    269         ASSERT(currentChar() != '\n');
    270         advance();
    271     }
    272 
    273     void advancePastNewlineAndUpdateLineNumber()
    274     {
    275         ASSERT(currentChar() == '\n');
    276         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    277             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
    278             m_currentLine += newLineFlag;
    279             if (newLineFlag)
    280                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
    281             decrementAndCheckLength();
    282             m_currentChar = m_currentString.incrementAndGetCurrentChar();
    283             return;
    284         }
    285         advanceAndUpdateLineNumberSlowCase();
    286     }
    287 
    288     // Writes the consumed characters into consumedCharacters, which must
    289     // have space for at least |count| characters.
    290     void advance(unsigned count, UChar* consumedCharacters);
    291 
    292     bool escaped() const { return m_pushedChar1; }
    293 
    294     int numberOfCharactersConsumed() const
    295     {
    296         int numberOfPushedCharacters = 0;
    297         if (m_pushedChar1) {
    298             ++numberOfPushedCharacters;
    299             if (m_pushedChar2)
    300                 ++numberOfPushedCharacters;
    301         }
    302         return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
    303     }
    304 
    305     String toString() const;
    306 
    307     UChar currentChar() const { return m_currentChar; }
    308 
    309     // The method is moderately slow, comparing to currentLine method.
    310     OrdinalNumber currentColumn() const;
    311     OrdinalNumber currentLine() const;
    312     // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
    313     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    314     void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
    315 
    316 private:
    317     enum FastPathFlags {
    318         NoFastPath = 0,
    319         Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
    320         Use8BitAdvance = 1 << 1,
    321     };
    322 
    323     void append(const SegmentedSubstring&);
    324     void prepend(const SegmentedSubstring&);
    325 
    326     void advance8();
    327     void advance16();
    328     void advanceAndUpdateLineNumber8();
    329     void advanceAndUpdateLineNumber16();
    330     void advanceSlowCase();
    331     void advanceAndUpdateLineNumberSlowCase();
    332     void advanceEmpty();
    333     void advanceSubstring();
    334 
    335     void updateSlowCaseFunctionPointers();
    336 
    337     void decrementAndCheckLength()
    338     {
    339         ASSERT(m_currentString.m_length > 1);
    340         if (--m_currentString.m_length == 1)
    341             updateSlowCaseFunctionPointers();
    342     }
    343 
    344     void updateAdvanceFunctionPointers()
    345     {
    346         if ((m_currentString.m_length > 1) && !m_pushedChar1) {
    347             if (m_currentString.is8Bit()) {
    348                 m_advanceFunc = &SegmentedString::advance8;
    349                 m_fastPathFlags = Use8BitAdvance;
    350                 if (m_currentString.doNotExcludeLineNumbers()) {
    351                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
    352                     m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
    353                 } else {
    354                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
    355                 }
    356                 return;
    357             }
    358 
    359             m_advanceFunc = &SegmentedString::advance16;
    360             m_fastPathFlags = NoFastPath;
    361             if (m_currentString.doNotExcludeLineNumbers())
    362                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
    363             else
    364                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
    365             return;
    366         }
    367 
    368         if (!m_currentString.m_length && !isComposite()) {
    369             m_advanceFunc = &SegmentedString::advanceEmpty;
    370             m_fastPathFlags = NoFastPath;
    371             m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
    372         }
    373 
    374         updateSlowCaseFunctionPointers();
    375     }
    376 
    377     inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
    378     {
    379         if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
    380             String currentSubstring = m_currentString.currentSubString(string.length());
    381             if (currentSubstring.startsWith(string, caseSensitive))
    382                 return DidMatch;
    383             return DidNotMatch;
    384         }
    385         return lookAheadSlowCase(string, caseSensitive);
    386     }
    387 
    388     LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
    389     {
    390         unsigned count = string.length();
    391         if (count > length())
    392             return NotEnoughCharacters;
    393         UChar* consumedCharacters;
    394         String consumedString = String::createUninitialized(count, consumedCharacters);
    395         advance(count, consumedCharacters);
    396         LookAheadResult result = DidNotMatch;
    397         if (consumedString.startsWith(string, caseSensitive))
    398             result = DidMatch;
    399         prepend(SegmentedString(consumedString));
    400         return result;
    401     }
    402 
    403     bool isComposite() const { return !m_substrings.isEmpty(); }
    404 
    405     UChar m_pushedChar1;
    406     UChar m_pushedChar2;
    407     SegmentedSubstring m_currentString;
    408     UChar m_currentChar;
    409     int m_numberOfCharactersConsumedPriorToCurrentString;
    410     int m_numberOfCharactersConsumedPriorToCurrentLine;
    411     int m_currentLine;
    412     Deque<SegmentedSubstring> m_substrings;
    413     bool m_closed;
    414     bool m_empty;
    415     unsigned char m_fastPathFlags;
    416     void (SegmentedString::*m_advanceFunc)();
    417     void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
    418 };
    419 
    420 }
    421 
    422 #endif
    423