Home | History | Annotate | Download | only in text
      1 /*
      2     Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3 
      4     This library is free software; you can redistribute it and/or
      5     modify it under the terms of the GNU Library General Public
      6     License as published by the Free Software Foundation; either
      7     version 2 of the License, or (at your option) any later version.
      8 
      9     This library is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12     Library General Public License for more details.
     13 
     14     You should have received a copy of the GNU Library General Public License
     15     along with this library; see the file COPYING.LIB.  If not, write to
     16     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     17     Boston, MA 02110-1301, USA.
     18 */
     19 
     20 #ifndef SegmentedString_h
     21 #define SegmentedString_h
     22 
     23 #include "platform/PlatformExport.h"
     24 #include "wtf/Deque.h"
     25 #include "wtf/text/StringBuilder.h"
     26 #include "wtf/text/TextPosition.h"
     27 #include "wtf/text/WTFString.h"
     28 
     29 namespace WebCore {
     30 
     31 class SegmentedString;
     32 
     33 class PLATFORM_EXPORT SegmentedSubstring {
     34 public:
     35     SegmentedSubstring()
     36         : m_length(0)
     37         , m_doNotExcludeLineNumbers(true)
     38         , m_is8Bit(false)
     39     {
     40         m_data.string16Ptr = 0;
     41     }
     42 
     43     SegmentedSubstring(const String& str)
     44         : m_length(str.length())
     45         , m_doNotExcludeLineNumbers(true)
     46         , m_string(str)
     47     {
     48         if (m_length) {
     49             if (m_string.is8Bit()) {
     50                 m_is8Bit = true;
     51                 m_data.string8Ptr = m_string.characters8();
     52             } else {
     53                 m_is8Bit = false;
     54                 m_data.string16Ptr = m_string.characters16();
     55             }
     56         } else {
     57             m_is8Bit = false;
     58         }
     59     }
     60 
     61     void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
     62 
     63     bool is8Bit() { return m_is8Bit; }
     64 
     65     bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
     66     bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
     67 
     68     void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
     69 
     70     int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
     71 
     72     void appendTo(StringBuilder& builder) const
     73     {
     74         int offset = m_string.length() - m_length;
     75 
     76         if (!offset) {
     77             if (m_length)
     78                 builder.append(m_string);
     79         } else {
     80             builder.append(m_string.substring(offset, m_length));
     81         }
     82     }
     83 
     84     UChar getCurrentChar8()
     85     {
     86         return *m_data.string8Ptr;
     87     }
     88 
     89     UChar getCurrentChar16()
     90     {
     91         return m_data.string16Ptr ? *m_data.string16Ptr : 0;
     92     }
     93 
     94     UChar incrementAndGetCurrentChar8()
     95     {
     96         ASSERT(m_data.string8Ptr);
     97         return *++m_data.string8Ptr;
     98     }
     99 
    100     UChar incrementAndGetCurrentChar16()
    101     {
    102         ASSERT(m_data.string16Ptr);
    103         return *++m_data.string16Ptr;
    104     }
    105 
    106     String currentSubString(unsigned length)
    107     {
    108         int offset = m_string.length() - m_length;
    109         return m_string.substring(offset, length);
    110     }
    111 
    112     ALWAYS_INLINE UChar getCurrentChar()
    113     {
    114         ASSERT(m_length);
    115         if (is8Bit())
    116             return getCurrentChar8();
    117         return getCurrentChar16();
    118     }
    119 
    120     ALWAYS_INLINE UChar incrementAndGetCurrentChar()
    121     {
    122         ASSERT(m_length);
    123         if (is8Bit())
    124             return incrementAndGetCurrentChar8();
    125         return incrementAndGetCurrentChar16();
    126     }
    127 
    128 public:
    129     union {
    130         const LChar* string8Ptr;
    131         const UChar* string16Ptr;
    132     } m_data;
    133     int m_length;
    134 
    135 private:
    136     bool m_doNotExcludeLineNumbers;
    137     bool m_is8Bit;
    138     String m_string;
    139 };
    140 
    141 class PLATFORM_EXPORT SegmentedString {
    142 public:
    143     SegmentedString()
    144         : m_pushedChar1(0)
    145         , m_pushedChar2(0)
    146         , m_currentChar(0)
    147         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    148         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    149         , m_currentLine(0)
    150         , m_closed(false)
    151         , m_empty(true)
    152         , m_fastPathFlags(NoFastPath)
    153         , m_advanceFunc(&SegmentedString::advanceEmpty)
    154         , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
    155     {
    156     }
    157 
    158     SegmentedString(const String& str)
    159         : m_pushedChar1(0)
    160         , m_pushedChar2(0)
    161         , m_currentString(str)
    162         , m_currentChar(0)
    163         , m_numberOfCharactersConsumedPriorToCurrentString(0)
    164         , m_numberOfCharactersConsumedPriorToCurrentLine(0)
    165         , m_currentLine(0)
    166         , m_closed(false)
    167         , m_empty(!str.length())
    168         , m_fastPathFlags(NoFastPath)
    169     {
    170         if (m_currentString.m_length)
    171             m_currentChar = m_currentString.getCurrentChar();
    172         updateAdvanceFunctionPointers();
    173     }
    174 
    175     SegmentedString(const SegmentedString&);
    176 
    177     const SegmentedString& operator=(const SegmentedString&);
    178 
    179     void clear();
    180     void close();
    181 
    182     void append(const SegmentedString&);
    183     void prepend(const SegmentedString&);
    184 
    185     bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
    186     void setExcludeLineNumbers();
    187 
    188     void push(UChar c)
    189     {
    190         if (!m_pushedChar1) {
    191             m_pushedChar1 = c;
    192             m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
    193             updateSlowCaseFunctionPointers();
    194         } else {
    195             ASSERT(!m_pushedChar2);
    196             m_pushedChar2 = c;
    197         }
    198     }
    199 
    200     bool isEmpty() const { return m_empty; }
    201     unsigned length() const;
    202 
    203     bool isClosed() const { return m_closed; }
    204 
    205     enum LookAheadResult {
    206         DidNotMatch,
    207         DidMatch,
    208         NotEnoughCharacters,
    209     };
    210 
    211     LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
    212     LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
    213 
    214     void advance()
    215     {
    216         if (m_fastPathFlags & Use8BitAdvance) {
    217             ASSERT(!m_pushedChar1);
    218             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    219             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    220 
    221             if (!haveOneCharacterLeft)
    222                 return;
    223 
    224             updateSlowCaseFunctionPointers();
    225 
    226             return;
    227         }
    228 
    229         (this->*m_advanceFunc)();
    230     }
    231 
    232     inline void advanceAndUpdateLineNumber()
    233     {
    234         if (m_fastPathFlags & Use8BitAdvance) {
    235             ASSERT(!m_pushedChar1);
    236 
    237             bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
    238             bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
    239 
    240             m_currentChar = m_currentString.incrementAndGetCurrentChar8();
    241 
    242             if (!(haveNewLine | haveOneCharacterLeft))
    243                 return;
    244 
    245             if (haveNewLine) {
    246                 ++m_currentLine;
    247                 m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
    248             }
    249 
    250             if (haveOneCharacterLeft)
    251                 updateSlowCaseFunctionPointers();
    252 
    253             return;
    254         }
    255 
    256         (this->*m_advanceAndUpdateLineNumberFunc)();
    257     }
    258 
    259     void advanceAndASSERT(UChar expectedCharacter)
    260     {
    261         ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
    262         advance();
    263     }
    264 
    265     void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
    266     {
    267         ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
    268         advance();
    269     }
    270 
    271     void advancePastNonNewline()
    272     {
    273         ASSERT(currentChar() != '\n');
    274         advance();
    275     }
    276 
    277     void advancePastNewlineAndUpdateLineNumber()
    278     {
    279         ASSERT(currentChar() == '\n');
    280         if (!m_pushedChar1 && m_currentString.m_length > 1) {
    281             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
    282             m_currentLine += newLineFlag;
    283             if (newLineFlag)
    284                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
    285             decrementAndCheckLength();
    286             m_currentChar = m_currentString.incrementAndGetCurrentChar();
    287             return;
    288         }
    289         advanceAndUpdateLineNumberSlowCase();
    290     }
    291 
    292     // Writes the consumed characters into consumedCharacters, which must
    293     // have space for at least |count| characters.
    294     void advance(unsigned count, UChar* consumedCharacters);
    295 
    296     bool escaped() const { return m_pushedChar1; }
    297 
    298     int numberOfCharactersConsumed() const
    299     {
    300         int numberOfPushedCharacters = 0;
    301         if (m_pushedChar1) {
    302             ++numberOfPushedCharacters;
    303             if (m_pushedChar2)
    304                 ++numberOfPushedCharacters;
    305         }
    306         return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
    307     }
    308 
    309     String toString() const;
    310 
    311     UChar currentChar() const { return m_currentChar; }
    312 
    313     // The method is moderately slow, comparing to currentLine method.
    314     OrdinalNumber currentColumn() const;
    315     OrdinalNumber currentLine() const;
    316     // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
    317     // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    318     void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
    319 
    320 private:
    321     enum FastPathFlags {
    322         NoFastPath = 0,
    323         Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
    324         Use8BitAdvance = 1 << 1,
    325     };
    326 
    327     void append(const SegmentedSubstring&);
    328     void prepend(const SegmentedSubstring&);
    329 
    330     void advance8();
    331     void advance16();
    332     void advanceAndUpdateLineNumber8();
    333     void advanceAndUpdateLineNumber16();
    334     void advanceSlowCase();
    335     void advanceAndUpdateLineNumberSlowCase();
    336     void advanceEmpty();
    337     void advanceSubstring();
    338 
    339     void updateSlowCaseFunctionPointers();
    340 
    341     void decrementAndCheckLength()
    342     {
    343         ASSERT(m_currentString.m_length > 1);
    344         if (--m_currentString.m_length == 1)
    345             updateSlowCaseFunctionPointers();
    346     }
    347 
    348     void updateAdvanceFunctionPointers()
    349     {
    350         if ((m_currentString.m_length > 1) && !m_pushedChar1) {
    351             if (m_currentString.is8Bit()) {
    352                 m_advanceFunc = &SegmentedString::advance8;
    353                 m_fastPathFlags = Use8BitAdvance;
    354                 if (m_currentString.doNotExcludeLineNumbers()) {
    355                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
    356                     m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
    357                 } else {
    358                     m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
    359                 }
    360                 return;
    361             }
    362 
    363             m_advanceFunc = &SegmentedString::advance16;
    364             m_fastPathFlags = NoFastPath;
    365             if (m_currentString.doNotExcludeLineNumbers())
    366                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
    367             else
    368                 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
    369             return;
    370         }
    371 
    372         if (!m_currentString.m_length && !isComposite()) {
    373             m_advanceFunc = &SegmentedString::advanceEmpty;
    374             m_fastPathFlags = NoFastPath;
    375             m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
    376         }
    377 
    378         updateSlowCaseFunctionPointers();
    379     }
    380 
    381     inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
    382     {
    383         if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
    384             String currentSubstring = m_currentString.currentSubString(string.length());
    385             if (currentSubstring.startsWith(string, caseSensitive))
    386                 return DidMatch;
    387             return DidNotMatch;
    388         }
    389         return lookAheadSlowCase(string, caseSensitive);
    390     }
    391 
    392     LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
    393     {
    394         unsigned count = string.length();
    395         if (count > length())
    396             return NotEnoughCharacters;
    397         UChar* consumedCharacters;
    398         String consumedString = String::createUninitialized(count, consumedCharacters);
    399         advance(count, consumedCharacters);
    400         LookAheadResult result = DidNotMatch;
    401         if (consumedString.startsWith(string, caseSensitive))
    402             result = DidMatch;
    403         prepend(SegmentedString(consumedString));
    404         return result;
    405     }
    406 
    407     bool isComposite() const { return !m_substrings.isEmpty(); }
    408 
    409     UChar m_pushedChar1;
    410     UChar m_pushedChar2;
    411     SegmentedSubstring m_currentString;
    412     UChar m_currentChar;
    413     int m_numberOfCharactersConsumedPriorToCurrentString;
    414     int m_numberOfCharactersConsumedPriorToCurrentLine;
    415     int m_currentLine;
    416     Deque<SegmentedSubstring> m_substrings;
    417     bool m_closed;
    418     bool m_empty;
    419     unsigned char m_fastPathFlags;
    420     void (SegmentedString::*m_advanceFunc)();
    421     void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
    422 };
    423 
    424 }
    425 
    426 #endif
    427