1 /* 2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either 7 version 2 of the License, or (at your option) any later version. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public License 15 along with this library; see the file COPYING.LIB. If not, write to 16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 Boston, MA 02110-1301, USA. 18 */ 19 20 #ifndef SegmentedString_h 21 #define SegmentedString_h 22 23 #include "wtf/Deque.h" 24 #include "wtf/text/StringBuilder.h" 25 #include "wtf/text/TextPosition.h" 26 #include "wtf/text/WTFString.h" 27 28 namespace WebCore { 29 30 class SegmentedString; 31 32 class SegmentedSubstring { 33 public: 34 SegmentedSubstring() 35 : m_length(0) 36 , m_doNotExcludeLineNumbers(true) 37 , m_is8Bit(false) 38 { 39 m_data.string16Ptr = 0; 40 } 41 42 SegmentedSubstring(const String& str) 43 : m_length(str.length()) 44 , m_doNotExcludeLineNumbers(true) 45 , m_string(str) 46 { 47 if (m_length) { 48 if (m_string.is8Bit()) { 49 m_is8Bit = true; 50 m_data.string8Ptr = m_string.characters8(); 51 } else { 52 m_is8Bit = false; 53 m_data.string16Ptr = m_string.characters16(); 54 } 55 } else 56 m_is8Bit = false; 57 } 58 59 void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;} 60 61 bool is8Bit() { return m_is8Bit; } 62 63 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } 64 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } 65 66 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } 67 68 int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 69 70 void appendTo(StringBuilder& builder) const 71 { 72 int offset = m_string.length() - m_length; 73 74 if (!offset) { 75 if (m_length) 76 builder.append(m_string); 77 } else 78 builder.append(m_string.substring(offset, m_length)); 79 } 80 81 UChar getCurrentChar8() 82 { 83 return *m_data.string8Ptr; 84 } 85 86 UChar getCurrentChar16() 87 { 88 return m_data.string16Ptr ? *m_data.string16Ptr : 0; 89 } 90 91 UChar incrementAndGetCurrentChar8() 92 { 93 ASSERT(m_data.string8Ptr); 94 return *++m_data.string8Ptr; 95 } 96 97 UChar incrementAndGetCurrentChar16() 98 { 99 ASSERT(m_data.string16Ptr); 100 return *++m_data.string16Ptr; 101 } 102 103 String currentSubString(unsigned length) 104 { 105 int offset = m_string.length() - m_length; 106 return m_string.substring(offset, length); 107 } 108 109 ALWAYS_INLINE UChar getCurrentChar() 110 { 111 ASSERT(m_length); 112 if (is8Bit()) 113 return getCurrentChar8(); 114 return getCurrentChar16(); 115 } 116 117 ALWAYS_INLINE UChar incrementAndGetCurrentChar() 118 { 119 ASSERT(m_length); 120 if (is8Bit()) 121 return incrementAndGetCurrentChar8(); 122 return incrementAndGetCurrentChar16(); 123 } 124 125 public: 126 union { 127 const LChar* string8Ptr; 128 const UChar* string16Ptr; 129 } m_data; 130 int m_length; 131 132 private: 133 bool m_doNotExcludeLineNumbers; 134 bool m_is8Bit; 135 String m_string; 136 }; 137 138 class SegmentedString { 139 public: 140 SegmentedString() 141 : m_pushedChar1(0) 142 , m_pushedChar2(0) 143 , m_currentChar(0) 144 , m_numberOfCharactersConsumedPriorToCurrentString(0) 145 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 146 , m_currentLine(0) 147 , m_closed(false) 148 , m_empty(true) 149 , m_fastPathFlags(NoFastPath) 150 , m_advanceFunc(&SegmentedString::advanceEmpty) 151 , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) 152 { 153 } 154 155 SegmentedString(const String& str) 156 : m_pushedChar1(0) 157 , m_pushedChar2(0) 158 , m_currentString(str) 159 , m_currentChar(0) 160 , m_numberOfCharactersConsumedPriorToCurrentString(0) 161 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 162 , m_currentLine(0) 163 , m_closed(false) 164 , m_empty(!str.length()) 165 , m_fastPathFlags(NoFastPath) 166 { 167 if (m_currentString.m_length) 168 m_currentChar = m_currentString.getCurrentChar(); 169 updateAdvanceFunctionPointers(); 170 } 171 172 SegmentedString(const SegmentedString&); 173 174 const SegmentedString& operator=(const SegmentedString&); 175 176 void clear(); 177 void close(); 178 179 void append(const SegmentedString&); 180 void prepend(const SegmentedString&); 181 182 bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } 183 void setExcludeLineNumbers(); 184 185 void push(UChar c) 186 { 187 if (!m_pushedChar1) { 188 m_pushedChar1 = c; 189 m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); 190 updateSlowCaseFunctionPointers(); 191 } else { 192 ASSERT(!m_pushedChar2); 193 m_pushedChar2 = c; 194 } 195 } 196 197 bool isEmpty() const { return m_empty; } 198 unsigned length() const; 199 200 bool isClosed() const { return m_closed; } 201 202 enum LookAheadResult { 203 DidNotMatch, 204 DidMatch, 205 NotEnoughCharacters, 206 }; 207 208 LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); } 209 LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); } 210 211 void advance() 212 { 213 if (m_fastPathFlags & Use8BitAdvance) { 214 ASSERT(!m_pushedChar1); 215 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 216 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 217 218 if (!haveOneCharacterLeft) 219 return; 220 221 updateSlowCaseFunctionPointers(); 222 223 return; 224 } 225 226 (this->*m_advanceFunc)(); 227 } 228 229 inline void advanceAndUpdateLineNumber() 230 { 231 if (m_fastPathFlags & Use8BitAdvance) { 232 ASSERT(!m_pushedChar1); 233 234 bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); 235 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 236 237 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 238 239 if (!(haveNewLine | haveOneCharacterLeft)) 240 return; 241 242 if (haveNewLine) { 243 ++m_currentLine; 244 m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); 245 } 246 247 if (haveOneCharacterLeft) 248 updateSlowCaseFunctionPointers(); 249 250 return; 251 } 252 253 (this->*m_advanceAndUpdateLineNumberFunc)(); 254 } 255 256 void advanceAndASSERT(UChar expectedCharacter) 257 { 258 ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter); 259 advance(); 260 } 261 262 void advanceAndASSERTIgnoringCase(UChar expectedCharacter) 263 { 264 ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter)); 265 advance(); 266 } 267 268 void advancePastNonNewline() 269 { 270 ASSERT(currentChar() != '\n'); 271 advance(); 272 } 273 274 void advancePastNewlineAndUpdateLineNumber() 275 { 276 ASSERT(currentChar() == '\n'); 277 if (!m_pushedChar1 && m_currentString.m_length > 1) { 278 int newLineFlag = m_currentString.doNotExcludeLineNumbers(); 279 m_currentLine += newLineFlag; 280 if (newLineFlag) 281 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 282 decrementAndCheckLength(); 283 m_currentChar = m_currentString.incrementAndGetCurrentChar(); 284 return; 285 } 286 advanceAndUpdateLineNumberSlowCase(); 287 } 288 289 // Writes the consumed characters into consumedCharacters, which must 290 // have space for at least |count| characters. 291 void advance(unsigned count, UChar* consumedCharacters); 292 293 bool escaped() const { return m_pushedChar1; } 294 295 int numberOfCharactersConsumed() const 296 { 297 int numberOfPushedCharacters = 0; 298 if (m_pushedChar1) { 299 ++numberOfPushedCharacters; 300 if (m_pushedChar2) 301 ++numberOfPushedCharacters; 302 } 303 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 304 } 305 306 String toString() const; 307 308 UChar currentChar() const { return m_currentChar; } 309 310 // The method is moderately slow, comparing to currentLine method. 311 OrdinalNumber currentColumn() const; 312 OrdinalNumber currentLine() const; 313 // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog 314 // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. 315 void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength); 316 317 private: 318 enum FastPathFlags { 319 NoFastPath = 0, 320 Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, 321 Use8BitAdvance = 1 << 1, 322 }; 323 324 void append(const SegmentedSubstring&); 325 void prepend(const SegmentedSubstring&); 326 327 void advance8(); 328 void advance16(); 329 void advanceAndUpdateLineNumber8(); 330 void advanceAndUpdateLineNumber16(); 331 void advanceSlowCase(); 332 void advanceAndUpdateLineNumberSlowCase(); 333 void advanceEmpty(); 334 void advanceSubstring(); 335 336 void updateSlowCaseFunctionPointers(); 337 338 void decrementAndCheckLength() 339 { 340 ASSERT(m_currentString.m_length > 1); 341 if (--m_currentString.m_length == 1) 342 updateSlowCaseFunctionPointers(); 343 } 344 345 void updateAdvanceFunctionPointers() 346 { 347 if ((m_currentString.m_length > 1) && !m_pushedChar1) { 348 if (m_currentString.is8Bit()) { 349 m_advanceFunc = &SegmentedString::advance8; 350 m_fastPathFlags = Use8BitAdvance; 351 if (m_currentString.doNotExcludeLineNumbers()) { 352 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8; 353 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; 354 } else 355 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; 356 return; 357 } 358 359 m_advanceFunc = &SegmentedString::advance16; 360 m_fastPathFlags = NoFastPath; 361 if (m_currentString.doNotExcludeLineNumbers()) 362 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16; 363 else 364 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; 365 return; 366 } 367 368 if (!m_currentString.m_length && !isComposite()) { 369 m_advanceFunc = &SegmentedString::advanceEmpty; 370 m_fastPathFlags = NoFastPath; 371 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 372 } 373 374 updateSlowCaseFunctionPointers(); 375 } 376 377 inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive) 378 { 379 if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { 380 String currentSubstring = m_currentString.currentSubString(string.length()); 381 if (currentSubstring.startsWith(string, caseSensitive)) 382 return DidMatch; 383 return DidNotMatch; 384 } 385 return lookAheadSlowCase(string, caseSensitive); 386 } 387 388 LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive) 389 { 390 unsigned count = string.length(); 391 if (count > length()) 392 return NotEnoughCharacters; 393 UChar* consumedCharacters; 394 String consumedString = String::createUninitialized(count, consumedCharacters); 395 advance(count, consumedCharacters); 396 LookAheadResult result = DidNotMatch; 397 if (consumedString.startsWith(string, caseSensitive)) 398 result = DidMatch; 399 prepend(SegmentedString(consumedString)); 400 return result; 401 } 402 403 bool isComposite() const { return !m_substrings.isEmpty(); } 404 405 UChar m_pushedChar1; 406 UChar m_pushedChar2; 407 SegmentedSubstring m_currentString; 408 UChar m_currentChar; 409 int m_numberOfCharactersConsumedPriorToCurrentString; 410 int m_numberOfCharactersConsumedPriorToCurrentLine; 411 int m_currentLine; 412 Deque<SegmentedSubstring> m_substrings; 413 bool m_closed; 414 bool m_empty; 415 unsigned char m_fastPathFlags; 416 void (SegmentedString::*m_advanceFunc)(); 417 void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); 418 }; 419 420 } 421 422 #endif 423