1 /* 2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either 7 version 2 of the License, or (at your option) any later version. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public License 15 along with this library; see the file COPYING.LIB. If not, write to 16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 Boston, MA 02110-1301, USA. 18 */ 19 20 #ifndef SegmentedString_h 21 #define SegmentedString_h 22 23 #include "platform/PlatformExport.h" 24 #include "wtf/Deque.h" 25 #include "wtf/text/StringBuilder.h" 26 #include "wtf/text/TextPosition.h" 27 #include "wtf/text/WTFString.h" 28 29 namespace WebCore { 30 31 class SegmentedString; 32 33 class PLATFORM_EXPORT SegmentedSubstring { 34 public: 35 SegmentedSubstring() 36 : m_length(0) 37 , m_doNotExcludeLineNumbers(true) 38 , m_is8Bit(false) 39 { 40 m_data.string16Ptr = 0; 41 } 42 43 SegmentedSubstring(const String& str) 44 : m_length(str.length()) 45 , m_doNotExcludeLineNumbers(true) 46 , m_string(str) 47 { 48 if (m_length) { 49 if (m_string.is8Bit()) { 50 m_is8Bit = true; 51 m_data.string8Ptr = m_string.characters8(); 52 } else { 53 m_is8Bit = false; 54 m_data.string16Ptr = m_string.characters16(); 55 } 56 } else { 57 m_is8Bit = false; 58 } 59 } 60 61 void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;} 62 63 bool is8Bit() { return m_is8Bit; } 64 65 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } 66 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } 67 68 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } 69 70 int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 71 72 void appendTo(StringBuilder& builder) const 73 { 74 int offset = m_string.length() - m_length; 75 76 if (!offset) { 77 if (m_length) 78 builder.append(m_string); 79 } else { 80 builder.append(m_string.substring(offset, m_length)); 81 } 82 } 83 84 UChar getCurrentChar8() 85 { 86 return *m_data.string8Ptr; 87 } 88 89 UChar getCurrentChar16() 90 { 91 return m_data.string16Ptr ? *m_data.string16Ptr : 0; 92 } 93 94 UChar incrementAndGetCurrentChar8() 95 { 96 ASSERT(m_data.string8Ptr); 97 return *++m_data.string8Ptr; 98 } 99 100 UChar incrementAndGetCurrentChar16() 101 { 102 ASSERT(m_data.string16Ptr); 103 return *++m_data.string16Ptr; 104 } 105 106 String currentSubString(unsigned length) 107 { 108 int offset = m_string.length() - m_length; 109 return m_string.substring(offset, length); 110 } 111 112 ALWAYS_INLINE UChar getCurrentChar() 113 { 114 ASSERT(m_length); 115 if (is8Bit()) 116 return getCurrentChar8(); 117 return getCurrentChar16(); 118 } 119 120 ALWAYS_INLINE UChar incrementAndGetCurrentChar() 121 { 122 ASSERT(m_length); 123 if (is8Bit()) 124 return incrementAndGetCurrentChar8(); 125 return incrementAndGetCurrentChar16(); 126 } 127 128 public: 129 union { 130 const LChar* string8Ptr; 131 const UChar* string16Ptr; 132 } m_data; 133 int m_length; 134 135 private: 136 bool m_doNotExcludeLineNumbers; 137 bool m_is8Bit; 138 String m_string; 139 }; 140 141 class PLATFORM_EXPORT SegmentedString { 142 public: 143 SegmentedString() 144 : m_pushedChar1(0) 145 , m_pushedChar2(0) 146 , m_currentChar(0) 147 , m_numberOfCharactersConsumedPriorToCurrentString(0) 148 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 149 , m_currentLine(0) 150 , m_closed(false) 151 , m_empty(true) 152 , m_fastPathFlags(NoFastPath) 153 , m_advanceFunc(&SegmentedString::advanceEmpty) 154 , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) 155 { 156 } 157 158 SegmentedString(const String& str) 159 : m_pushedChar1(0) 160 , m_pushedChar2(0) 161 , m_currentString(str) 162 , m_currentChar(0) 163 , m_numberOfCharactersConsumedPriorToCurrentString(0) 164 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 165 , m_currentLine(0) 166 , m_closed(false) 167 , m_empty(!str.length()) 168 , m_fastPathFlags(NoFastPath) 169 { 170 if (m_currentString.m_length) 171 m_currentChar = m_currentString.getCurrentChar(); 172 updateAdvanceFunctionPointers(); 173 } 174 175 SegmentedString(const SegmentedString&); 176 177 const SegmentedString& operator=(const SegmentedString&); 178 179 void clear(); 180 void close(); 181 182 void append(const SegmentedString&); 183 void prepend(const SegmentedString&); 184 185 bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } 186 void setExcludeLineNumbers(); 187 188 void push(UChar c) 189 { 190 if (!m_pushedChar1) { 191 m_pushedChar1 = c; 192 m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); 193 updateSlowCaseFunctionPointers(); 194 } else { 195 ASSERT(!m_pushedChar2); 196 m_pushedChar2 = c; 197 } 198 } 199 200 bool isEmpty() const { return m_empty; } 201 unsigned length() const; 202 203 bool isClosed() const { return m_closed; } 204 205 enum LookAheadResult { 206 DidNotMatch, 207 DidMatch, 208 NotEnoughCharacters, 209 }; 210 211 LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); } 212 LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); } 213 214 void advance() 215 { 216 if (m_fastPathFlags & Use8BitAdvance) { 217 ASSERT(!m_pushedChar1); 218 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 219 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 220 221 if (!haveOneCharacterLeft) 222 return; 223 224 updateSlowCaseFunctionPointers(); 225 226 return; 227 } 228 229 (this->*m_advanceFunc)(); 230 } 231 232 inline void advanceAndUpdateLineNumber() 233 { 234 if (m_fastPathFlags & Use8BitAdvance) { 235 ASSERT(!m_pushedChar1); 236 237 bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); 238 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 239 240 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 241 242 if (!(haveNewLine | haveOneCharacterLeft)) 243 return; 244 245 if (haveNewLine) { 246 ++m_currentLine; 247 m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); 248 } 249 250 if (haveOneCharacterLeft) 251 updateSlowCaseFunctionPointers(); 252 253 return; 254 } 255 256 (this->*m_advanceAndUpdateLineNumberFunc)(); 257 } 258 259 void advanceAndASSERT(UChar expectedCharacter) 260 { 261 ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter); 262 advance(); 263 } 264 265 void advanceAndASSERTIgnoringCase(UChar expectedCharacter) 266 { 267 ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter)); 268 advance(); 269 } 270 271 void advancePastNonNewline() 272 { 273 ASSERT(currentChar() != '\n'); 274 advance(); 275 } 276 277 void advancePastNewlineAndUpdateLineNumber() 278 { 279 ASSERT(currentChar() == '\n'); 280 if (!m_pushedChar1 && m_currentString.m_length > 1) { 281 int newLineFlag = m_currentString.doNotExcludeLineNumbers(); 282 m_currentLine += newLineFlag; 283 if (newLineFlag) 284 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 285 decrementAndCheckLength(); 286 m_currentChar = m_currentString.incrementAndGetCurrentChar(); 287 return; 288 } 289 advanceAndUpdateLineNumberSlowCase(); 290 } 291 292 // Writes the consumed characters into consumedCharacters, which must 293 // have space for at least |count| characters. 294 void advance(unsigned count, UChar* consumedCharacters); 295 296 bool escaped() const { return m_pushedChar1; } 297 298 int numberOfCharactersConsumed() const 299 { 300 int numberOfPushedCharacters = 0; 301 if (m_pushedChar1) { 302 ++numberOfPushedCharacters; 303 if (m_pushedChar2) 304 ++numberOfPushedCharacters; 305 } 306 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 307 } 308 309 String toString() const; 310 311 UChar currentChar() const { return m_currentChar; } 312 313 // The method is moderately slow, comparing to currentLine method. 314 OrdinalNumber currentColumn() const; 315 OrdinalNumber currentLine() const; 316 // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog 317 // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. 318 void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength); 319 320 private: 321 enum FastPathFlags { 322 NoFastPath = 0, 323 Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, 324 Use8BitAdvance = 1 << 1, 325 }; 326 327 void append(const SegmentedSubstring&); 328 void prepend(const SegmentedSubstring&); 329 330 void advance8(); 331 void advance16(); 332 void advanceAndUpdateLineNumber8(); 333 void advanceAndUpdateLineNumber16(); 334 void advanceSlowCase(); 335 void advanceAndUpdateLineNumberSlowCase(); 336 void advanceEmpty(); 337 void advanceSubstring(); 338 339 void updateSlowCaseFunctionPointers(); 340 341 void decrementAndCheckLength() 342 { 343 ASSERT(m_currentString.m_length > 1); 344 if (--m_currentString.m_length == 1) 345 updateSlowCaseFunctionPointers(); 346 } 347 348 void updateAdvanceFunctionPointers() 349 { 350 if ((m_currentString.m_length > 1) && !m_pushedChar1) { 351 if (m_currentString.is8Bit()) { 352 m_advanceFunc = &SegmentedString::advance8; 353 m_fastPathFlags = Use8BitAdvance; 354 if (m_currentString.doNotExcludeLineNumbers()) { 355 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8; 356 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; 357 } else { 358 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; 359 } 360 return; 361 } 362 363 m_advanceFunc = &SegmentedString::advance16; 364 m_fastPathFlags = NoFastPath; 365 if (m_currentString.doNotExcludeLineNumbers()) 366 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16; 367 else 368 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; 369 return; 370 } 371 372 if (!m_currentString.m_length && !isComposite()) { 373 m_advanceFunc = &SegmentedString::advanceEmpty; 374 m_fastPathFlags = NoFastPath; 375 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 376 } 377 378 updateSlowCaseFunctionPointers(); 379 } 380 381 inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive) 382 { 383 if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { 384 String currentSubstring = m_currentString.currentSubString(string.length()); 385 if (currentSubstring.startsWith(string, caseSensitive)) 386 return DidMatch; 387 return DidNotMatch; 388 } 389 return lookAheadSlowCase(string, caseSensitive); 390 } 391 392 LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive) 393 { 394 unsigned count = string.length(); 395 if (count > length()) 396 return NotEnoughCharacters; 397 UChar* consumedCharacters; 398 String consumedString = String::createUninitialized(count, consumedCharacters); 399 advance(count, consumedCharacters); 400 LookAheadResult result = DidNotMatch; 401 if (consumedString.startsWith(string, caseSensitive)) 402 result = DidMatch; 403 prepend(SegmentedString(consumedString)); 404 return result; 405 } 406 407 bool isComposite() const { return !m_substrings.isEmpty(); } 408 409 UChar m_pushedChar1; 410 UChar m_pushedChar2; 411 SegmentedSubstring m_currentString; 412 UChar m_currentChar; 413 int m_numberOfCharactersConsumedPriorToCurrentString; 414 int m_numberOfCharactersConsumedPriorToCurrentLine; 415 int m_currentLine; 416 Deque<SegmentedSubstring> m_substrings; 417 bool m_closed; 418 bool m_empty; 419 unsigned char m_fastPathFlags; 420 void (SegmentedString::*m_advanceFunc)(); 421 void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); 422 }; 423 424 } 425 426 #endif 427