1 /* 2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either 7 version 2 of the License, or (at your option) any later version. 8 9 This library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public License 15 along with this library; see the file COPYING.LIB. If not, write to 16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 Boston, MA 02110-1301, USA. 18 */ 19 20 #ifndef SegmentedString_h 21 #define SegmentedString_h 22 23 #include "platform/PlatformExport.h" 24 #include "wtf/Deque.h" 25 #include "wtf/text/StringBuilder.h" 26 #include "wtf/text/TextPosition.h" 27 #include "wtf/text/WTFString.h" 28 29 namespace WebCore { 30 31 class SegmentedString; 32 33 class PLATFORM_EXPORT SegmentedSubstring { 34 public: 35 SegmentedSubstring() 36 : m_length(0) 37 , m_doNotExcludeLineNumbers(true) 38 , m_is8Bit(false) 39 { 40 m_data.string16Ptr = 0; 41 } 42 43 SegmentedSubstring(const String& str) 44 : m_length(str.length()) 45 , m_doNotExcludeLineNumbers(true) 46 , m_string(str) 47 { 48 if (m_length) { 49 if (m_string.is8Bit()) { 50 m_is8Bit = true; 51 m_data.string8Ptr = m_string.characters8(); 52 } else { 53 m_is8Bit = false; 54 m_data.string16Ptr = m_string.characters16(); 55 } 56 } else { 57 m_is8Bit = false; 58 } 59 } 60 61 void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;} 62 63 bool is8Bit() { return m_is8Bit; } 64 65 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } 66 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } 67 68 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } 69 70 int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 71 72 void appendTo(StringBuilder& builder) const 73 { 74 int offset = m_string.length() - m_length; 75 76 if (!offset) { 77 if (m_length) 78 builder.append(m_string); 79 } else { 80 builder.append(m_string.substring(offset, m_length)); 81 } 82 } 83 84 UChar getCurrentChar8() 85 { 86 return *m_data.string8Ptr; 87 } 88 89 UChar getCurrentChar16() 90 { 91 return m_data.string16Ptr ? *m_data.string16Ptr : 0; 92 } 93 94 UChar incrementAndGetCurrentChar8() 95 { 96 ASSERT(m_data.string8Ptr); 97 return *++m_data.string8Ptr; 98 } 99 100 UChar incrementAndGetCurrentChar16() 101 { 102 ASSERT(m_data.string16Ptr); 103 return *++m_data.string16Ptr; 104 } 105 106 String currentSubString(unsigned length) 107 { 108 int offset = m_string.length() - m_length; 109 return m_string.substring(offset, length); 110 } 111 112 ALWAYS_INLINE UChar getCurrentChar() 113 { 114 ASSERT(m_length); 115 if (is8Bit()) 116 return getCurrentChar8(); 117 return getCurrentChar16(); 118 } 119 120 ALWAYS_INLINE UChar incrementAndGetCurrentChar() 121 { 122 ASSERT(m_length); 123 if (is8Bit()) 124 return incrementAndGetCurrentChar8(); 125 return incrementAndGetCurrentChar16(); 126 } 127 128 public: 129 union { 130 const LChar* string8Ptr; 131 const UChar* string16Ptr; 132 } m_data; 133 int m_length; 134 135 private: 136 bool m_doNotExcludeLineNumbers; 137 bool m_is8Bit; 138 String m_string; 139 }; 140 141 class PLATFORM_EXPORT SegmentedString { 142 public: 143 SegmentedString() 144 : m_pushedChar1(0) 145 , m_pushedChar2(0) 146 , m_currentChar(0) 147 , m_numberOfCharactersConsumedPriorToCurrentString(0) 148 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 149 , m_currentLine(0) 150 , m_closed(false) 151 , m_empty(true) 152 , m_fastPathFlags(NoFastPath) 153 , m_advanceFunc(&SegmentedString::advanceEmpty) 154 , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) 155 { 156 } 157 158 SegmentedString(const String& str) 159 : m_pushedChar1(0) 160 , m_pushedChar2(0) 161 , m_currentString(str) 162 , m_currentChar(0) 163 , m_numberOfCharactersConsumedPriorToCurrentString(0) 164 , m_numberOfCharactersConsumedPriorToCurrentLine(0) 165 , m_currentLine(0) 166 , m_closed(false) 167 , m_empty(!str.length()) 168 , m_fastPathFlags(NoFastPath) 169 { 170 if (m_currentString.m_length) 171 m_currentChar = m_currentString.getCurrentChar(); 172 updateAdvanceFunctionPointers(); 173 } 174 175 void clear(); 176 void close(); 177 178 void append(const SegmentedString&); 179 void prepend(const SegmentedString&); 180 181 bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } 182 void setExcludeLineNumbers(); 183 184 void push(UChar c) 185 { 186 if (!m_pushedChar1) { 187 m_pushedChar1 = c; 188 m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); 189 updateSlowCaseFunctionPointers(); 190 } else { 191 ASSERT(!m_pushedChar2); 192 m_pushedChar2 = c; 193 } 194 } 195 196 bool isEmpty() const { return m_empty; } 197 unsigned length() const; 198 199 bool isClosed() const { return m_closed; } 200 201 enum LookAheadResult { 202 DidNotMatch, 203 DidMatch, 204 NotEnoughCharacters, 205 }; 206 207 LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); } 208 LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); } 209 210 void advance() 211 { 212 if (m_fastPathFlags & Use8BitAdvance) { 213 ASSERT(!m_pushedChar1); 214 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 215 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 216 217 if (!haveOneCharacterLeft) 218 return; 219 220 updateSlowCaseFunctionPointers(); 221 222 return; 223 } 224 225 (this->*m_advanceFunc)(); 226 } 227 228 inline void advanceAndUpdateLineNumber() 229 { 230 if (m_fastPathFlags & Use8BitAdvance) { 231 ASSERT(!m_pushedChar1); 232 233 bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); 234 bool haveOneCharacterLeft = (--m_currentString.m_length == 1); 235 236 m_currentChar = m_currentString.incrementAndGetCurrentChar8(); 237 238 if (!(haveNewLine | haveOneCharacterLeft)) 239 return; 240 241 if (haveNewLine) { 242 ++m_currentLine; 243 m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); 244 } 245 246 if (haveOneCharacterLeft) 247 updateSlowCaseFunctionPointers(); 248 249 return; 250 } 251 252 (this->*m_advanceAndUpdateLineNumberFunc)(); 253 } 254 255 void advanceAndASSERT(UChar expectedCharacter) 256 { 257 ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter); 258 advance(); 259 } 260 261 void advanceAndASSERTIgnoringCase(UChar expectedCharacter) 262 { 263 ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter)); 264 advance(); 265 } 266 267 void advancePastNonNewline() 268 { 269 ASSERT(currentChar() != '\n'); 270 advance(); 271 } 272 273 void advancePastNewlineAndUpdateLineNumber() 274 { 275 ASSERT(currentChar() == '\n'); 276 if (!m_pushedChar1 && m_currentString.m_length > 1) { 277 int newLineFlag = m_currentString.doNotExcludeLineNumbers(); 278 m_currentLine += newLineFlag; 279 if (newLineFlag) 280 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; 281 decrementAndCheckLength(); 282 m_currentChar = m_currentString.incrementAndGetCurrentChar(); 283 return; 284 } 285 advanceAndUpdateLineNumberSlowCase(); 286 } 287 288 // Writes the consumed characters into consumedCharacters, which must 289 // have space for at least |count| characters. 290 void advance(unsigned count, UChar* consumedCharacters); 291 292 bool escaped() const { return m_pushedChar1; } 293 294 int numberOfCharactersConsumed() const 295 { 296 int numberOfPushedCharacters = 0; 297 if (m_pushedChar1) { 298 ++numberOfPushedCharacters; 299 if (m_pushedChar2) 300 ++numberOfPushedCharacters; 301 } 302 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 303 } 304 305 String toString() const; 306 307 UChar currentChar() const { return m_currentChar; } 308 309 // The method is moderately slow, comparing to currentLine method. 310 OrdinalNumber currentColumn() const; 311 OrdinalNumber currentLine() const; 312 // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog 313 // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. 314 void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength); 315 316 private: 317 enum FastPathFlags { 318 NoFastPath = 0, 319 Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, 320 Use8BitAdvance = 1 << 1, 321 }; 322 323 void append(const SegmentedSubstring&); 324 void prepend(const SegmentedSubstring&); 325 326 void advance8(); 327 void advance16(); 328 void advanceAndUpdateLineNumber8(); 329 void advanceAndUpdateLineNumber16(); 330 void advanceSlowCase(); 331 void advanceAndUpdateLineNumberSlowCase(); 332 void advanceEmpty(); 333 void advanceSubstring(); 334 335 void updateSlowCaseFunctionPointers(); 336 337 void decrementAndCheckLength() 338 { 339 ASSERT(m_currentString.m_length > 1); 340 if (--m_currentString.m_length == 1) 341 updateSlowCaseFunctionPointers(); 342 } 343 344 void updateAdvanceFunctionPointers() 345 { 346 if ((m_currentString.m_length > 1) && !m_pushedChar1) { 347 if (m_currentString.is8Bit()) { 348 m_advanceFunc = &SegmentedString::advance8; 349 m_fastPathFlags = Use8BitAdvance; 350 if (m_currentString.doNotExcludeLineNumbers()) { 351 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8; 352 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; 353 } else { 354 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; 355 } 356 return; 357 } 358 359 m_advanceFunc = &SegmentedString::advance16; 360 m_fastPathFlags = NoFastPath; 361 if (m_currentString.doNotExcludeLineNumbers()) 362 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16; 363 else 364 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; 365 return; 366 } 367 368 if (!m_currentString.m_length && !isComposite()) { 369 m_advanceFunc = &SegmentedString::advanceEmpty; 370 m_fastPathFlags = NoFastPath; 371 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; 372 } 373 374 updateSlowCaseFunctionPointers(); 375 } 376 377 inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive) 378 { 379 if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { 380 String currentSubstring = m_currentString.currentSubString(string.length()); 381 if (currentSubstring.startsWith(string, caseSensitive)) 382 return DidMatch; 383 return DidNotMatch; 384 } 385 return lookAheadSlowCase(string, caseSensitive); 386 } 387 388 LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive) 389 { 390 unsigned count = string.length(); 391 if (count > length()) 392 return NotEnoughCharacters; 393 UChar* consumedCharacters; 394 String consumedString = String::createUninitialized(count, consumedCharacters); 395 advance(count, consumedCharacters); 396 LookAheadResult result = DidNotMatch; 397 if (consumedString.startsWith(string, caseSensitive)) 398 result = DidMatch; 399 prepend(SegmentedString(consumedString)); 400 return result; 401 } 402 403 bool isComposite() const { return !m_substrings.isEmpty(); } 404 405 UChar m_pushedChar1; 406 UChar m_pushedChar2; 407 SegmentedSubstring m_currentString; 408 UChar m_currentChar; 409 int m_numberOfCharactersConsumedPriorToCurrentString; 410 int m_numberOfCharactersConsumedPriorToCurrentLine; 411 int m_currentLine; 412 Deque<SegmentedSubstring> m_substrings; 413 bool m_closed; 414 bool m_empty; 415 unsigned char m_fastPathFlags; 416 void (SegmentedString::*m_advanceFunc)(); 417 void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); 418 }; 419 420 } 421 422 #endif 423