1 /* 2 * Copyright (C) 2000 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef BidiResolver_h 23 #define BidiResolver_h 24 25 #include "platform/text/BidiContext.h" 26 #include "platform/text/BidiRunList.h" 27 #include "platform/text/TextDirection.h" 28 #include "wtf/HashMap.h" 29 #include "wtf/Noncopyable.h" 30 #include "wtf/PassRefPtr.h" 31 #include "wtf/Vector.h" 32 33 namespace WebCore { 34 35 template <class Iterator> struct MidpointState { 36 MidpointState() 37 { 38 reset(); 39 } 40 41 void reset() 42 { 43 numMidpoints = 0; 44 currentMidpoint = 0; 45 betweenMidpoints = false; 46 } 47 48 // The goal is to reuse the line state across multiple 49 // lines so we just keep an array around for midpoints and never clear it across multiple 50 // lines. We track the number of items and position using the two other variables. 51 Vector<Iterator> midpoints; 52 unsigned numMidpoints; 53 unsigned currentMidpoint; 54 bool betweenMidpoints; 55 }; 56 57 // The BidiStatus at a given position (typically the end of a line) can 58 // be cached and then used to restart bidi resolution at that position. 59 struct BidiStatus { 60 BidiStatus() 61 : eor(WTF::Unicode::OtherNeutral) 62 , lastStrong(WTF::Unicode::OtherNeutral) 63 , last(WTF::Unicode::OtherNeutral) 64 { 65 } 66 67 // Creates a BidiStatus representing a new paragraph root with a default direction. 68 // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19. 69 BidiStatus(TextDirection textDirection, bool isOverride) 70 { 71 WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft; 72 eor = lastStrong = last = direction; 73 context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride); 74 } 75 76 BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext) 77 : eor(eorDir) 78 , lastStrong(lastStrongDir) 79 , last(lastDir) 80 , context(bidiContext) 81 { 82 } 83 84 WTF::Unicode::Direction eor; 85 WTF::Unicode::Direction lastStrong; 86 WTF::Unicode::Direction last; 87 RefPtr<BidiContext> context; 88 }; 89 90 class BidiEmbedding { 91 public: 92 BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source) 93 : m_direction(direction) 94 , m_source(source) 95 { 96 } 97 98 WTF::Unicode::Direction direction() const { return m_direction; } 99 BidiEmbeddingSource source() const { return m_source; } 100 private: 101 WTF::Unicode::Direction m_direction; 102 BidiEmbeddingSource m_source; 103 }; 104 105 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) 106 { 107 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context); 108 } 109 110 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) 111 { 112 return !(status1 == status2); 113 } 114 115 struct BidiCharacterRun { 116 BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir) 117 : m_override(context->override()) 118 , m_next(0) 119 , m_start(start) 120 , m_stop(stop) 121 { 122 ASSERT(m_start <= m_stop); 123 if (dir == WTF::Unicode::OtherNeutral) 124 dir = context->dir(); 125 126 m_level = context->level(); 127 128 // add level of run (cases I1 & I2) 129 if (m_level % 2) { 130 if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber) 131 m_level++; 132 } else { 133 if (dir == WTF::Unicode::RightToLeft) 134 m_level++; 135 else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber) 136 m_level += 2; 137 } 138 } 139 140 int start() const { return m_start; } 141 int stop() const { return m_stop; } 142 unsigned char level() const { return m_level; } 143 bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; } 144 bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; } 145 146 BidiCharacterRun* next() const { return m_next; } 147 void setNext(BidiCharacterRun* next) { m_next = next; } 148 149 // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173 150 bool m_override : 1; 151 bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit. 152 bool m_startsSegment : 1; // Same comment as m_hasHyphen. 153 unsigned char m_level; 154 BidiCharacterRun* m_next; 155 int m_start; 156 int m_stop; 157 }; 158 159 enum VisualDirectionOverride { 160 NoVisualOverride, 161 VisualLeftToRightOverride, 162 VisualRightToLeftOverride 163 }; 164 165 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm 166 // http://unicode.org/reports/tr9 167 template <class Iterator, class Run> class BidiResolver { 168 WTF_MAKE_NONCOPYABLE(BidiResolver); 169 public: 170 BidiResolver() 171 : m_direction(WTF::Unicode::OtherNeutral) 172 , m_reachedEndOfLine(false) 173 , m_emptyRun(true) 174 , m_nestedIsolateCount(0) 175 { 176 } 177 178 #ifndef NDEBUG 179 ~BidiResolver(); 180 #endif 181 182 const Iterator& position() const { return m_current; } 183 Iterator& position() { return m_current; } 184 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; } 185 void setPosition(const Iterator& position, unsigned nestedIsolatedCount) 186 { 187 m_current = position; 188 m_nestedIsolateCount = nestedIsolatedCount; 189 } 190 191 BidiContext* context() const { return m_status.context.get(); } 192 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } 193 194 void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; } 195 void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; } 196 void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; } 197 198 WTF::Unicode::Direction dir() const { return m_direction; } 199 void setDir(WTF::Unicode::Direction d) { m_direction = d; } 200 201 const BidiStatus& status() const { return m_status; } 202 void setStatus(const BidiStatus s) 203 { 204 ASSERT(s.context); 205 m_status = s; 206 } 207 208 MidpointState<Iterator>& midpointState() { return m_midpointState; } 209 210 // The current algorithm handles nested isolates one layer of nesting at a time. 211 // But when we layout each isolated span, we will walk into (and ignore) all 212 // child isolated spans. 213 void enterIsolate() { m_nestedIsolateCount++; } 214 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; } 215 bool inIsolate() const { return m_nestedIsolateCount; } 216 217 void embed(WTF::Unicode::Direction, BidiEmbeddingSource); 218 bool commitExplicitEmbedding(); 219 220 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false); 221 222 BidiRunList<Run>& runs() { return m_runs; } 223 224 // FIXME: This used to be part of deleteRuns() but was a layering violation. 225 // It's unclear if this is still needed. 226 void markCurrentRunEmpty() { m_emptyRun = true; } 227 228 Vector<Run*>& isolatedRuns() { return m_isolatedRuns; } 229 230 bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); } 231 232 TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0); 233 234 void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&); 235 MidpointState<Iterator> midpointStateForIsolatedRun(Run*); 236 237 Iterator endOfLine() const { return m_endOfLine; } 238 239 protected: 240 void increment() { m_current.increment(); } 241 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should 242 // pass in some sort of Traits object which knows how to create runs for appending. 243 void appendRun(); 244 245 Iterator m_current; 246 // sor and eor are "start of run" and "end of run" respectively and correpond 247 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7 248 Iterator m_sor; // Points to the first character in the current run. 249 Iterator m_eor; // Points to the last character in the current run. 250 Iterator m_last; 251 BidiStatus m_status; 252 WTF::Unicode::Direction m_direction; 253 // m_endOfRunAtEndOfLine is "the position last eor in the end of line" 254 Iterator m_endOfRunAtEndOfLine; 255 Iterator m_endOfLine; 256 bool m_reachedEndOfLine; 257 Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator 258 bool m_emptyRun; 259 260 // FIXME: This should not belong to the resolver, but rather be passed 261 // into createBidiRunsForLine by the caller. 262 BidiRunList<Run> m_runs; 263 264 MidpointState<Iterator> m_midpointState; 265 266 unsigned m_nestedIsolateCount; 267 Vector<Run*> m_isolatedRuns; 268 269 private: 270 void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to); 271 void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from); 272 void checkDirectionInLowerRaiseEmbeddingLevel(); 273 274 void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction); 275 void reorderRunsFromLevels(); 276 277 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence; 278 HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun; 279 }; 280 281 #ifndef NDEBUG 282 template <class Iterator, class Run> 283 BidiResolver<Iterator, Run>::~BidiResolver() 284 { 285 // The owner of this resolver should have handled the isolated runs. 286 ASSERT(m_isolatedRuns.isEmpty()); 287 } 288 #endif 289 290 template <class Iterator, class Run> 291 void BidiResolver<Iterator, Run>::appendRun() 292 { 293 if (!m_emptyRun && !m_eor.atEnd()) { 294 unsigned startOffset = m_sor.offset(); 295 unsigned endOffset = m_eor.offset(); 296 297 if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) { 298 m_reachedEndOfLine = true; 299 endOffset = m_endOfRunAtEndOfLine.offset(); 300 } 301 302 if (endOffset >= startOffset) 303 m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction)); 304 305 m_eor.increment(); 306 m_sor = m_eor; 307 } 308 309 m_direction = WTF::Unicode::OtherNeutral; 310 m_status.eor = WTF::Unicode::OtherNeutral; 311 } 312 313 template <class Iterator, class Run> 314 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source) 315 { 316 // Isolated spans compute base directionality during their own UBA run. 317 // Do not insert fake embed characters once we enter an isolated span. 318 ASSERT(!inIsolate()); 319 using namespace WTF::Unicode; 320 321 ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride); 322 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source)); 323 } 324 325 template <class Iterator, class Run> 326 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() 327 { 328 using namespace WTF::Unicode; 329 330 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd()); 331 ASSERT(m_status.last != NonSpacingMark 332 && m_status.last != BoundaryNeutral 333 && m_status.last != RightToLeftEmbedding 334 && m_status.last != LeftToRightEmbedding 335 && m_status.last != RightToLeftOverride 336 && m_status.last != LeftToRightOverride 337 && m_status.last != PopDirectionalFormat); 338 if (m_direction == OtherNeutral) 339 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; 340 } 341 342 template <class Iterator, class Run> 343 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from) 344 { 345 using namespace WTF::Unicode; 346 347 if (!m_emptyRun && m_eor != m_last) { 348 checkDirectionInLowerRaiseEmbeddingLevel(); 349 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 350 if (from == LeftToRight) { 351 // bidi.sor ... bidi.eor ... bidi.last L 352 if (m_status.eor == EuropeanNumber) { 353 if (m_status.lastStrong != LeftToRight) { 354 m_direction = EuropeanNumber; 355 appendRun(); 356 } 357 } else if (m_status.eor == ArabicNumber) { 358 m_direction = ArabicNumber; 359 appendRun(); 360 } else if (m_status.lastStrong != LeftToRight) { 361 appendRun(); 362 m_direction = LeftToRight; 363 } 364 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) { 365 appendRun(); 366 m_direction = RightToLeft; 367 } 368 m_eor = m_last; 369 } 370 371 appendRun(); 372 m_emptyRun = true; 373 374 // sor for the new run is determined by the higher level (rule X10) 375 setLastDir(from); 376 setLastStrongDir(from); 377 m_eor = Iterator(); 378 } 379 380 template <class Iterator, class Run> 381 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to) 382 { 383 using namespace WTF::Unicode; 384 385 if (!m_emptyRun && m_eor != m_last) { 386 checkDirectionInLowerRaiseEmbeddingLevel(); 387 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 388 if (to == LeftToRight) { 389 // bidi.sor ... bidi.eor ... bidi.last L 390 if (m_status.eor == EuropeanNumber) { 391 if (m_status.lastStrong != LeftToRight) { 392 m_direction = EuropeanNumber; 393 appendRun(); 394 } 395 } else if (m_status.eor == ArabicNumber) { 396 m_direction = ArabicNumber; 397 appendRun(); 398 } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) { 399 appendRun(); 400 m_direction = LeftToRight; 401 } 402 } else if (m_status.eor == ArabicNumber 403 || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft)) 404 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) { 405 appendRun(); 406 m_direction = RightToLeft; 407 } 408 m_eor = m_last; 409 } 410 411 appendRun(); 412 m_emptyRun = true; 413 414 setLastDir(to); 415 setLastStrongDir(to); 416 m_eor = Iterator(); 417 } 418 419 template <class Iterator, class Run> 420 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding() 421 { 422 // When we're "inIsolate()" we're resolving the parent context which 423 // ignores (skips over) the isolated content, including embedding levels. 424 // We should never accrue embedding levels while skipping over isolated content. 425 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty()); 426 427 using namespace WTF::Unicode; 428 429 unsigned char fromLevel = context()->level(); 430 RefPtr<BidiContext> toContext = context(); 431 432 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { 433 BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i]; 434 if (embedding.direction() == PopDirectionalFormat) { 435 if (BidiContext* parentContext = toContext->parent()) 436 toContext = parentContext; 437 } else { 438 Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight; 439 bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride; 440 unsigned char level = toContext->level(); 441 if (direction == RightToLeft) 442 level = nextGreaterOddLevel(level); 443 else 444 level = nextGreaterEvenLevel(level); 445 if (level < BidiContext::kMaxLevel) 446 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get()); 447 } 448 } 449 450 unsigned char toLevel = toContext->level(); 451 452 if (toLevel > fromLevel) 453 raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight); 454 else if (toLevel < fromLevel) 455 lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight); 456 457 setContext(toContext); 458 459 m_currentExplicitEmbeddingSequence.clear(); 460 461 return fromLevel != toLevel; 462 } 463 464 template <class Iterator, class Run> 465 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent) 466 { 467 using namespace WTF::Unicode; 468 switch (dirCurrent) { 469 case EuropeanNumberTerminator: 470 if (m_status.last != EuropeanNumber) 471 m_status.last = EuropeanNumberTerminator; 472 break; 473 case EuropeanNumberSeparator: 474 case CommonNumberSeparator: 475 case SegmentSeparator: 476 case WhiteSpaceNeutral: 477 case OtherNeutral: 478 switch (m_status.last) { 479 case LeftToRight: 480 case RightToLeft: 481 case RightToLeftArabic: 482 case EuropeanNumber: 483 case ArabicNumber: 484 m_status.last = dirCurrent; 485 break; 486 default: 487 m_status.last = OtherNeutral; 488 } 489 break; 490 case NonSpacingMark: 491 case BoundaryNeutral: 492 case RightToLeftEmbedding: 493 case LeftToRightEmbedding: 494 case RightToLeftOverride: 495 case LeftToRightOverride: 496 case PopDirectionalFormat: 497 // ignore these 498 break; 499 case EuropeanNumber: 500 // fall through 501 default: 502 m_status.last = dirCurrent; 503 } 504 } 505 506 template <class Iterator, class Run> 507 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels() 508 { 509 unsigned char levelLow = BidiContext::kMaxLevel; 510 unsigned char levelHigh = 0; 511 for (Run* run = m_runs.firstRun(); run; run = run->next()) { 512 levelHigh = std::max(run->level(), levelHigh); 513 levelLow = std::min(run->level(), levelLow); 514 } 515 516 // This implements reordering of the line (L2 according to Bidi spec): 517 // http://unicode.org/reports/tr9/#L2 518 // L2. From the highest level found in the text to the lowest odd level on each line, 519 // reverse any contiguous sequence of characters that are at that level or higher. 520 521 // Reversing is only done up to the lowest odd level. 522 if (!(levelLow % 2)) 523 levelLow++; 524 525 unsigned count = m_runs.runCount() - 1; 526 527 while (levelHigh >= levelLow) { 528 unsigned i = 0; 529 Run* run = m_runs.firstRun(); 530 while (i < count) { 531 for (;i < count && run && run->level() < levelHigh; i++) 532 run = run->next(); 533 unsigned start = i; 534 for (;i <= count && run && run->level() >= levelHigh; i++) 535 run = run->next(); 536 unsigned end = i - 1; 537 m_runs.reverseRuns(start, end); 538 } 539 levelHigh--; 540 } 541 } 542 543 template <class Iterator, class Run> 544 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality) 545 { 546 while (!m_current.atEnd()) { 547 if (inIsolate()) { 548 increment(); 549 continue; 550 } 551 if (m_current.atParagraphSeparator()) 552 break; 553 UChar32 current = m_current.current(); 554 if (UNLIKELY(U16_IS_SURROGATE(current))) { 555 increment(); 556 // If this not the high part of the surrogate pair, then drop it and move to the next. 557 if (!U16_IS_SURROGATE_LEAD(current)) 558 continue; 559 UChar high = static_cast<UChar>(current); 560 if (m_current.atEnd()) 561 continue; 562 UChar low = m_current.current(); 563 // Verify the low part. If invalid, then assume an invalid surrogate pair and retry. 564 if (!U16_IS_TRAIL(low)) 565 continue; 566 current = U16_GET_SUPPLEMENTARY(high, low); 567 } 568 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current); 569 if (charDirection == WTF::Unicode::LeftToRight) { 570 if (hasStrongDirectionality) 571 *hasStrongDirectionality = true; 572 return LTR; 573 } 574 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) { 575 if (hasStrongDirectionality) 576 *hasStrongDirectionality = true; 577 return RTL; 578 } 579 increment(); 580 } 581 if (hasStrongDirectionality) 582 *hasStrongDirectionality = false; 583 return LTR; 584 } 585 586 template <class Iterator, class Run> 587 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak) 588 { 589 using namespace WTF::Unicode; 590 591 ASSERT(m_direction == OtherNeutral); 592 593 m_endOfLine = end; 594 595 if (override != NoVisualOverride) { 596 m_emptyRun = false; 597 m_sor = m_current; 598 m_eor = Iterator(); 599 while (m_current != end && !m_current.atEnd()) { 600 m_eor = m_current; 601 increment(); 602 } 603 m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft; 604 appendRun(); 605 m_runs.setLogicallyLastRun(m_runs.lastRun()); 606 if (override == VisualRightToLeftOverride && m_runs.runCount()) 607 m_runs.reverseRuns(0, m_runs.runCount() - 1); 608 return; 609 } 610 611 m_emptyRun = true; 612 613 m_eor = Iterator(); 614 615 m_last = m_current; 616 bool lastLineEnded = false; 617 BidiResolver<Iterator, Run> stateAtEnd; 618 619 while (true) { 620 if (inIsolate() && m_emptyRun) { 621 m_sor = m_current; 622 m_emptyRun = false; 623 } 624 625 if (!lastLineEnded && isEndOfLine(end)) { 626 if (m_emptyRun) 627 break; 628 629 stateAtEnd.m_status = m_status; 630 stateAtEnd.m_sor = m_sor; 631 stateAtEnd.m_eor = m_eor; 632 stateAtEnd.m_last = m_last; 633 stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine; 634 stateAtEnd.m_lastBeforeET = m_lastBeforeET; 635 stateAtEnd.m_emptyRun = m_emptyRun; 636 m_endOfRunAtEndOfLine = m_last; 637 lastLineEnded = true; 638 } 639 Direction dirCurrent; 640 if (lastLineEnded && (hardLineBreak || m_current.atEnd())) { 641 BidiContext* c = context(); 642 if (hardLineBreak) { 643 // A deviation from the Unicode Bidi Algorithm in order to match 644 // WinIE and user expectations: hard line breaks reset bidi state 645 // coming from unicode bidi control characters, but not those from 646 // DOM nodes with specified directionality 647 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts()); 648 649 dirCurrent = stateAtEnd.context()->dir(); 650 stateAtEnd.setEorDir(dirCurrent); 651 stateAtEnd.setLastDir(dirCurrent); 652 stateAtEnd.setLastStrongDir(dirCurrent); 653 } else { 654 while (c->parent()) 655 c = c->parent(); 656 dirCurrent = c->dir(); 657 } 658 } else { 659 dirCurrent = m_current.direction(); 660 if (context()->override() 661 && dirCurrent != RightToLeftEmbedding 662 && dirCurrent != LeftToRightEmbedding 663 && dirCurrent != RightToLeftOverride 664 && dirCurrent != LeftToRightOverride 665 && dirCurrent != PopDirectionalFormat) 666 dirCurrent = context()->dir(); 667 else if (dirCurrent == NonSpacingMark) 668 dirCurrent = m_status.last; 669 } 670 671 // We ignore all character directionality while in unicode-bidi: isolate spans. 672 // We'll handle ordering the isolated characters in a second pass. 673 if (inIsolate()) 674 dirCurrent = OtherNeutral; 675 676 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd()); 677 switch (dirCurrent) { 678 679 // embedding and overrides (X1-X9 in the Bidi specs) 680 case RightToLeftEmbedding: 681 case LeftToRightEmbedding: 682 case RightToLeftOverride: 683 case LeftToRightOverride: 684 case PopDirectionalFormat: 685 embed(dirCurrent, FromUnicode); 686 commitExplicitEmbedding(); 687 break; 688 689 // strong types 690 case LeftToRight: 691 switch (m_status.last) { 692 case RightToLeft: 693 case RightToLeftArabic: 694 case EuropeanNumber: 695 case ArabicNumber: 696 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight) 697 appendRun(); 698 break; 699 case LeftToRight: 700 break; 701 case EuropeanNumberSeparator: 702 case EuropeanNumberTerminator: 703 case CommonNumberSeparator: 704 case BoundaryNeutral: 705 case BlockSeparator: 706 case SegmentSeparator: 707 case WhiteSpaceNeutral: 708 case OtherNeutral: 709 if (m_status.eor == EuropeanNumber) { 710 if (m_status.lastStrong != LeftToRight) { 711 // the numbers need to be on a higher embedding level, so let's close that run 712 m_direction = EuropeanNumber; 713 appendRun(); 714 if (context()->dir() != LeftToRight) { 715 // the neutrals take the embedding direction, which is R 716 m_eor = m_last; 717 m_direction = RightToLeft; 718 appendRun(); 719 } 720 } 721 } else if (m_status.eor == ArabicNumber) { 722 // Arabic numbers are always on a higher embedding level, so let's close that run 723 m_direction = ArabicNumber; 724 appendRun(); 725 if (context()->dir() != LeftToRight) { 726 // the neutrals take the embedding direction, which is R 727 m_eor = m_last; 728 m_direction = RightToLeft; 729 appendRun(); 730 } 731 } else if (m_status.lastStrong != LeftToRight) { 732 // last stuff takes embedding dir 733 if (context()->dir() == RightToLeft) { 734 m_eor = m_last; 735 m_direction = RightToLeft; 736 } 737 appendRun(); 738 } 739 default: 740 break; 741 } 742 m_eor = m_current; 743 m_status.eor = LeftToRight; 744 m_status.lastStrong = LeftToRight; 745 m_direction = LeftToRight; 746 break; 747 case RightToLeftArabic: 748 case RightToLeft: 749 switch (m_status.last) { 750 case LeftToRight: 751 case EuropeanNumber: 752 case ArabicNumber: 753 appendRun(); 754 case RightToLeft: 755 case RightToLeftArabic: 756 break; 757 case EuropeanNumberSeparator: 758 case EuropeanNumberTerminator: 759 case CommonNumberSeparator: 760 case BoundaryNeutral: 761 case BlockSeparator: 762 case SegmentSeparator: 763 case WhiteSpaceNeutral: 764 case OtherNeutral: 765 if (m_status.eor == EuropeanNumber) { 766 if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight) 767 m_eor = m_last; 768 appendRun(); 769 } else if (m_status.eor == ArabicNumber) { 770 appendRun(); 771 } else if (m_status.lastStrong == LeftToRight) { 772 if (context()->dir() == LeftToRight) 773 m_eor = m_last; 774 appendRun(); 775 } 776 default: 777 break; 778 } 779 m_eor = m_current; 780 m_status.eor = RightToLeft; 781 m_status.lastStrong = dirCurrent; 782 m_direction = RightToLeft; 783 break; 784 785 // weak types: 786 787 case EuropeanNumber: 788 if (m_status.lastStrong != RightToLeftArabic) { 789 // if last strong was AL change EN to AN 790 switch (m_status.last) { 791 case EuropeanNumber: 792 case LeftToRight: 793 break; 794 case RightToLeft: 795 case RightToLeftArabic: 796 case ArabicNumber: 797 m_eor = m_last; 798 appendRun(); 799 m_direction = EuropeanNumber; 800 break; 801 case EuropeanNumberSeparator: 802 case CommonNumberSeparator: 803 if (m_status.eor == EuropeanNumber) 804 break; 805 case EuropeanNumberTerminator: 806 case BoundaryNeutral: 807 case BlockSeparator: 808 case SegmentSeparator: 809 case WhiteSpaceNeutral: 810 case OtherNeutral: 811 if (m_status.eor == EuropeanNumber) { 812 if (m_status.lastStrong == RightToLeft) { 813 // ENs on both sides behave like Rs, so the neutrals should be R. 814 // Terminate the EN run. 815 appendRun(); 816 // Make an R run. 817 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 818 m_direction = RightToLeft; 819 appendRun(); 820 // Begin a new EN run. 821 m_direction = EuropeanNumber; 822 } 823 } else if (m_status.eor == ArabicNumber) { 824 // Terminate the AN run. 825 appendRun(); 826 if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) { 827 // Make an R run. 828 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 829 m_direction = RightToLeft; 830 appendRun(); 831 // Begin a new EN run. 832 m_direction = EuropeanNumber; 833 } 834 } else if (m_status.lastStrong == RightToLeft) { 835 // Extend the R run to include the neutrals. 836 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 837 m_direction = RightToLeft; 838 appendRun(); 839 // Begin a new EN run. 840 m_direction = EuropeanNumber; 841 } 842 default: 843 break; 844 } 845 m_eor = m_current; 846 m_status.eor = EuropeanNumber; 847 if (m_direction == OtherNeutral) 848 m_direction = LeftToRight; 849 break; 850 } 851 case ArabicNumber: 852 dirCurrent = ArabicNumber; 853 switch (m_status.last) { 854 case LeftToRight: 855 if (context()->dir() == LeftToRight) 856 appendRun(); 857 break; 858 case ArabicNumber: 859 break; 860 case RightToLeft: 861 case RightToLeftArabic: 862 case EuropeanNumber: 863 m_eor = m_last; 864 appendRun(); 865 break; 866 case CommonNumberSeparator: 867 if (m_status.eor == ArabicNumber) 868 break; 869 case EuropeanNumberSeparator: 870 case EuropeanNumberTerminator: 871 case BoundaryNeutral: 872 case BlockSeparator: 873 case SegmentSeparator: 874 case WhiteSpaceNeutral: 875 case OtherNeutral: 876 if (m_status.eor == ArabicNumber 877 || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)) 878 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) { 879 // Terminate the run before the neutrals. 880 appendRun(); 881 // Begin an R run for the neutrals. 882 m_direction = RightToLeft; 883 } else if (m_direction == OtherNeutral) { 884 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; 885 } 886 m_eor = m_last; 887 appendRun(); 888 default: 889 break; 890 } 891 m_eor = m_current; 892 m_status.eor = ArabicNumber; 893 if (m_direction == OtherNeutral) 894 m_direction = ArabicNumber; 895 break; 896 case EuropeanNumberSeparator: 897 case CommonNumberSeparator: 898 break; 899 case EuropeanNumberTerminator: 900 if (m_status.last == EuropeanNumber) { 901 dirCurrent = EuropeanNumber; 902 m_eor = m_current; 903 m_status.eor = dirCurrent; 904 } else if (m_status.last != EuropeanNumberTerminator) { 905 m_lastBeforeET = m_emptyRun ? m_eor : m_last; 906 } 907 break; 908 909 // boundary neutrals should be ignored 910 case BoundaryNeutral: 911 if (m_eor == m_last) 912 m_eor = m_current; 913 break; 914 // neutrals 915 case BlockSeparator: 916 // ### what do we do with newline and paragraph seperators that come to here? 917 break; 918 case SegmentSeparator: 919 // ### implement rule L1 920 break; 921 case WhiteSpaceNeutral: 922 break; 923 case OtherNeutral: 924 break; 925 default: 926 break; 927 } 928 929 if (lastLineEnded && m_eor == m_current) { 930 if (!m_reachedEndOfLine) { 931 m_eor = m_endOfRunAtEndOfLine; 932 switch (m_status.eor) { 933 case LeftToRight: 934 case RightToLeft: 935 case ArabicNumber: 936 m_direction = m_status.eor; 937 break; 938 case EuropeanNumber: 939 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber; 940 break; 941 default: 942 ASSERT_NOT_REACHED(); 943 } 944 appendRun(); 945 } 946 m_current = end; 947 m_status = stateAtEnd.m_status; 948 m_sor = stateAtEnd.m_sor; 949 m_eor = stateAtEnd.m_eor; 950 m_last = stateAtEnd.m_last; 951 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 952 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 953 m_emptyRun = stateAtEnd.m_emptyRun; 954 m_direction = OtherNeutral; 955 break; 956 } 957 958 updateStatusLastFromCurrentDirection(dirCurrent); 959 m_last = m_current; 960 961 if (m_emptyRun) { 962 m_sor = m_current; 963 m_emptyRun = false; 964 } 965 966 increment(); 967 if (!m_currentExplicitEmbeddingSequence.isEmpty()) { 968 bool committed = commitExplicitEmbedding(); 969 if (committed && lastLineEnded) { 970 m_current = end; 971 m_status = stateAtEnd.m_status; 972 m_sor = stateAtEnd.m_sor; 973 m_eor = stateAtEnd.m_eor; 974 m_last = stateAtEnd.m_last; 975 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 976 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 977 m_emptyRun = stateAtEnd.m_emptyRun; 978 m_direction = OtherNeutral; 979 break; 980 } 981 } 982 } 983 984 m_runs.setLogicallyLastRun(m_runs.lastRun()); 985 reorderRunsFromLevels(); 986 m_endOfRunAtEndOfLine = Iterator(); 987 m_endOfLine = Iterator(); 988 } 989 990 template <class Iterator, class Run> 991 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint) 992 { 993 ASSERT(!m_midpointStateForIsolatedRun.contains(run)); 994 m_midpointStateForIsolatedRun.add(run, midpoint); 995 } 996 997 template<class Iterator, class Run> 998 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run) 999 { 1000 return m_midpointStateForIsolatedRun.take(run); 1001 } 1002 1003 1004 } // namespace WebCore 1005 1006 #endif // BidiResolver_h 1007