1 /* 2 * Copyright (C) 2000 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef BidiResolver_h 23 #define BidiResolver_h 24 25 #include "platform/text/BidiCharacterRun.h" 26 #include "platform/text/BidiContext.h" 27 #include "platform/text/BidiRunList.h" 28 #include "platform/text/TextDirection.h" 29 #include "wtf/HashMap.h" 30 #include "wtf/Noncopyable.h" 31 #include "wtf/PassRefPtr.h" 32 #include "wtf/Vector.h" 33 34 namespace blink { 35 36 class RenderObject; 37 38 template <class Iterator> class MidpointState { 39 public: 40 MidpointState() 41 { 42 reset(); 43 } 44 45 void reset() 46 { 47 m_numMidpoints = 0; 48 m_currentMidpoint = 0; 49 m_betweenMidpoints = false; 50 } 51 52 void startIgnoringSpaces(const Iterator& midpoint) 53 { 54 ASSERT(!(m_numMidpoints % 2)); 55 addMidpoint(midpoint); 56 } 57 58 void stopIgnoringSpaces(const Iterator& midpoint) 59 { 60 ASSERT(m_numMidpoints % 2); 61 addMidpoint(midpoint); 62 } 63 64 // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or 65 // hard line breaks to ensure that they're not ignored. 66 void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer) 67 { 68 Iterator midpoint(0, renderer, 0); 69 stopIgnoringSpaces(midpoint); 70 startIgnoringSpaces(midpoint); 71 } 72 73 // Adding a pair of midpoints before a character will split it out into a new line box. 74 void ensureCharacterGetsLineBox(Iterator& textParagraphSeparator) 75 { 76 startIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset() - 1)); 77 stopIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset())); 78 } 79 80 void checkMidpoints(Iterator& lBreak) 81 { 82 // Check to see if our last midpoint is a start point beyond the line break. If so, 83 // shave it off the list, and shave off a trailing space if the previous end point doesn't 84 // preserve whitespace. 85 if (lBreak.object() && m_numMidpoints && !(m_numMidpoints % 2)) { 86 Iterator* midpointsIterator = m_midpoints.data(); 87 Iterator& endpoint = midpointsIterator[m_numMidpoints - 2]; 88 const Iterator& startpoint = midpointsIterator[m_numMidpoints - 1]; 89 Iterator currpoint = endpoint; 90 while (!currpoint.atEnd() && currpoint != startpoint && currpoint != lBreak) 91 currpoint.increment(); 92 if (currpoint == lBreak) { 93 // We hit the line break before the start point. Shave off the start point. 94 m_numMidpoints--; 95 if (endpoint.object()->style()->collapseWhiteSpace() && endpoint.object()->isText()) 96 endpoint.setOffset(endpoint.offset() - 1); 97 } 98 } 99 } 100 101 Vector<Iterator>& midpoints() { return m_midpoints; } 102 const unsigned& numMidpoints() const { return m_numMidpoints; } 103 const unsigned& currentMidpoint() const { return m_currentMidpoint; } 104 void incrementCurrentMidpoint() { m_currentMidpoint++; } 105 const bool& betweenMidpoints() const { return m_betweenMidpoints; } 106 void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; } 107 private: 108 // The goal is to reuse the line state across multiple 109 // lines so we just keep an array around for midpoints and never clear it across multiple 110 // lines. We track the number of items and position using the two other variables. 111 Vector<Iterator> m_midpoints; 112 unsigned m_numMidpoints; 113 unsigned m_currentMidpoint; 114 bool m_betweenMidpoints; 115 116 void addMidpoint(const Iterator& midpoint) 117 { 118 if (m_midpoints.size() <= m_numMidpoints) 119 m_midpoints.grow(m_numMidpoints + 10); 120 121 Iterator* midpointsIterator = m_midpoints.data(); 122 midpointsIterator[m_numMidpoints++] = midpoint; 123 } 124 }; 125 126 // The BidiStatus at a given position (typically the end of a line) can 127 // be cached and then used to restart bidi resolution at that position. 128 struct BidiStatus { 129 BidiStatus() 130 : eor(WTF::Unicode::OtherNeutral) 131 , lastStrong(WTF::Unicode::OtherNeutral) 132 , last(WTF::Unicode::OtherNeutral) 133 { 134 } 135 136 // Creates a BidiStatus representing a new paragraph root with a default direction. 137 // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19. 138 BidiStatus(TextDirection textDirection, bool isOverride) 139 { 140 WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft; 141 eor = lastStrong = last = direction; 142 context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride); 143 } 144 145 BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext) 146 : eor(eorDir) 147 , lastStrong(lastStrongDir) 148 , last(lastDir) 149 , context(bidiContext) 150 { 151 } 152 153 WTF::Unicode::Direction eor; 154 WTF::Unicode::Direction lastStrong; 155 WTF::Unicode::Direction last; 156 RefPtr<BidiContext> context; 157 }; 158 159 class BidiEmbedding { 160 public: 161 BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source) 162 : m_direction(direction) 163 , m_source(source) 164 { 165 } 166 167 WTF::Unicode::Direction direction() const { return m_direction; } 168 BidiEmbeddingSource source() const { return m_source; } 169 private: 170 WTF::Unicode::Direction m_direction; 171 BidiEmbeddingSource m_source; 172 }; 173 174 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) 175 { 176 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context); 177 } 178 179 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) 180 { 181 return !(status1 == status2); 182 } 183 184 enum VisualDirectionOverride { 185 NoVisualOverride, 186 VisualLeftToRightOverride, 187 VisualRightToLeftOverride 188 }; 189 190 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm 191 // http://unicode.org/reports/tr9 192 template <class Iterator, class Run> class BidiResolver { 193 WTF_MAKE_NONCOPYABLE(BidiResolver); 194 public: 195 BidiResolver() 196 : m_direction(WTF::Unicode::OtherNeutral) 197 , m_reachedEndOfLine(false) 198 , m_emptyRun(true) 199 , m_nestedIsolateCount(0) 200 , m_trailingSpaceRun(0) 201 { 202 } 203 204 #if ENABLE(ASSERT) 205 ~BidiResolver(); 206 #endif 207 208 const Iterator& position() const { return m_current; } 209 Iterator& position() { return m_current; } 210 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; } 211 void setPosition(const Iterator& position, unsigned nestedIsolatedCount) 212 { 213 m_current = position; 214 m_nestedIsolateCount = nestedIsolatedCount; 215 } 216 217 BidiContext* context() const { return m_status.context.get(); } 218 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } 219 220 void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; } 221 void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; } 222 void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; } 223 224 WTF::Unicode::Direction dir() const { return m_direction; } 225 void setDir(WTF::Unicode::Direction d) { m_direction = d; } 226 227 const BidiStatus& status() const { return m_status; } 228 void setStatus(const BidiStatus s) 229 { 230 ASSERT(s.context); 231 m_status = s; 232 m_paragraphDirectionality = s.context->dir() == WTF::Unicode::LeftToRight ? LTR : RTL; 233 } 234 235 MidpointState<Iterator>& midpointState() { return m_midpointState; } 236 237 // The current algorithm handles nested isolates one layer of nesting at a time. 238 // But when we layout each isolated span, we will walk into (and ignore) all 239 // child isolated spans. 240 void enterIsolate() { m_nestedIsolateCount++; } 241 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; } 242 bool inIsolate() const { return m_nestedIsolateCount; } 243 244 void embed(WTF::Unicode::Direction, BidiEmbeddingSource); 245 bool commitExplicitEmbedding(BidiRunList<Run>&); 246 247 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false, bool reorderRuns = true); 248 249 BidiRunList<Run>& runs() { return m_runs; } 250 251 // FIXME: This used to be part of deleteRuns() but was a layering violation. 252 // It's unclear if this is still needed. 253 void markCurrentRunEmpty() { m_emptyRun = true; } 254 255 Vector<Run*>& isolatedRuns() { return m_isolatedRuns; } 256 257 bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); } 258 259 TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0); 260 261 void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&); 262 MidpointState<Iterator> midpointStateForIsolatedRun(Run*); 263 264 Iterator endOfLine() const { return m_endOfLine; } 265 266 Run* trailingSpaceRun() const { return m_trailingSpaceRun; } 267 268 protected: 269 void increment() { m_current.increment(); } 270 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should 271 // pass in some sort of Traits object which knows how to create runs for appending. 272 void appendRun(BidiRunList<Run>&); 273 274 Run* addTrailingRun(BidiRunList<Run>&, int, int, Run*, BidiContext*, TextDirection) const { return 0; } 275 Iterator m_current; 276 // sor and eor are "start of run" and "end of run" respectively and correpond 277 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7 278 Iterator m_sor; // Points to the first character in the current run. 279 Iterator m_eor; // Points to the last character in the current run. 280 Iterator m_last; 281 BidiStatus m_status; 282 WTF::Unicode::Direction m_direction; 283 // m_endOfRunAtEndOfLine is "the position last eor in the end of line" 284 Iterator m_endOfRunAtEndOfLine; 285 Iterator m_endOfLine; 286 bool m_reachedEndOfLine; 287 Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator 288 bool m_emptyRun; 289 290 // FIXME: This should not belong to the resolver, but rather be passed 291 // into createBidiRunsForLine by the caller. 292 BidiRunList<Run> m_runs; 293 294 MidpointState<Iterator> m_midpointState; 295 296 unsigned m_nestedIsolateCount; 297 Vector<Run*> m_isolatedRuns; 298 Run* m_trailingSpaceRun; 299 TextDirection m_paragraphDirectionality; 300 301 private: 302 void raiseExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from, WTF::Unicode::Direction to); 303 void lowerExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from); 304 void checkDirectionInLowerRaiseEmbeddingLevel(); 305 306 void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction); 307 void reorderRunsFromLevels(BidiRunList<Run>&) const; 308 309 bool needsToApplyL1Rule(BidiRunList<Run>&) { return false; } 310 int findFirstTrailingSpaceAtRun(Run*) { return 0; } 311 // http://www.unicode.org/reports/tr9/#L1 312 void applyL1Rule(BidiRunList<Run>&); 313 314 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence; 315 HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun; 316 }; 317 318 #if ENABLE(ASSERT) 319 template <class Iterator, class Run> 320 BidiResolver<Iterator, Run>::~BidiResolver() 321 { 322 // The owner of this resolver should have handled the isolated runs. 323 ASSERT(m_isolatedRuns.isEmpty()); 324 } 325 #endif 326 327 template <class Iterator, class Run> 328 void BidiResolver<Iterator, Run>::appendRun(BidiRunList<Run>& runs) 329 { 330 if (!m_emptyRun && !m_eor.atEnd()) { 331 unsigned startOffset = m_sor.offset(); 332 unsigned endOffset = m_eor.offset(); 333 334 if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) { 335 m_reachedEndOfLine = true; 336 endOffset = m_endOfRunAtEndOfLine.offset(); 337 } 338 339 if (endOffset >= startOffset) 340 runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction)); 341 342 m_eor.increment(); 343 m_sor = m_eor; 344 } 345 346 m_direction = WTF::Unicode::OtherNeutral; 347 m_status.eor = WTF::Unicode::OtherNeutral; 348 } 349 350 template <class Iterator, class Run> 351 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source) 352 { 353 // Isolated spans compute base directionality during their own UBA run. 354 // Do not insert fake embed characters once we enter an isolated span. 355 ASSERT(!inIsolate()); 356 using namespace WTF::Unicode; 357 358 ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride); 359 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source)); 360 } 361 362 template <class Iterator, class Run> 363 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() 364 { 365 using namespace WTF::Unicode; 366 367 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd()); 368 ASSERT(m_status.last != NonSpacingMark 369 && m_status.last != BoundaryNeutral 370 && m_status.last != RightToLeftEmbedding 371 && m_status.last != LeftToRightEmbedding 372 && m_status.last != RightToLeftOverride 373 && m_status.last != LeftToRightOverride 374 && m_status.last != PopDirectionalFormat); 375 if (m_direction == OtherNeutral) 376 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; 377 } 378 379 template <class Iterator, class Run> 380 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from) 381 { 382 using namespace WTF::Unicode; 383 384 if (!m_emptyRun && m_eor != m_last) { 385 checkDirectionInLowerRaiseEmbeddingLevel(); 386 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 387 if (from == LeftToRight) { 388 // bidi.sor ... bidi.eor ... bidi.last L 389 if (m_status.eor == EuropeanNumber) { 390 if (m_status.lastStrong != LeftToRight) { 391 m_direction = EuropeanNumber; 392 appendRun(runs); 393 } 394 } else if (m_status.eor == ArabicNumber) { 395 m_direction = ArabicNumber; 396 appendRun(runs); 397 } else if (m_status.lastStrong != LeftToRight) { 398 appendRun(runs); 399 m_direction = LeftToRight; 400 } 401 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) { 402 appendRun(runs); 403 m_direction = RightToLeft; 404 } 405 m_eor = m_last; 406 } 407 408 appendRun(runs); 409 m_emptyRun = true; 410 411 // sor for the new run is determined by the higher level (rule X10) 412 setLastDir(from); 413 setLastStrongDir(from); 414 m_eor = Iterator(); 415 } 416 417 template <class Iterator, class Run> 418 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from, WTF::Unicode::Direction to) 419 { 420 using namespace WTF::Unicode; 421 422 if (!m_emptyRun && m_eor != m_last) { 423 checkDirectionInLowerRaiseEmbeddingLevel(); 424 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 425 if (to == LeftToRight) { 426 // bidi.sor ... bidi.eor ... bidi.last L 427 if (m_status.eor == EuropeanNumber) { 428 if (m_status.lastStrong != LeftToRight) { 429 m_direction = EuropeanNumber; 430 appendRun(runs); 431 } 432 } else if (m_status.eor == ArabicNumber) { 433 m_direction = ArabicNumber; 434 appendRun(runs); 435 } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) { 436 appendRun(runs); 437 m_direction = LeftToRight; 438 } 439 } else if (m_status.eor == ArabicNumber 440 || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft)) 441 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) { 442 appendRun(runs); 443 m_direction = RightToLeft; 444 } 445 m_eor = m_last; 446 } 447 448 appendRun(runs); 449 m_emptyRun = true; 450 451 setLastDir(to); 452 setLastStrongDir(to); 453 m_eor = Iterator(); 454 } 455 456 template <class Iterator, class Run> 457 void BidiResolver<Iterator, Run>::applyL1Rule(BidiRunList<Run>& runs) 458 { 459 ASSERT(runs.runCount()); 460 if (!needsToApplyL1Rule(runs)) 461 return; 462 463 Run* trailingSpaceRun = runs.logicallyLastRun(); 464 465 int firstSpace = findFirstTrailingSpaceAtRun(trailingSpaceRun); 466 if (firstSpace == trailingSpaceRun->stop()) 467 return; 468 469 bool shouldReorder = trailingSpaceRun != (m_paragraphDirectionality == LTR ? runs.lastRun() : runs.firstRun()); 470 if (firstSpace != trailingSpaceRun->start()) { 471 BidiContext* baseContext = context(); 472 while (BidiContext* parent = baseContext->parent()) 473 baseContext = parent; 474 475 m_trailingSpaceRun = addTrailingRun(runs, firstSpace, trailingSpaceRun->m_stop, trailingSpaceRun, baseContext, m_paragraphDirectionality); 476 ASSERT(m_trailingSpaceRun); 477 trailingSpaceRun->m_stop = firstSpace; 478 return; 479 } 480 if (!shouldReorder) { 481 m_trailingSpaceRun = trailingSpaceRun; 482 return; 483 } 484 485 if (m_paragraphDirectionality == LTR) { 486 runs.moveRunToEnd(trailingSpaceRun); 487 trailingSpaceRun->m_level = 0; 488 } else { 489 runs.moveRunToBeginning(trailingSpaceRun); 490 trailingSpaceRun->m_level = 1; 491 } 492 m_trailingSpaceRun = trailingSpaceRun; 493 } 494 495 template <class Iterator, class Run> 496 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding(BidiRunList<Run>& runs) 497 { 498 // When we're "inIsolate()" we're resolving the parent context which 499 // ignores (skips over) the isolated content, including embedding levels. 500 // We should never accrue embedding levels while skipping over isolated content. 501 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty()); 502 503 using namespace WTF::Unicode; 504 505 unsigned char fromLevel = context()->level(); 506 RefPtr<BidiContext> toContext = context(); 507 508 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { 509 BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i]; 510 if (embedding.direction() == PopDirectionalFormat) { 511 if (BidiContext* parentContext = toContext->parent()) 512 toContext = parentContext; 513 } else { 514 Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight; 515 bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride; 516 unsigned char level = toContext->level(); 517 if (direction == RightToLeft) 518 level = nextGreaterOddLevel(level); 519 else 520 level = nextGreaterEvenLevel(level); 521 if (level < BidiContext::kMaxLevel) 522 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get()); 523 } 524 } 525 526 unsigned char toLevel = toContext->level(); 527 528 if (toLevel > fromLevel) 529 raiseExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight); 530 else if (toLevel < fromLevel) 531 lowerExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight); 532 533 setContext(toContext); 534 535 m_currentExplicitEmbeddingSequence.clear(); 536 537 return fromLevel != toLevel; 538 } 539 540 template <class Iterator, class Run> 541 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent) 542 { 543 using namespace WTF::Unicode; 544 switch (dirCurrent) { 545 case EuropeanNumberTerminator: 546 if (m_status.last != EuropeanNumber) 547 m_status.last = EuropeanNumberTerminator; 548 break; 549 case EuropeanNumberSeparator: 550 case CommonNumberSeparator: 551 case SegmentSeparator: 552 case WhiteSpaceNeutral: 553 case OtherNeutral: 554 switch (m_status.last) { 555 case LeftToRight: 556 case RightToLeft: 557 case RightToLeftArabic: 558 case EuropeanNumber: 559 case ArabicNumber: 560 m_status.last = dirCurrent; 561 break; 562 default: 563 m_status.last = OtherNeutral; 564 } 565 break; 566 case NonSpacingMark: 567 case BoundaryNeutral: 568 case RightToLeftEmbedding: 569 case LeftToRightEmbedding: 570 case RightToLeftOverride: 571 case LeftToRightOverride: 572 case PopDirectionalFormat: 573 // ignore these 574 break; 575 case EuropeanNumber: 576 // fall through 577 default: 578 m_status.last = dirCurrent; 579 } 580 } 581 582 template <class Iterator, class Run> 583 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels(BidiRunList<Run>& runs) const 584 { 585 unsigned char levelLow = BidiContext::kMaxLevel; 586 unsigned char levelHigh = 0; 587 for (Run* run = runs.firstRun(); run; run = run->next()) { 588 levelHigh = std::max(run->level(), levelHigh); 589 levelLow = std::min(run->level(), levelLow); 590 } 591 592 // This implements reordering of the line (L2 according to Bidi spec): 593 // http://unicode.org/reports/tr9/#L2 594 // L2. From the highest level found in the text to the lowest odd level on each line, 595 // reverse any contiguous sequence of characters that are at that level or higher. 596 597 // Reversing is only done up to the lowest odd level. 598 if (!(levelLow % 2)) 599 levelLow++; 600 601 unsigned count = runs.runCount() - 1; 602 603 while (levelHigh >= levelLow) { 604 unsigned i = 0; 605 Run* run = runs.firstRun(); 606 while (i < count) { 607 for (;i < count && run && run->level() < levelHigh; i++) 608 run = run->next(); 609 unsigned start = i; 610 for (;i <= count && run && run->level() >= levelHigh; i++) 611 run = run->next(); 612 unsigned end = i - 1; 613 runs.reverseRuns(start, end); 614 } 615 levelHigh--; 616 } 617 } 618 619 template <class Iterator, class Run> 620 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality) 621 { 622 while (!m_current.atEnd()) { 623 if (inIsolate()) { 624 increment(); 625 continue; 626 } 627 if (m_current.atParagraphSeparator()) 628 break; 629 UChar32 current = m_current.current(); 630 if (UNLIKELY(U16_IS_SURROGATE(current))) { 631 increment(); 632 // If this not the high part of the surrogate pair, then drop it and move to the next. 633 if (!U16_IS_SURROGATE_LEAD(current)) 634 continue; 635 UChar high = static_cast<UChar>(current); 636 if (m_current.atEnd()) 637 continue; 638 UChar low = m_current.current(); 639 // Verify the low part. If invalid, then assume an invalid surrogate pair and retry. 640 if (!U16_IS_TRAIL(low)) 641 continue; 642 current = U16_GET_SUPPLEMENTARY(high, low); 643 } 644 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current); 645 if (charDirection == WTF::Unicode::LeftToRight) { 646 if (hasStrongDirectionality) 647 *hasStrongDirectionality = true; 648 return LTR; 649 } 650 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) { 651 if (hasStrongDirectionality) 652 *hasStrongDirectionality = true; 653 return RTL; 654 } 655 increment(); 656 } 657 if (hasStrongDirectionality) 658 *hasStrongDirectionality = false; 659 return LTR; 660 } 661 662 template <class Iterator, class Run> 663 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak, bool reorderRuns) 664 { 665 using namespace WTF::Unicode; 666 667 ASSERT(m_direction == OtherNeutral); 668 m_trailingSpaceRun = 0; 669 670 m_endOfLine = end; 671 672 if (override != NoVisualOverride) { 673 m_emptyRun = false; 674 m_sor = m_current; 675 m_eor = Iterator(); 676 while (m_current != end && !m_current.atEnd()) { 677 m_eor = m_current; 678 increment(); 679 } 680 m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft; 681 appendRun(m_runs); 682 m_runs.setLogicallyLastRun(m_runs.lastRun()); 683 if (override == VisualRightToLeftOverride && m_runs.runCount()) 684 m_runs.reverseRuns(0, m_runs.runCount() - 1); 685 return; 686 } 687 688 m_emptyRun = true; 689 690 m_eor = Iterator(); 691 692 m_last = m_current; 693 bool lastLineEnded = false; 694 BidiResolver<Iterator, Run> stateAtEnd; 695 696 while (true) { 697 if (inIsolate() && m_emptyRun) { 698 m_sor = m_current; 699 m_emptyRun = false; 700 } 701 702 if (!lastLineEnded && isEndOfLine(end)) { 703 if (m_emptyRun) 704 break; 705 706 stateAtEnd.m_status = m_status; 707 stateAtEnd.m_sor = m_sor; 708 stateAtEnd.m_eor = m_eor; 709 stateAtEnd.m_last = m_last; 710 stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine; 711 stateAtEnd.m_lastBeforeET = m_lastBeforeET; 712 stateAtEnd.m_emptyRun = m_emptyRun; 713 m_endOfRunAtEndOfLine = m_last; 714 lastLineEnded = true; 715 } 716 Direction dirCurrent; 717 if (lastLineEnded && (hardLineBreak || m_current.atEnd())) { 718 BidiContext* c = context(); 719 if (hardLineBreak) { 720 // A deviation from the Unicode Bidi Algorithm in order to match 721 // WinIE and user expectations: hard line breaks reset bidi state 722 // coming from unicode bidi control characters, but not those from 723 // DOM nodes with specified directionality 724 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts()); 725 726 dirCurrent = stateAtEnd.context()->dir(); 727 stateAtEnd.setEorDir(dirCurrent); 728 stateAtEnd.setLastDir(dirCurrent); 729 stateAtEnd.setLastStrongDir(dirCurrent); 730 } else { 731 while (c->parent()) 732 c = c->parent(); 733 dirCurrent = c->dir(); 734 } 735 } else { 736 dirCurrent = m_current.direction(); 737 if (context()->override() 738 && dirCurrent != RightToLeftEmbedding 739 && dirCurrent != LeftToRightEmbedding 740 && dirCurrent != RightToLeftOverride 741 && dirCurrent != LeftToRightOverride 742 && dirCurrent != PopDirectionalFormat) 743 dirCurrent = context()->dir(); 744 else if (dirCurrent == NonSpacingMark) 745 dirCurrent = m_status.last; 746 } 747 748 // We ignore all character directionality while in unicode-bidi: isolate spans. 749 // We'll handle ordering the isolated characters in a second pass. 750 if (inIsolate()) 751 dirCurrent = OtherNeutral; 752 753 ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd()); 754 switch (dirCurrent) { 755 756 // embedding and overrides (X1-X9 in the Bidi specs) 757 case RightToLeftEmbedding: 758 case LeftToRightEmbedding: 759 case RightToLeftOverride: 760 case LeftToRightOverride: 761 case PopDirectionalFormat: 762 embed(dirCurrent, FromUnicode); 763 commitExplicitEmbedding(m_runs); 764 break; 765 766 // strong types 767 case LeftToRight: 768 switch (m_status.last) { 769 case RightToLeft: 770 case RightToLeftArabic: 771 case EuropeanNumber: 772 case ArabicNumber: 773 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight) 774 appendRun(m_runs); 775 break; 776 case LeftToRight: 777 break; 778 case EuropeanNumberSeparator: 779 case EuropeanNumberTerminator: 780 case CommonNumberSeparator: 781 case BoundaryNeutral: 782 case BlockSeparator: 783 case SegmentSeparator: 784 case WhiteSpaceNeutral: 785 case OtherNeutral: 786 if (m_status.eor == EuropeanNumber) { 787 if (m_status.lastStrong != LeftToRight) { 788 // the numbers need to be on a higher embedding level, so let's close that run 789 m_direction = EuropeanNumber; 790 appendRun(m_runs); 791 if (context()->dir() != LeftToRight) { 792 // the neutrals take the embedding direction, which is R 793 m_eor = m_last; 794 m_direction = RightToLeft; 795 appendRun(m_runs); 796 } 797 } 798 } else if (m_status.eor == ArabicNumber) { 799 // Arabic numbers are always on a higher embedding level, so let's close that run 800 m_direction = ArabicNumber; 801 appendRun(m_runs); 802 if (context()->dir() != LeftToRight) { 803 // the neutrals take the embedding direction, which is R 804 m_eor = m_last; 805 m_direction = RightToLeft; 806 appendRun(m_runs); 807 } 808 } else if (m_status.lastStrong != LeftToRight) { 809 // last stuff takes embedding dir 810 if (context()->dir() == RightToLeft) { 811 m_eor = m_last; 812 m_direction = RightToLeft; 813 } 814 appendRun(m_runs); 815 } 816 default: 817 break; 818 } 819 m_eor = m_current; 820 m_status.eor = LeftToRight; 821 m_status.lastStrong = LeftToRight; 822 m_direction = LeftToRight; 823 break; 824 case RightToLeftArabic: 825 case RightToLeft: 826 switch (m_status.last) { 827 case LeftToRight: 828 case EuropeanNumber: 829 case ArabicNumber: 830 appendRun(m_runs); 831 case RightToLeft: 832 case RightToLeftArabic: 833 break; 834 case EuropeanNumberSeparator: 835 case EuropeanNumberTerminator: 836 case CommonNumberSeparator: 837 case BoundaryNeutral: 838 case BlockSeparator: 839 case SegmentSeparator: 840 case WhiteSpaceNeutral: 841 case OtherNeutral: 842 if (m_status.eor == EuropeanNumber) { 843 if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight) 844 m_eor = m_last; 845 appendRun(m_runs); 846 } else if (m_status.eor == ArabicNumber) { 847 appendRun(m_runs); 848 } else if (m_status.lastStrong == LeftToRight) { 849 if (context()->dir() == LeftToRight) 850 m_eor = m_last; 851 appendRun(m_runs); 852 } 853 default: 854 break; 855 } 856 m_eor = m_current; 857 m_status.eor = RightToLeft; 858 m_status.lastStrong = dirCurrent; 859 m_direction = RightToLeft; 860 break; 861 862 // weak types: 863 864 case EuropeanNumber: 865 if (m_status.lastStrong != RightToLeftArabic) { 866 // if last strong was AL change EN to AN 867 switch (m_status.last) { 868 case EuropeanNumber: 869 case LeftToRight: 870 break; 871 case RightToLeft: 872 case RightToLeftArabic: 873 case ArabicNumber: 874 m_eor = m_last; 875 appendRun(m_runs); 876 m_direction = EuropeanNumber; 877 break; 878 case EuropeanNumberSeparator: 879 case CommonNumberSeparator: 880 if (m_status.eor == EuropeanNumber) 881 break; 882 case EuropeanNumberTerminator: 883 case BoundaryNeutral: 884 case BlockSeparator: 885 case SegmentSeparator: 886 case WhiteSpaceNeutral: 887 case OtherNeutral: 888 if (m_status.eor == EuropeanNumber) { 889 if (m_status.lastStrong == RightToLeft) { 890 // ENs on both sides behave like Rs, so the neutrals should be R. 891 // Terminate the EN run. 892 appendRun(m_runs); 893 // Make an R run. 894 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 895 m_direction = RightToLeft; 896 appendRun(m_runs); 897 // Begin a new EN run. 898 m_direction = EuropeanNumber; 899 } 900 } else if (m_status.eor == ArabicNumber) { 901 // Terminate the AN run. 902 appendRun(m_runs); 903 if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) { 904 // Make an R run. 905 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 906 m_direction = RightToLeft; 907 appendRun(m_runs); 908 // Begin a new EN run. 909 m_direction = EuropeanNumber; 910 } 911 } else if (m_status.lastStrong == RightToLeft) { 912 // Extend the R run to include the neutrals. 913 m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last; 914 m_direction = RightToLeft; 915 appendRun(m_runs); 916 // Begin a new EN run. 917 m_direction = EuropeanNumber; 918 } 919 default: 920 break; 921 } 922 m_eor = m_current; 923 m_status.eor = EuropeanNumber; 924 if (m_direction == OtherNeutral) 925 m_direction = LeftToRight; 926 break; 927 } 928 case ArabicNumber: 929 dirCurrent = ArabicNumber; 930 switch (m_status.last) { 931 case LeftToRight: 932 if (context()->dir() == LeftToRight) 933 appendRun(m_runs); 934 break; 935 case ArabicNumber: 936 break; 937 case RightToLeft: 938 case RightToLeftArabic: 939 case EuropeanNumber: 940 m_eor = m_last; 941 appendRun(m_runs); 942 break; 943 case CommonNumberSeparator: 944 if (m_status.eor == ArabicNumber) 945 break; 946 case EuropeanNumberSeparator: 947 case EuropeanNumberTerminator: 948 case BoundaryNeutral: 949 case BlockSeparator: 950 case SegmentSeparator: 951 case WhiteSpaceNeutral: 952 case OtherNeutral: 953 if (m_status.eor == ArabicNumber 954 || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)) 955 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) { 956 // Terminate the run before the neutrals. 957 appendRun(m_runs); 958 // Begin an R run for the neutrals. 959 m_direction = RightToLeft; 960 } else if (m_direction == OtherNeutral) { 961 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; 962 } 963 m_eor = m_last; 964 appendRun(m_runs); 965 default: 966 break; 967 } 968 m_eor = m_current; 969 m_status.eor = ArabicNumber; 970 if (m_direction == OtherNeutral) 971 m_direction = ArabicNumber; 972 break; 973 case EuropeanNumberSeparator: 974 case CommonNumberSeparator: 975 break; 976 case EuropeanNumberTerminator: 977 if (m_status.last == EuropeanNumber) { 978 dirCurrent = EuropeanNumber; 979 m_eor = m_current; 980 m_status.eor = dirCurrent; 981 } else if (m_status.last != EuropeanNumberTerminator) { 982 m_lastBeforeET = m_emptyRun ? m_eor : m_last; 983 } 984 break; 985 986 // boundary neutrals should be ignored 987 case BoundaryNeutral: 988 if (m_eor == m_last) 989 m_eor = m_current; 990 break; 991 // neutrals 992 case BlockSeparator: 993 // ### what do we do with newline and paragraph seperators that come to here? 994 break; 995 case SegmentSeparator: 996 // ### implement rule L1 997 break; 998 case WhiteSpaceNeutral: 999 break; 1000 case OtherNeutral: 1001 break; 1002 default: 1003 break; 1004 } 1005 1006 if (lastLineEnded && m_eor == m_current) { 1007 if (!m_reachedEndOfLine) { 1008 m_eor = m_endOfRunAtEndOfLine; 1009 switch (m_status.eor) { 1010 case LeftToRight: 1011 case RightToLeft: 1012 case ArabicNumber: 1013 m_direction = m_status.eor; 1014 break; 1015 case EuropeanNumber: 1016 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber; 1017 break; 1018 default: 1019 ASSERT_NOT_REACHED(); 1020 } 1021 appendRun(m_runs); 1022 } 1023 m_current = end; 1024 m_status = stateAtEnd.m_status; 1025 m_sor = stateAtEnd.m_sor; 1026 m_eor = stateAtEnd.m_eor; 1027 m_last = stateAtEnd.m_last; 1028 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 1029 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 1030 m_emptyRun = stateAtEnd.m_emptyRun; 1031 m_direction = OtherNeutral; 1032 break; 1033 } 1034 1035 updateStatusLastFromCurrentDirection(dirCurrent); 1036 m_last = m_current; 1037 1038 if (m_emptyRun) { 1039 m_sor = m_current; 1040 m_emptyRun = false; 1041 } 1042 1043 increment(); 1044 if (!m_currentExplicitEmbeddingSequence.isEmpty()) { 1045 bool committed = commitExplicitEmbedding(m_runs); 1046 if (committed && lastLineEnded) { 1047 m_current = end; 1048 m_status = stateAtEnd.m_status; 1049 m_sor = stateAtEnd.m_sor; 1050 m_eor = stateAtEnd.m_eor; 1051 m_last = stateAtEnd.m_last; 1052 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 1053 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 1054 m_emptyRun = stateAtEnd.m_emptyRun; 1055 m_direction = OtherNeutral; 1056 break; 1057 } 1058 } 1059 } 1060 1061 m_runs.setLogicallyLastRun(m_runs.lastRun()); 1062 if (reorderRuns) 1063 reorderRunsFromLevels(m_runs); 1064 m_endOfRunAtEndOfLine = Iterator(); 1065 m_endOfLine = Iterator(); 1066 1067 if (!hardLineBreak && m_runs.runCount()) 1068 applyL1Rule(m_runs); 1069 } 1070 1071 template <class Iterator, class Run> 1072 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint) 1073 { 1074 ASSERT(!m_midpointStateForIsolatedRun.contains(run)); 1075 m_midpointStateForIsolatedRun.add(run, midpoint); 1076 } 1077 1078 template<class Iterator, class Run> 1079 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run) 1080 { 1081 return m_midpointStateForIsolatedRun.take(run); 1082 } 1083 1084 1085 } // namespace blink 1086 1087 #endif // BidiResolver_h 1088