Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2000 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc.  All right reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef BidiResolver_h
     23 #define BidiResolver_h
     24 
     25 #include "platform/text/BidiContext.h"
     26 #include "platform/text/BidiRunList.h"
     27 #include "platform/text/TextDirection.h"
     28 #include "wtf/HashMap.h"
     29 #include "wtf/Noncopyable.h"
     30 #include "wtf/PassRefPtr.h"
     31 #include "wtf/Vector.h"
     32 
     33 namespace WebCore {
     34 
     35 template <class Iterator> struct MidpointState {
     36     MidpointState()
     37     {
     38         reset();
     39     }
     40 
     41     void reset()
     42     {
     43         numMidpoints = 0;
     44         currentMidpoint = 0;
     45         betweenMidpoints = false;
     46     }
     47 
     48     // The goal is to reuse the line state across multiple
     49     // lines so we just keep an array around for midpoints and never clear it across multiple
     50     // lines. We track the number of items and position using the two other variables.
     51     Vector<Iterator> midpoints;
     52     unsigned numMidpoints;
     53     unsigned currentMidpoint;
     54     bool betweenMidpoints;
     55 };
     56 
     57 // The BidiStatus at a given position (typically the end of a line) can
     58 // be cached and then used to restart bidi resolution at that position.
     59 struct BidiStatus {
     60     BidiStatus()
     61         : eor(WTF::Unicode::OtherNeutral)
     62         , lastStrong(WTF::Unicode::OtherNeutral)
     63         , last(WTF::Unicode::OtherNeutral)
     64     {
     65     }
     66 
     67     // Creates a BidiStatus representing a new paragraph root with a default direction.
     68     // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19.
     69     BidiStatus(TextDirection textDirection, bool isOverride)
     70     {
     71         WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft;
     72         eor = lastStrong = last = direction;
     73         context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
     74     }
     75 
     76     BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
     77         : eor(eorDir)
     78         , lastStrong(lastStrongDir)
     79         , last(lastDir)
     80         , context(bidiContext)
     81     {
     82     }
     83 
     84     WTF::Unicode::Direction eor;
     85     WTF::Unicode::Direction lastStrong;
     86     WTF::Unicode::Direction last;
     87     RefPtr<BidiContext> context;
     88 };
     89 
     90 class BidiEmbedding {
     91 public:
     92     BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source)
     93     : m_direction(direction)
     94     , m_source(source)
     95     {
     96     }
     97 
     98     WTF::Unicode::Direction direction() const { return m_direction; }
     99     BidiEmbeddingSource source() const { return m_source; }
    100 private:
    101     WTF::Unicode::Direction m_direction;
    102     BidiEmbeddingSource m_source;
    103 };
    104 
    105 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
    106 {
    107     return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
    108 }
    109 
    110 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
    111 {
    112     return !(status1 == status2);
    113 }
    114 
    115 struct BidiCharacterRun {
    116     BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
    117         : m_override(context->override())
    118         , m_next(0)
    119         , m_start(start)
    120         , m_stop(stop)
    121     {
    122         ASSERT(m_start <= m_stop);
    123         if (dir == WTF::Unicode::OtherNeutral)
    124             dir = context->dir();
    125 
    126         m_level = context->level();
    127 
    128         // add level of run (cases I1 & I2)
    129         if (m_level % 2) {
    130             if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
    131                 m_level++;
    132         } else {
    133             if (dir == WTF::Unicode::RightToLeft)
    134                 m_level++;
    135             else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
    136                 m_level += 2;
    137         }
    138     }
    139 
    140     int start() const { return m_start; }
    141     int stop() const { return m_stop; }
    142     unsigned char level() const { return m_level; }
    143     bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
    144     bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
    145 
    146     BidiCharacterRun* next() const { return m_next; }
    147     void setNext(BidiCharacterRun* next) { m_next = next; }
    148 
    149     // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173
    150     bool m_override : 1;
    151     bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
    152     bool m_startsSegment : 1; // Same comment as m_hasHyphen.
    153     unsigned char m_level;
    154     BidiCharacterRun* m_next;
    155     int m_start;
    156     int m_stop;
    157 };
    158 
    159 enum VisualDirectionOverride {
    160     NoVisualOverride,
    161     VisualLeftToRightOverride,
    162     VisualRightToLeftOverride
    163 };
    164 
    165 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
    166 // http://unicode.org/reports/tr9
    167 template <class Iterator, class Run> class BidiResolver {
    168     WTF_MAKE_NONCOPYABLE(BidiResolver);
    169 public:
    170     BidiResolver()
    171         : m_direction(WTF::Unicode::OtherNeutral)
    172         , m_reachedEndOfLine(false)
    173         , m_emptyRun(true)
    174         , m_nestedIsolateCount(0)
    175     {
    176     }
    177 
    178 #ifndef NDEBUG
    179     ~BidiResolver();
    180 #endif
    181 
    182     const Iterator& position() const { return m_current; }
    183     Iterator& position() { return m_current; }
    184     void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
    185     void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
    186     {
    187         m_current = position;
    188         m_nestedIsolateCount = nestedIsolatedCount;
    189     }
    190 
    191     BidiContext* context() const { return m_status.context.get(); }
    192     void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
    193 
    194     void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
    195     void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
    196     void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
    197 
    198     WTF::Unicode::Direction dir() const { return m_direction; }
    199     void setDir(WTF::Unicode::Direction d) { m_direction = d; }
    200 
    201     const BidiStatus& status() const { return m_status; }
    202     void setStatus(const BidiStatus s)
    203     {
    204         ASSERT(s.context);
    205         m_status = s;
    206     }
    207 
    208     MidpointState<Iterator>& midpointState() { return m_midpointState; }
    209 
    210     // The current algorithm handles nested isolates one layer of nesting at a time.
    211     // But when we layout each isolated span, we will walk into (and ignore) all
    212     // child isolated spans.
    213     void enterIsolate() { m_nestedIsolateCount++; }
    214     void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
    215     bool inIsolate() const { return m_nestedIsolateCount; }
    216 
    217     void embed(WTF::Unicode::Direction, BidiEmbeddingSource);
    218     bool commitExplicitEmbedding();
    219 
    220     void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
    221 
    222     BidiRunList<Run>& runs() { return m_runs; }
    223 
    224     // FIXME: This used to be part of deleteRuns() but was a layering violation.
    225     // It's unclear if this is still needed.
    226     void markCurrentRunEmpty() { m_emptyRun = true; }
    227 
    228     Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
    229 
    230     bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); }
    231 
    232     TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0);
    233 
    234     void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&);
    235     MidpointState<Iterator> midpointStateForIsolatedRun(Run*);
    236 
    237     Iterator endOfLine() const { return m_endOfLine; }
    238 
    239 protected:
    240     void increment() { m_current.increment(); }
    241     // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
    242     // pass in some sort of Traits object which knows how to create runs for appending.
    243     void appendRun();
    244 
    245     Iterator m_current;
    246     // sor and eor are "start of run" and "end of run" respectively and correpond
    247     // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
    248     Iterator m_sor; // Points to the first character in the current run.
    249     Iterator m_eor; // Points to the last character in the current run.
    250     Iterator m_last;
    251     BidiStatus m_status;
    252     WTF::Unicode::Direction m_direction;
    253     // m_endOfRunAtEndOfLine is "the position last eor in the end of line"
    254     Iterator m_endOfRunAtEndOfLine;
    255     Iterator m_endOfLine;
    256     bool m_reachedEndOfLine;
    257     Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator
    258     bool m_emptyRun;
    259 
    260     // FIXME: This should not belong to the resolver, but rather be passed
    261     // into createBidiRunsForLine by the caller.
    262     BidiRunList<Run> m_runs;
    263 
    264     MidpointState<Iterator> m_midpointState;
    265 
    266     unsigned m_nestedIsolateCount;
    267     Vector<Run*> m_isolatedRuns;
    268 
    269 private:
    270     void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
    271     void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
    272     void checkDirectionInLowerRaiseEmbeddingLevel();
    273 
    274     void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction);
    275     void reorderRunsFromLevels();
    276 
    277     Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
    278     HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun;
    279 };
    280 
    281 #ifndef NDEBUG
    282 template <class Iterator, class Run>
    283 BidiResolver<Iterator, Run>::~BidiResolver()
    284 {
    285     // The owner of this resolver should have handled the isolated runs.
    286     ASSERT(m_isolatedRuns.isEmpty());
    287 }
    288 #endif
    289 
    290 template <class Iterator, class Run>
    291 void BidiResolver<Iterator, Run>::appendRun()
    292 {
    293     if (!m_emptyRun && !m_eor.atEnd()) {
    294         unsigned startOffset = m_sor.offset();
    295         unsigned endOffset = m_eor.offset();
    296 
    297         if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) {
    298             m_reachedEndOfLine = true;
    299             endOffset = m_endOfRunAtEndOfLine.offset();
    300         }
    301 
    302         if (endOffset >= startOffset)
    303             m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
    304 
    305         m_eor.increment();
    306         m_sor = m_eor;
    307     }
    308 
    309     m_direction = WTF::Unicode::OtherNeutral;
    310     m_status.eor = WTF::Unicode::OtherNeutral;
    311 }
    312 
    313 template <class Iterator, class Run>
    314 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source)
    315 {
    316     // Isolated spans compute base directionality during their own UBA run.
    317     // Do not insert fake embed characters once we enter an isolated span.
    318     ASSERT(!inIsolate());
    319     using namespace WTF::Unicode;
    320 
    321     ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride);
    322     m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
    323 }
    324 
    325 template <class Iterator, class Run>
    326 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
    327 {
    328     using namespace WTF::Unicode;
    329 
    330     ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
    331     ASSERT(m_status.last != NonSpacingMark
    332         && m_status.last != BoundaryNeutral
    333         && m_status.last != RightToLeftEmbedding
    334         && m_status.last != LeftToRightEmbedding
    335         && m_status.last != RightToLeftOverride
    336         && m_status.last != LeftToRightOverride
    337         && m_status.last != PopDirectionalFormat);
    338     if (m_direction == OtherNeutral)
    339         m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
    340 }
    341 
    342 template <class Iterator, class Run>
    343 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
    344 {
    345     using namespace WTF::Unicode;
    346 
    347     if (!m_emptyRun && m_eor != m_last) {
    348         checkDirectionInLowerRaiseEmbeddingLevel();
    349         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
    350         if (from == LeftToRight) {
    351             // bidi.sor ... bidi.eor ... bidi.last L
    352             if (m_status.eor == EuropeanNumber) {
    353                 if (m_status.lastStrong != LeftToRight) {
    354                     m_direction = EuropeanNumber;
    355                     appendRun();
    356                 }
    357             } else if (m_status.eor == ArabicNumber) {
    358                 m_direction = ArabicNumber;
    359                 appendRun();
    360             } else if (m_status.lastStrong != LeftToRight) {
    361                 appendRun();
    362                 m_direction = LeftToRight;
    363             }
    364         } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
    365             appendRun();
    366             m_direction = RightToLeft;
    367         }
    368         m_eor = m_last;
    369     }
    370 
    371     appendRun();
    372     m_emptyRun = true;
    373 
    374     // sor for the new run is determined by the higher level (rule X10)
    375     setLastDir(from);
    376     setLastStrongDir(from);
    377     m_eor = Iterator();
    378 }
    379 
    380 template <class Iterator, class Run>
    381 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
    382 {
    383     using namespace WTF::Unicode;
    384 
    385     if (!m_emptyRun && m_eor != m_last) {
    386         checkDirectionInLowerRaiseEmbeddingLevel();
    387         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
    388         if (to == LeftToRight) {
    389             // bidi.sor ... bidi.eor ... bidi.last L
    390             if (m_status.eor == EuropeanNumber) {
    391                 if (m_status.lastStrong != LeftToRight) {
    392                     m_direction = EuropeanNumber;
    393                     appendRun();
    394                 }
    395             } else if (m_status.eor == ArabicNumber) {
    396                 m_direction = ArabicNumber;
    397                 appendRun();
    398             } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
    399                 appendRun();
    400                 m_direction = LeftToRight;
    401             }
    402         } else if (m_status.eor == ArabicNumber
    403             || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
    404             || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
    405             appendRun();
    406             m_direction = RightToLeft;
    407         }
    408         m_eor = m_last;
    409     }
    410 
    411     appendRun();
    412     m_emptyRun = true;
    413 
    414     setLastDir(to);
    415     setLastStrongDir(to);
    416     m_eor = Iterator();
    417 }
    418 
    419 template <class Iterator, class Run>
    420 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
    421 {
    422     // When we're "inIsolate()" we're resolving the parent context which
    423     // ignores (skips over) the isolated content, including embedding levels.
    424     // We should never accrue embedding levels while skipping over isolated content.
    425     ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
    426 
    427     using namespace WTF::Unicode;
    428 
    429     unsigned char fromLevel = context()->level();
    430     RefPtr<BidiContext> toContext = context();
    431 
    432     for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
    433         BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
    434         if (embedding.direction() == PopDirectionalFormat) {
    435             if (BidiContext* parentContext = toContext->parent())
    436                 toContext = parentContext;
    437         } else {
    438             Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight;
    439             bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride;
    440             unsigned char level = toContext->level();
    441             if (direction == RightToLeft)
    442                 level = nextGreaterOddLevel(level);
    443             else
    444                 level = nextGreaterEvenLevel(level);
    445             if (level < BidiContext::kMaxLevel)
    446                 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
    447         }
    448     }
    449 
    450     unsigned char toLevel = toContext->level();
    451 
    452     if (toLevel > fromLevel)
    453         raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
    454     else if (toLevel < fromLevel)
    455         lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
    456 
    457     setContext(toContext);
    458 
    459     m_currentExplicitEmbeddingSequence.clear();
    460 
    461     return fromLevel != toLevel;
    462 }
    463 
    464 template <class Iterator, class Run>
    465 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)
    466 {
    467     using namespace WTF::Unicode;
    468     switch (dirCurrent) {
    469     case EuropeanNumberTerminator:
    470         if (m_status.last != EuropeanNumber)
    471             m_status.last = EuropeanNumberTerminator;
    472         break;
    473     case EuropeanNumberSeparator:
    474     case CommonNumberSeparator:
    475     case SegmentSeparator:
    476     case WhiteSpaceNeutral:
    477     case OtherNeutral:
    478         switch (m_status.last) {
    479         case LeftToRight:
    480         case RightToLeft:
    481         case RightToLeftArabic:
    482         case EuropeanNumber:
    483         case ArabicNumber:
    484             m_status.last = dirCurrent;
    485             break;
    486         default:
    487             m_status.last = OtherNeutral;
    488         }
    489         break;
    490     case NonSpacingMark:
    491     case BoundaryNeutral:
    492     case RightToLeftEmbedding:
    493     case LeftToRightEmbedding:
    494     case RightToLeftOverride:
    495     case LeftToRightOverride:
    496     case PopDirectionalFormat:
    497         // ignore these
    498         break;
    499     case EuropeanNumber:
    500         // fall through
    501     default:
    502         m_status.last = dirCurrent;
    503     }
    504 }
    505 
    506 template <class Iterator, class Run>
    507 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
    508 {
    509     unsigned char levelLow = BidiContext::kMaxLevel;
    510     unsigned char levelHigh = 0;
    511     for (Run* run = m_runs.firstRun(); run; run = run->next()) {
    512         levelHigh = std::max(run->level(), levelHigh);
    513         levelLow = std::min(run->level(), levelLow);
    514     }
    515 
    516     // This implements reordering of the line (L2 according to Bidi spec):
    517     // http://unicode.org/reports/tr9/#L2
    518     // L2. From the highest level found in the text to the lowest odd level on each line,
    519     // reverse any contiguous sequence of characters that are at that level or higher.
    520 
    521     // Reversing is only done up to the lowest odd level.
    522     if (!(levelLow % 2))
    523         levelLow++;
    524 
    525     unsigned count = m_runs.runCount() - 1;
    526 
    527     while (levelHigh >= levelLow) {
    528         unsigned i = 0;
    529         Run* run = m_runs.firstRun();
    530         while (i < count) {
    531             for (;i < count && run && run->level() < levelHigh; i++)
    532                 run = run->next();
    533             unsigned start = i;
    534             for (;i <= count && run && run->level() >= levelHigh; i++)
    535                 run = run->next();
    536             unsigned end = i - 1;
    537             m_runs.reverseRuns(start, end);
    538         }
    539         levelHigh--;
    540     }
    541 }
    542 
    543 template <class Iterator, class Run>
    544 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality)
    545 {
    546     while (!m_current.atEnd()) {
    547         if (inIsolate()) {
    548             increment();
    549             continue;
    550         }
    551         if (m_current.atParagraphSeparator())
    552             break;
    553         UChar32 current = m_current.current();
    554         if (UNLIKELY(U16_IS_SURROGATE(current))) {
    555             increment();
    556             // If this not the high part of the surrogate pair, then drop it and move to the next.
    557             if (!U16_IS_SURROGATE_LEAD(current))
    558                 continue;
    559             UChar high = static_cast<UChar>(current);
    560             if (m_current.atEnd())
    561                 continue;
    562             UChar low = m_current.current();
    563             // Verify the low part. If invalid, then assume an invalid surrogate pair and retry.
    564             if (!U16_IS_TRAIL(low))
    565                 continue;
    566             current = U16_GET_SUPPLEMENTARY(high, low);
    567         }
    568         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current);
    569         if (charDirection == WTF::Unicode::LeftToRight) {
    570             if (hasStrongDirectionality)
    571                 *hasStrongDirectionality = true;
    572             return LTR;
    573         }
    574         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
    575             if (hasStrongDirectionality)
    576                 *hasStrongDirectionality = true;
    577             return RTL;
    578         }
    579         increment();
    580     }
    581     if (hasStrongDirectionality)
    582         *hasStrongDirectionality = false;
    583     return LTR;
    584 }
    585 
    586 template <class Iterator, class Run>
    587 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
    588 {
    589     using namespace WTF::Unicode;
    590 
    591     ASSERT(m_direction == OtherNeutral);
    592 
    593     m_endOfLine = end;
    594 
    595     if (override != NoVisualOverride) {
    596         m_emptyRun = false;
    597         m_sor = m_current;
    598         m_eor = Iterator();
    599         while (m_current != end && !m_current.atEnd()) {
    600             m_eor = m_current;
    601             increment();
    602         }
    603         m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft;
    604         appendRun();
    605         m_runs.setLogicallyLastRun(m_runs.lastRun());
    606         if (override == VisualRightToLeftOverride && m_runs.runCount())
    607             m_runs.reverseRuns(0, m_runs.runCount() - 1);
    608         return;
    609     }
    610 
    611     m_emptyRun = true;
    612 
    613     m_eor = Iterator();
    614 
    615     m_last = m_current;
    616     bool lastLineEnded = false;
    617     BidiResolver<Iterator, Run> stateAtEnd;
    618 
    619     while (true) {
    620         if (inIsolate() && m_emptyRun) {
    621             m_sor = m_current;
    622             m_emptyRun = false;
    623         }
    624 
    625         if (!lastLineEnded && isEndOfLine(end)) {
    626             if (m_emptyRun)
    627                 break;
    628 
    629             stateAtEnd.m_status = m_status;
    630             stateAtEnd.m_sor = m_sor;
    631             stateAtEnd.m_eor = m_eor;
    632             stateAtEnd.m_last = m_last;
    633             stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
    634             stateAtEnd.m_lastBeforeET = m_lastBeforeET;
    635             stateAtEnd.m_emptyRun = m_emptyRun;
    636             m_endOfRunAtEndOfLine = m_last;
    637             lastLineEnded = true;
    638         }
    639         Direction dirCurrent;
    640         if (lastLineEnded && (hardLineBreak || m_current.atEnd())) {
    641             BidiContext* c = context();
    642             if (hardLineBreak) {
    643                 // A deviation from the Unicode Bidi Algorithm in order to match
    644                 // WinIE and user expectations: hard line breaks reset bidi state
    645                 // coming from unicode bidi control characters, but not those from
    646                 // DOM nodes with specified directionality
    647                 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
    648 
    649                 dirCurrent = stateAtEnd.context()->dir();
    650                 stateAtEnd.setEorDir(dirCurrent);
    651                 stateAtEnd.setLastDir(dirCurrent);
    652                 stateAtEnd.setLastStrongDir(dirCurrent);
    653             } else {
    654                 while (c->parent())
    655                     c = c->parent();
    656                 dirCurrent = c->dir();
    657             }
    658         } else {
    659             dirCurrent = m_current.direction();
    660             if (context()->override()
    661                 && dirCurrent != RightToLeftEmbedding
    662                 && dirCurrent != LeftToRightEmbedding
    663                 && dirCurrent != RightToLeftOverride
    664                 && dirCurrent != LeftToRightOverride
    665                 && dirCurrent != PopDirectionalFormat)
    666                 dirCurrent = context()->dir();
    667             else if (dirCurrent == NonSpacingMark)
    668                 dirCurrent = m_status.last;
    669         }
    670 
    671         // We ignore all character directionality while in unicode-bidi: isolate spans.
    672         // We'll handle ordering the isolated characters in a second pass.
    673         if (inIsolate())
    674             dirCurrent = OtherNeutral;
    675 
    676         ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
    677         switch (dirCurrent) {
    678 
    679         // embedding and overrides (X1-X9 in the Bidi specs)
    680         case RightToLeftEmbedding:
    681         case LeftToRightEmbedding:
    682         case RightToLeftOverride:
    683         case LeftToRightOverride:
    684         case PopDirectionalFormat:
    685             embed(dirCurrent, FromUnicode);
    686             commitExplicitEmbedding();
    687             break;
    688 
    689         // strong types
    690         case LeftToRight:
    691             switch (m_status.last) {
    692             case RightToLeft:
    693             case RightToLeftArabic:
    694             case EuropeanNumber:
    695             case ArabicNumber:
    696                 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
    697                     appendRun();
    698                 break;
    699             case LeftToRight:
    700                 break;
    701             case EuropeanNumberSeparator:
    702             case EuropeanNumberTerminator:
    703             case CommonNumberSeparator:
    704             case BoundaryNeutral:
    705             case BlockSeparator:
    706             case SegmentSeparator:
    707             case WhiteSpaceNeutral:
    708             case OtherNeutral:
    709                 if (m_status.eor == EuropeanNumber) {
    710                     if (m_status.lastStrong != LeftToRight) {
    711                         // the numbers need to be on a higher embedding level, so let's close that run
    712                         m_direction = EuropeanNumber;
    713                         appendRun();
    714                         if (context()->dir() != LeftToRight) {
    715                             // the neutrals take the embedding direction, which is R
    716                             m_eor = m_last;
    717                             m_direction = RightToLeft;
    718                             appendRun();
    719                         }
    720                     }
    721                 } else if (m_status.eor == ArabicNumber) {
    722                     // Arabic numbers are always on a higher embedding level, so let's close that run
    723                     m_direction = ArabicNumber;
    724                     appendRun();
    725                     if (context()->dir() != LeftToRight) {
    726                         // the neutrals take the embedding direction, which is R
    727                         m_eor = m_last;
    728                         m_direction = RightToLeft;
    729                         appendRun();
    730                     }
    731                 } else if (m_status.lastStrong != LeftToRight) {
    732                     // last stuff takes embedding dir
    733                     if (context()->dir() == RightToLeft) {
    734                         m_eor = m_last;
    735                         m_direction = RightToLeft;
    736                     }
    737                     appendRun();
    738                 }
    739             default:
    740                 break;
    741             }
    742             m_eor = m_current;
    743             m_status.eor = LeftToRight;
    744             m_status.lastStrong = LeftToRight;
    745             m_direction = LeftToRight;
    746             break;
    747         case RightToLeftArabic:
    748         case RightToLeft:
    749             switch (m_status.last) {
    750             case LeftToRight:
    751             case EuropeanNumber:
    752             case ArabicNumber:
    753                 appendRun();
    754             case RightToLeft:
    755             case RightToLeftArabic:
    756                 break;
    757             case EuropeanNumberSeparator:
    758             case EuropeanNumberTerminator:
    759             case CommonNumberSeparator:
    760             case BoundaryNeutral:
    761             case BlockSeparator:
    762             case SegmentSeparator:
    763             case WhiteSpaceNeutral:
    764             case OtherNeutral:
    765                 if (m_status.eor == EuropeanNumber) {
    766                     if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
    767                         m_eor = m_last;
    768                     appendRun();
    769                 } else if (m_status.eor == ArabicNumber) {
    770                     appendRun();
    771                 } else if (m_status.lastStrong == LeftToRight) {
    772                     if (context()->dir() == LeftToRight)
    773                         m_eor = m_last;
    774                     appendRun();
    775                 }
    776             default:
    777                 break;
    778             }
    779             m_eor = m_current;
    780             m_status.eor = RightToLeft;
    781             m_status.lastStrong = dirCurrent;
    782             m_direction = RightToLeft;
    783             break;
    784 
    785             // weak types:
    786 
    787         case EuropeanNumber:
    788             if (m_status.lastStrong != RightToLeftArabic) {
    789                 // if last strong was AL change EN to AN
    790                 switch (m_status.last) {
    791                 case EuropeanNumber:
    792                 case LeftToRight:
    793                     break;
    794                 case RightToLeft:
    795                 case RightToLeftArabic:
    796                 case ArabicNumber:
    797                     m_eor = m_last;
    798                     appendRun();
    799                     m_direction = EuropeanNumber;
    800                     break;
    801                 case EuropeanNumberSeparator:
    802                 case CommonNumberSeparator:
    803                     if (m_status.eor == EuropeanNumber)
    804                         break;
    805                 case EuropeanNumberTerminator:
    806                 case BoundaryNeutral:
    807                 case BlockSeparator:
    808                 case SegmentSeparator:
    809                 case WhiteSpaceNeutral:
    810                 case OtherNeutral:
    811                     if (m_status.eor == EuropeanNumber) {
    812                         if (m_status.lastStrong == RightToLeft) {
    813                             // ENs on both sides behave like Rs, so the neutrals should be R.
    814                             // Terminate the EN run.
    815                             appendRun();
    816                             // Make an R run.
    817                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    818                             m_direction = RightToLeft;
    819                             appendRun();
    820                             // Begin a new EN run.
    821                             m_direction = EuropeanNumber;
    822                         }
    823                     } else if (m_status.eor == ArabicNumber) {
    824                         // Terminate the AN run.
    825                         appendRun();
    826                         if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
    827                             // Make an R run.
    828                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    829                             m_direction = RightToLeft;
    830                             appendRun();
    831                             // Begin a new EN run.
    832                             m_direction = EuropeanNumber;
    833                         }
    834                     } else if (m_status.lastStrong == RightToLeft) {
    835                         // Extend the R run to include the neutrals.
    836                         m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    837                         m_direction = RightToLeft;
    838                         appendRun();
    839                         // Begin a new EN run.
    840                         m_direction = EuropeanNumber;
    841                     }
    842                 default:
    843                     break;
    844                 }
    845                 m_eor = m_current;
    846                 m_status.eor = EuropeanNumber;
    847                 if (m_direction == OtherNeutral)
    848                     m_direction = LeftToRight;
    849                 break;
    850             }
    851         case ArabicNumber:
    852             dirCurrent = ArabicNumber;
    853             switch (m_status.last) {
    854             case LeftToRight:
    855                 if (context()->dir() == LeftToRight)
    856                     appendRun();
    857                 break;
    858             case ArabicNumber:
    859                 break;
    860             case RightToLeft:
    861             case RightToLeftArabic:
    862             case EuropeanNumber:
    863                 m_eor = m_last;
    864                 appendRun();
    865                 break;
    866             case CommonNumberSeparator:
    867                 if (m_status.eor == ArabicNumber)
    868                     break;
    869             case EuropeanNumberSeparator:
    870             case EuropeanNumberTerminator:
    871             case BoundaryNeutral:
    872             case BlockSeparator:
    873             case SegmentSeparator:
    874             case WhiteSpaceNeutral:
    875             case OtherNeutral:
    876                 if (m_status.eor == ArabicNumber
    877                     || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
    878                     || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
    879                     // Terminate the run before the neutrals.
    880                     appendRun();
    881                     // Begin an R run for the neutrals.
    882                     m_direction = RightToLeft;
    883                 } else if (m_direction == OtherNeutral) {
    884                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
    885                 }
    886                 m_eor = m_last;
    887                 appendRun();
    888             default:
    889                 break;
    890             }
    891             m_eor = m_current;
    892             m_status.eor = ArabicNumber;
    893             if (m_direction == OtherNeutral)
    894                 m_direction = ArabicNumber;
    895             break;
    896         case EuropeanNumberSeparator:
    897         case CommonNumberSeparator:
    898             break;
    899         case EuropeanNumberTerminator:
    900             if (m_status.last == EuropeanNumber) {
    901                 dirCurrent = EuropeanNumber;
    902                 m_eor = m_current;
    903                 m_status.eor = dirCurrent;
    904             } else if (m_status.last != EuropeanNumberTerminator) {
    905                 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
    906             }
    907             break;
    908 
    909         // boundary neutrals should be ignored
    910         case BoundaryNeutral:
    911             if (m_eor == m_last)
    912                 m_eor = m_current;
    913             break;
    914             // neutrals
    915         case BlockSeparator:
    916             // ### what do we do with newline and paragraph seperators that come to here?
    917             break;
    918         case SegmentSeparator:
    919             // ### implement rule L1
    920             break;
    921         case WhiteSpaceNeutral:
    922             break;
    923         case OtherNeutral:
    924             break;
    925         default:
    926             break;
    927         }
    928 
    929         if (lastLineEnded && m_eor == m_current) {
    930             if (!m_reachedEndOfLine) {
    931                 m_eor = m_endOfRunAtEndOfLine;
    932                 switch (m_status.eor) {
    933                 case LeftToRight:
    934                 case RightToLeft:
    935                 case ArabicNumber:
    936                     m_direction = m_status.eor;
    937                     break;
    938                 case EuropeanNumber:
    939                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
    940                     break;
    941                 default:
    942                     ASSERT_NOT_REACHED();
    943                 }
    944                 appendRun();
    945             }
    946             m_current = end;
    947             m_status = stateAtEnd.m_status;
    948             m_sor = stateAtEnd.m_sor;
    949             m_eor = stateAtEnd.m_eor;
    950             m_last = stateAtEnd.m_last;
    951             m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
    952             m_lastBeforeET = stateAtEnd.m_lastBeforeET;
    953             m_emptyRun = stateAtEnd.m_emptyRun;
    954             m_direction = OtherNeutral;
    955             break;
    956         }
    957 
    958         updateStatusLastFromCurrentDirection(dirCurrent);
    959         m_last = m_current;
    960 
    961         if (m_emptyRun) {
    962             m_sor = m_current;
    963             m_emptyRun = false;
    964         }
    965 
    966         increment();
    967         if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
    968             bool committed = commitExplicitEmbedding();
    969             if (committed && lastLineEnded) {
    970                 m_current = end;
    971                 m_status = stateAtEnd.m_status;
    972                 m_sor = stateAtEnd.m_sor;
    973                 m_eor = stateAtEnd.m_eor;
    974                 m_last = stateAtEnd.m_last;
    975                 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
    976                 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
    977                 m_emptyRun = stateAtEnd.m_emptyRun;
    978                 m_direction = OtherNeutral;
    979                 break;
    980             }
    981         }
    982     }
    983 
    984     m_runs.setLogicallyLastRun(m_runs.lastRun());
    985     reorderRunsFromLevels();
    986     m_endOfRunAtEndOfLine = Iterator();
    987     m_endOfLine = Iterator();
    988 }
    989 
    990 template <class Iterator, class Run>
    991 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint)
    992 {
    993     ASSERT(!m_midpointStateForIsolatedRun.contains(run));
    994     m_midpointStateForIsolatedRun.add(run, midpoint);
    995 }
    996 
    997 template<class Iterator, class Run>
    998 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run)
    999 {
   1000     return m_midpointStateForIsolatedRun.take(run);
   1001 }
   1002 
   1003 
   1004 } // namespace WebCore
   1005 
   1006 #endif // BidiResolver_h
   1007