Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2000 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc.  All right reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef BidiResolver_h
     23 #define BidiResolver_h
     24 
     25 #include "platform/text/BidiCharacterRun.h"
     26 #include "platform/text/BidiContext.h"
     27 #include "platform/text/BidiRunList.h"
     28 #include "platform/text/TextDirection.h"
     29 #include "wtf/HashMap.h"
     30 #include "wtf/Noncopyable.h"
     31 #include "wtf/PassRefPtr.h"
     32 #include "wtf/Vector.h"
     33 
     34 namespace blink {
     35 
     36 class RenderObject;
     37 
     38 template <class Iterator> class MidpointState {
     39 public:
     40     MidpointState()
     41     {
     42         reset();
     43     }
     44 
     45     void reset()
     46     {
     47         m_numMidpoints = 0;
     48         m_currentMidpoint = 0;
     49         m_betweenMidpoints = false;
     50     }
     51 
     52     void startIgnoringSpaces(const Iterator& midpoint)
     53     {
     54         ASSERT(!(m_numMidpoints % 2));
     55         addMidpoint(midpoint);
     56     }
     57 
     58     void stopIgnoringSpaces(const Iterator& midpoint)
     59     {
     60         ASSERT(m_numMidpoints % 2);
     61         addMidpoint(midpoint);
     62     }
     63 
     64     // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
     65     // hard line breaks to ensure that they're not ignored.
     66     void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer)
     67     {
     68         Iterator midpoint(0, renderer, 0);
     69         stopIgnoringSpaces(midpoint);
     70         startIgnoringSpaces(midpoint);
     71     }
     72 
     73     // Adding a pair of midpoints before a character will split it out into a new line box.
     74     void ensureCharacterGetsLineBox(Iterator& textParagraphSeparator)
     75     {
     76         startIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset() - 1));
     77         stopIgnoringSpaces(Iterator(0, textParagraphSeparator.object(), textParagraphSeparator.offset()));
     78     }
     79 
     80     void checkMidpoints(Iterator& lBreak)
     81     {
     82         // Check to see if our last midpoint is a start point beyond the line break. If so,
     83         // shave it off the list, and shave off a trailing space if the previous end point doesn't
     84         // preserve whitespace.
     85         if (lBreak.object() && m_numMidpoints && !(m_numMidpoints % 2)) {
     86             Iterator* midpointsIterator = m_midpoints.data();
     87             Iterator& endpoint = midpointsIterator[m_numMidpoints - 2];
     88             const Iterator& startpoint = midpointsIterator[m_numMidpoints - 1];
     89             Iterator currpoint = endpoint;
     90             while (!currpoint.atEnd() && currpoint != startpoint && currpoint != lBreak)
     91                 currpoint.increment();
     92             if (currpoint == lBreak) {
     93                 // We hit the line break before the start point. Shave off the start point.
     94                 m_numMidpoints--;
     95                 if (endpoint.object()->style()->collapseWhiteSpace() && endpoint.object()->isText())
     96                     endpoint.setOffset(endpoint.offset() - 1);
     97             }
     98         }
     99     }
    100 
    101     Vector<Iterator>& midpoints() { return m_midpoints; }
    102     const unsigned& numMidpoints() const { return m_numMidpoints; }
    103     const unsigned& currentMidpoint() const { return m_currentMidpoint; }
    104     void incrementCurrentMidpoint() { m_currentMidpoint++; }
    105     const bool& betweenMidpoints() const { return m_betweenMidpoints; }
    106     void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; }
    107 private:
    108     // The goal is to reuse the line state across multiple
    109     // lines so we just keep an array around for midpoints and never clear it across multiple
    110     // lines. We track the number of items and position using the two other variables.
    111     Vector<Iterator> m_midpoints;
    112     unsigned m_numMidpoints;
    113     unsigned m_currentMidpoint;
    114     bool m_betweenMidpoints;
    115 
    116     void addMidpoint(const Iterator& midpoint)
    117     {
    118         if (m_midpoints.size() <= m_numMidpoints)
    119             m_midpoints.grow(m_numMidpoints + 10);
    120 
    121         Iterator* midpointsIterator = m_midpoints.data();
    122         midpointsIterator[m_numMidpoints++] = midpoint;
    123     }
    124 };
    125 
    126 // The BidiStatus at a given position (typically the end of a line) can
    127 // be cached and then used to restart bidi resolution at that position.
    128 struct BidiStatus {
    129     BidiStatus()
    130         : eor(WTF::Unicode::OtherNeutral)
    131         , lastStrong(WTF::Unicode::OtherNeutral)
    132         , last(WTF::Unicode::OtherNeutral)
    133     {
    134     }
    135 
    136     // Creates a BidiStatus representing a new paragraph root with a default direction.
    137     // Uses TextDirection as it only has two possibilities instead of WTF::Unicode::Direction which has 19.
    138     BidiStatus(TextDirection textDirection, bool isOverride)
    139     {
    140         WTF::Unicode::Direction direction = textDirection == LTR ? WTF::Unicode::LeftToRight : WTF::Unicode::RightToLeft;
    141         eor = lastStrong = last = direction;
    142         context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
    143     }
    144 
    145     BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
    146         : eor(eorDir)
    147         , lastStrong(lastStrongDir)
    148         , last(lastDir)
    149         , context(bidiContext)
    150     {
    151     }
    152 
    153     WTF::Unicode::Direction eor;
    154     WTF::Unicode::Direction lastStrong;
    155     WTF::Unicode::Direction last;
    156     RefPtr<BidiContext> context;
    157 };
    158 
    159 class BidiEmbedding {
    160 public:
    161     BidiEmbedding(WTF::Unicode::Direction direction, BidiEmbeddingSource source)
    162     : m_direction(direction)
    163     , m_source(source)
    164     {
    165     }
    166 
    167     WTF::Unicode::Direction direction() const { return m_direction; }
    168     BidiEmbeddingSource source() const { return m_source; }
    169 private:
    170     WTF::Unicode::Direction m_direction;
    171     BidiEmbeddingSource m_source;
    172 };
    173 
    174 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
    175 {
    176     return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
    177 }
    178 
    179 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
    180 {
    181     return !(status1 == status2);
    182 }
    183 
    184 enum VisualDirectionOverride {
    185     NoVisualOverride,
    186     VisualLeftToRightOverride,
    187     VisualRightToLeftOverride
    188 };
    189 
    190 // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
    191 // http://unicode.org/reports/tr9
    192 template <class Iterator, class Run> class BidiResolver {
    193     WTF_MAKE_NONCOPYABLE(BidiResolver);
    194 public:
    195     BidiResolver()
    196         : m_direction(WTF::Unicode::OtherNeutral)
    197         , m_reachedEndOfLine(false)
    198         , m_emptyRun(true)
    199         , m_nestedIsolateCount(0)
    200         , m_trailingSpaceRun(0)
    201     {
    202     }
    203 
    204 #if ENABLE(ASSERT)
    205     ~BidiResolver();
    206 #endif
    207 
    208     const Iterator& position() const { return m_current; }
    209     Iterator& position() { return m_current; }
    210     void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
    211     void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
    212     {
    213         m_current = position;
    214         m_nestedIsolateCount = nestedIsolatedCount;
    215     }
    216 
    217     BidiContext* context() const { return m_status.context.get(); }
    218     void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
    219 
    220     void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
    221     void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
    222     void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
    223 
    224     WTF::Unicode::Direction dir() const { return m_direction; }
    225     void setDir(WTF::Unicode::Direction d) { m_direction = d; }
    226 
    227     const BidiStatus& status() const { return m_status; }
    228     void setStatus(const BidiStatus s)
    229     {
    230         ASSERT(s.context);
    231         m_status = s;
    232         m_paragraphDirectionality = s.context->dir() == WTF::Unicode::LeftToRight ? LTR : RTL;
    233     }
    234 
    235     MidpointState<Iterator>& midpointState() { return m_midpointState; }
    236 
    237     // The current algorithm handles nested isolates one layer of nesting at a time.
    238     // But when we layout each isolated span, we will walk into (and ignore) all
    239     // child isolated spans.
    240     void enterIsolate() { m_nestedIsolateCount++; }
    241     void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
    242     bool inIsolate() const { return m_nestedIsolateCount; }
    243 
    244     void embed(WTF::Unicode::Direction, BidiEmbeddingSource);
    245     bool commitExplicitEmbedding(BidiRunList<Run>&);
    246 
    247     void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false, bool reorderRuns = true);
    248 
    249     BidiRunList<Run>& runs() { return m_runs; }
    250 
    251     // FIXME: This used to be part of deleteRuns() but was a layering violation.
    252     // It's unclear if this is still needed.
    253     void markCurrentRunEmpty() { m_emptyRun = true; }
    254 
    255     Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
    256 
    257     bool isEndOfLine(const Iterator& end) { return m_current == end || m_current.atEnd(); }
    258 
    259     TextDirection determineParagraphDirectionality(bool* hasStrongDirectionality = 0);
    260 
    261     void setMidpointStateForIsolatedRun(Run*, const MidpointState<Iterator>&);
    262     MidpointState<Iterator> midpointStateForIsolatedRun(Run*);
    263 
    264     Iterator endOfLine() const { return m_endOfLine; }
    265 
    266     Run* trailingSpaceRun() const { return m_trailingSpaceRun; }
    267 
    268 protected:
    269     void increment() { m_current.increment(); }
    270     // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
    271     // pass in some sort of Traits object which knows how to create runs for appending.
    272     void appendRun(BidiRunList<Run>&);
    273 
    274     Run* addTrailingRun(BidiRunList<Run>&, int, int, Run*, BidiContext*, TextDirection) const { return 0; }
    275     Iterator m_current;
    276     // sor and eor are "start of run" and "end of run" respectively and correpond
    277     // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
    278     Iterator m_sor; // Points to the first character in the current run.
    279     Iterator m_eor; // Points to the last character in the current run.
    280     Iterator m_last;
    281     BidiStatus m_status;
    282     WTF::Unicode::Direction m_direction;
    283     // m_endOfRunAtEndOfLine is "the position last eor in the end of line"
    284     Iterator m_endOfRunAtEndOfLine;
    285     Iterator m_endOfLine;
    286     bool m_reachedEndOfLine;
    287     Iterator m_lastBeforeET; // Before a EuropeanNumberTerminator
    288     bool m_emptyRun;
    289 
    290     // FIXME: This should not belong to the resolver, but rather be passed
    291     // into createBidiRunsForLine by the caller.
    292     BidiRunList<Run> m_runs;
    293 
    294     MidpointState<Iterator> m_midpointState;
    295 
    296     unsigned m_nestedIsolateCount;
    297     Vector<Run*> m_isolatedRuns;
    298     Run* m_trailingSpaceRun;
    299     TextDirection m_paragraphDirectionality;
    300 
    301 private:
    302     void raiseExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from, WTF::Unicode::Direction to);
    303     void lowerExplicitEmbeddingLevel(BidiRunList<Run>&, WTF::Unicode::Direction from);
    304     void checkDirectionInLowerRaiseEmbeddingLevel();
    305 
    306     void updateStatusLastFromCurrentDirection(WTF::Unicode::Direction);
    307     void reorderRunsFromLevels(BidiRunList<Run>&) const;
    308 
    309     bool needsToApplyL1Rule(BidiRunList<Run>&) { return false; }
    310     int findFirstTrailingSpaceAtRun(Run*) { return 0; }
    311     // http://www.unicode.org/reports/tr9/#L1
    312     void applyL1Rule(BidiRunList<Run>&);
    313 
    314     Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
    315     HashMap<Run *, MidpointState<Iterator> > m_midpointStateForIsolatedRun;
    316 };
    317 
    318 #if ENABLE(ASSERT)
    319 template <class Iterator, class Run>
    320 BidiResolver<Iterator, Run>::~BidiResolver()
    321 {
    322     // The owner of this resolver should have handled the isolated runs.
    323     ASSERT(m_isolatedRuns.isEmpty());
    324 }
    325 #endif
    326 
    327 template <class Iterator, class Run>
    328 void BidiResolver<Iterator, Run>::appendRun(BidiRunList<Run>& runs)
    329 {
    330     if (!m_emptyRun && !m_eor.atEnd()) {
    331         unsigned startOffset = m_sor.offset();
    332         unsigned endOffset = m_eor.offset();
    333 
    334         if (!m_endOfRunAtEndOfLine.atEnd() && endOffset >= m_endOfRunAtEndOfLine.offset()) {
    335             m_reachedEndOfLine = true;
    336             endOffset = m_endOfRunAtEndOfLine.offset();
    337         }
    338 
    339         if (endOffset >= startOffset)
    340             runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
    341 
    342         m_eor.increment();
    343         m_sor = m_eor;
    344     }
    345 
    346     m_direction = WTF::Unicode::OtherNeutral;
    347     m_status.eor = WTF::Unicode::OtherNeutral;
    348 }
    349 
    350 template <class Iterator, class Run>
    351 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction dir, BidiEmbeddingSource source)
    352 {
    353     // Isolated spans compute base directionality during their own UBA run.
    354     // Do not insert fake embed characters once we enter an isolated span.
    355     ASSERT(!inIsolate());
    356     using namespace WTF::Unicode;
    357 
    358     ASSERT(dir == PopDirectionalFormat || dir == LeftToRightEmbedding || dir == LeftToRightOverride || dir == RightToLeftEmbedding || dir == RightToLeftOverride);
    359     m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
    360 }
    361 
    362 template <class Iterator, class Run>
    363 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
    364 {
    365     using namespace WTF::Unicode;
    366 
    367     ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
    368     ASSERT(m_status.last != NonSpacingMark
    369         && m_status.last != BoundaryNeutral
    370         && m_status.last != RightToLeftEmbedding
    371         && m_status.last != LeftToRightEmbedding
    372         && m_status.last != RightToLeftOverride
    373         && m_status.last != LeftToRightOverride
    374         && m_status.last != PopDirectionalFormat);
    375     if (m_direction == OtherNeutral)
    376         m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
    377 }
    378 
    379 template <class Iterator, class Run>
    380 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from)
    381 {
    382     using namespace WTF::Unicode;
    383 
    384     if (!m_emptyRun && m_eor != m_last) {
    385         checkDirectionInLowerRaiseEmbeddingLevel();
    386         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
    387         if (from == LeftToRight) {
    388             // bidi.sor ... bidi.eor ... bidi.last L
    389             if (m_status.eor == EuropeanNumber) {
    390                 if (m_status.lastStrong != LeftToRight) {
    391                     m_direction = EuropeanNumber;
    392                     appendRun(runs);
    393                 }
    394             } else if (m_status.eor == ArabicNumber) {
    395                 m_direction = ArabicNumber;
    396                 appendRun(runs);
    397             } else if (m_status.lastStrong != LeftToRight) {
    398                 appendRun(runs);
    399                 m_direction = LeftToRight;
    400             }
    401         } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
    402             appendRun(runs);
    403             m_direction = RightToLeft;
    404         }
    405         m_eor = m_last;
    406     }
    407 
    408     appendRun(runs);
    409     m_emptyRun = true;
    410 
    411     // sor for the new run is determined by the higher level (rule X10)
    412     setLastDir(from);
    413     setLastStrongDir(from);
    414     m_eor = Iterator();
    415 }
    416 
    417 template <class Iterator, class Run>
    418 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(BidiRunList<Run>& runs, WTF::Unicode::Direction from, WTF::Unicode::Direction to)
    419 {
    420     using namespace WTF::Unicode;
    421 
    422     if (!m_emptyRun && m_eor != m_last) {
    423         checkDirectionInLowerRaiseEmbeddingLevel();
    424         // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
    425         if (to == LeftToRight) {
    426             // bidi.sor ... bidi.eor ... bidi.last L
    427             if (m_status.eor == EuropeanNumber) {
    428                 if (m_status.lastStrong != LeftToRight) {
    429                     m_direction = EuropeanNumber;
    430                     appendRun(runs);
    431                 }
    432             } else if (m_status.eor == ArabicNumber) {
    433                 m_direction = ArabicNumber;
    434                 appendRun(runs);
    435             } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
    436                 appendRun(runs);
    437                 m_direction = LeftToRight;
    438             }
    439         } else if (m_status.eor == ArabicNumber
    440             || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
    441             || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
    442             appendRun(runs);
    443             m_direction = RightToLeft;
    444         }
    445         m_eor = m_last;
    446     }
    447 
    448     appendRun(runs);
    449     m_emptyRun = true;
    450 
    451     setLastDir(to);
    452     setLastStrongDir(to);
    453     m_eor = Iterator();
    454 }
    455 
    456 template <class Iterator, class Run>
    457 void BidiResolver<Iterator, Run>::applyL1Rule(BidiRunList<Run>& runs)
    458 {
    459     ASSERT(runs.runCount());
    460     if (!needsToApplyL1Rule(runs))
    461         return;
    462 
    463     Run* trailingSpaceRun = runs.logicallyLastRun();
    464 
    465     int firstSpace = findFirstTrailingSpaceAtRun(trailingSpaceRun);
    466     if (firstSpace == trailingSpaceRun->stop())
    467         return;
    468 
    469     bool shouldReorder = trailingSpaceRun != (m_paragraphDirectionality == LTR ? runs.lastRun() : runs.firstRun());
    470     if (firstSpace != trailingSpaceRun->start()) {
    471         BidiContext* baseContext = context();
    472         while (BidiContext* parent = baseContext->parent())
    473             baseContext = parent;
    474 
    475         m_trailingSpaceRun = addTrailingRun(runs, firstSpace, trailingSpaceRun->m_stop, trailingSpaceRun, baseContext, m_paragraphDirectionality);
    476         ASSERT(m_trailingSpaceRun);
    477         trailingSpaceRun->m_stop = firstSpace;
    478         return;
    479     }
    480     if (!shouldReorder) {
    481         m_trailingSpaceRun = trailingSpaceRun;
    482         return;
    483     }
    484 
    485     if (m_paragraphDirectionality == LTR) {
    486         runs.moveRunToEnd(trailingSpaceRun);
    487         trailingSpaceRun->m_level = 0;
    488     } else {
    489         runs.moveRunToBeginning(trailingSpaceRun);
    490         trailingSpaceRun->m_level = 1;
    491     }
    492     m_trailingSpaceRun = trailingSpaceRun;
    493 }
    494 
    495 template <class Iterator, class Run>
    496 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding(BidiRunList<Run>& runs)
    497 {
    498     // When we're "inIsolate()" we're resolving the parent context which
    499     // ignores (skips over) the isolated content, including embedding levels.
    500     // We should never accrue embedding levels while skipping over isolated content.
    501     ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
    502 
    503     using namespace WTF::Unicode;
    504 
    505     unsigned char fromLevel = context()->level();
    506     RefPtr<BidiContext> toContext = context();
    507 
    508     for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
    509         BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
    510         if (embedding.direction() == PopDirectionalFormat) {
    511             if (BidiContext* parentContext = toContext->parent())
    512                 toContext = parentContext;
    513         } else {
    514             Direction direction = (embedding.direction() == RightToLeftEmbedding || embedding.direction() == RightToLeftOverride) ? RightToLeft : LeftToRight;
    515             bool override = embedding.direction() == LeftToRightOverride || embedding.direction() == RightToLeftOverride;
    516             unsigned char level = toContext->level();
    517             if (direction == RightToLeft)
    518                 level = nextGreaterOddLevel(level);
    519             else
    520                 level = nextGreaterEvenLevel(level);
    521             if (level < BidiContext::kMaxLevel)
    522                 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
    523         }
    524     }
    525 
    526     unsigned char toLevel = toContext->level();
    527 
    528     if (toLevel > fromLevel)
    529         raiseExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
    530     else if (toLevel < fromLevel)
    531         lowerExplicitEmbeddingLevel(runs, fromLevel % 2 ? RightToLeft : LeftToRight);
    532 
    533     setContext(toContext);
    534 
    535     m_currentExplicitEmbeddingSequence.clear();
    536 
    537     return fromLevel != toLevel;
    538 }
    539 
    540 template <class Iterator, class Run>
    541 inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(WTF::Unicode::Direction dirCurrent)
    542 {
    543     using namespace WTF::Unicode;
    544     switch (dirCurrent) {
    545     case EuropeanNumberTerminator:
    546         if (m_status.last != EuropeanNumber)
    547             m_status.last = EuropeanNumberTerminator;
    548         break;
    549     case EuropeanNumberSeparator:
    550     case CommonNumberSeparator:
    551     case SegmentSeparator:
    552     case WhiteSpaceNeutral:
    553     case OtherNeutral:
    554         switch (m_status.last) {
    555         case LeftToRight:
    556         case RightToLeft:
    557         case RightToLeftArabic:
    558         case EuropeanNumber:
    559         case ArabicNumber:
    560             m_status.last = dirCurrent;
    561             break;
    562         default:
    563             m_status.last = OtherNeutral;
    564         }
    565         break;
    566     case NonSpacingMark:
    567     case BoundaryNeutral:
    568     case RightToLeftEmbedding:
    569     case LeftToRightEmbedding:
    570     case RightToLeftOverride:
    571     case LeftToRightOverride:
    572     case PopDirectionalFormat:
    573         // ignore these
    574         break;
    575     case EuropeanNumber:
    576         // fall through
    577     default:
    578         m_status.last = dirCurrent;
    579     }
    580 }
    581 
    582 template <class Iterator, class Run>
    583 inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels(BidiRunList<Run>& runs) const
    584 {
    585     unsigned char levelLow = BidiContext::kMaxLevel;
    586     unsigned char levelHigh = 0;
    587     for (Run* run = runs.firstRun(); run; run = run->next()) {
    588         levelHigh = std::max(run->level(), levelHigh);
    589         levelLow = std::min(run->level(), levelLow);
    590     }
    591 
    592     // This implements reordering of the line (L2 according to Bidi spec):
    593     // http://unicode.org/reports/tr9/#L2
    594     // L2. From the highest level found in the text to the lowest odd level on each line,
    595     // reverse any contiguous sequence of characters that are at that level or higher.
    596 
    597     // Reversing is only done up to the lowest odd level.
    598     if (!(levelLow % 2))
    599         levelLow++;
    600 
    601     unsigned count = runs.runCount() - 1;
    602 
    603     while (levelHigh >= levelLow) {
    604         unsigned i = 0;
    605         Run* run = runs.firstRun();
    606         while (i < count) {
    607             for (;i < count && run && run->level() < levelHigh; i++)
    608                 run = run->next();
    609             unsigned start = i;
    610             for (;i <= count && run && run->level() >= levelHigh; i++)
    611                 run = run->next();
    612             unsigned end = i - 1;
    613             runs.reverseRuns(start, end);
    614         }
    615         levelHigh--;
    616     }
    617 }
    618 
    619 template <class Iterator, class Run>
    620 TextDirection BidiResolver<Iterator, Run>::determineParagraphDirectionality(bool* hasStrongDirectionality)
    621 {
    622     while (!m_current.atEnd()) {
    623         if (inIsolate()) {
    624             increment();
    625             continue;
    626         }
    627         if (m_current.atParagraphSeparator())
    628             break;
    629         UChar32 current = m_current.current();
    630         if (UNLIKELY(U16_IS_SURROGATE(current))) {
    631             increment();
    632             // If this not the high part of the surrogate pair, then drop it and move to the next.
    633             if (!U16_IS_SURROGATE_LEAD(current))
    634                 continue;
    635             UChar high = static_cast<UChar>(current);
    636             if (m_current.atEnd())
    637                 continue;
    638             UChar low = m_current.current();
    639             // Verify the low part. If invalid, then assume an invalid surrogate pair and retry.
    640             if (!U16_IS_TRAIL(low))
    641                 continue;
    642             current = U16_GET_SUPPLEMENTARY(high, low);
    643         }
    644         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(current);
    645         if (charDirection == WTF::Unicode::LeftToRight) {
    646             if (hasStrongDirectionality)
    647                 *hasStrongDirectionality = true;
    648             return LTR;
    649         }
    650         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
    651             if (hasStrongDirectionality)
    652                 *hasStrongDirectionality = true;
    653             return RTL;
    654         }
    655         increment();
    656     }
    657     if (hasStrongDirectionality)
    658         *hasStrongDirectionality = false;
    659     return LTR;
    660 }
    661 
    662 template <class Iterator, class Run>
    663 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak, bool reorderRuns)
    664 {
    665     using namespace WTF::Unicode;
    666 
    667     ASSERT(m_direction == OtherNeutral);
    668     m_trailingSpaceRun = 0;
    669 
    670     m_endOfLine = end;
    671 
    672     if (override != NoVisualOverride) {
    673         m_emptyRun = false;
    674         m_sor = m_current;
    675         m_eor = Iterator();
    676         while (m_current != end && !m_current.atEnd()) {
    677             m_eor = m_current;
    678             increment();
    679         }
    680         m_direction = override == VisualLeftToRightOverride ? LeftToRight : RightToLeft;
    681         appendRun(m_runs);
    682         m_runs.setLogicallyLastRun(m_runs.lastRun());
    683         if (override == VisualRightToLeftOverride && m_runs.runCount())
    684             m_runs.reverseRuns(0, m_runs.runCount() - 1);
    685         return;
    686     }
    687 
    688     m_emptyRun = true;
    689 
    690     m_eor = Iterator();
    691 
    692     m_last = m_current;
    693     bool lastLineEnded = false;
    694     BidiResolver<Iterator, Run> stateAtEnd;
    695 
    696     while (true) {
    697         if (inIsolate() && m_emptyRun) {
    698             m_sor = m_current;
    699             m_emptyRun = false;
    700         }
    701 
    702         if (!lastLineEnded && isEndOfLine(end)) {
    703             if (m_emptyRun)
    704                 break;
    705 
    706             stateAtEnd.m_status = m_status;
    707             stateAtEnd.m_sor = m_sor;
    708             stateAtEnd.m_eor = m_eor;
    709             stateAtEnd.m_last = m_last;
    710             stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
    711             stateAtEnd.m_lastBeforeET = m_lastBeforeET;
    712             stateAtEnd.m_emptyRun = m_emptyRun;
    713             m_endOfRunAtEndOfLine = m_last;
    714             lastLineEnded = true;
    715         }
    716         Direction dirCurrent;
    717         if (lastLineEnded && (hardLineBreak || m_current.atEnd())) {
    718             BidiContext* c = context();
    719             if (hardLineBreak) {
    720                 // A deviation from the Unicode Bidi Algorithm in order to match
    721                 // WinIE and user expectations: hard line breaks reset bidi state
    722                 // coming from unicode bidi control characters, but not those from
    723                 // DOM nodes with specified directionality
    724                 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
    725 
    726                 dirCurrent = stateAtEnd.context()->dir();
    727                 stateAtEnd.setEorDir(dirCurrent);
    728                 stateAtEnd.setLastDir(dirCurrent);
    729                 stateAtEnd.setLastStrongDir(dirCurrent);
    730             } else {
    731                 while (c->parent())
    732                     c = c->parent();
    733                 dirCurrent = c->dir();
    734             }
    735         } else {
    736             dirCurrent = m_current.direction();
    737             if (context()->override()
    738                 && dirCurrent != RightToLeftEmbedding
    739                 && dirCurrent != LeftToRightEmbedding
    740                 && dirCurrent != RightToLeftOverride
    741                 && dirCurrent != LeftToRightOverride
    742                 && dirCurrent != PopDirectionalFormat)
    743                 dirCurrent = context()->dir();
    744             else if (dirCurrent == NonSpacingMark)
    745                 dirCurrent = m_status.last;
    746         }
    747 
    748         // We ignore all character directionality while in unicode-bidi: isolate spans.
    749         // We'll handle ordering the isolated characters in a second pass.
    750         if (inIsolate())
    751             dirCurrent = OtherNeutral;
    752 
    753         ASSERT(m_status.eor != OtherNeutral || m_eor.atEnd());
    754         switch (dirCurrent) {
    755 
    756         // embedding and overrides (X1-X9 in the Bidi specs)
    757         case RightToLeftEmbedding:
    758         case LeftToRightEmbedding:
    759         case RightToLeftOverride:
    760         case LeftToRightOverride:
    761         case PopDirectionalFormat:
    762             embed(dirCurrent, FromUnicode);
    763             commitExplicitEmbedding(m_runs);
    764             break;
    765 
    766         // strong types
    767         case LeftToRight:
    768             switch (m_status.last) {
    769             case RightToLeft:
    770             case RightToLeftArabic:
    771             case EuropeanNumber:
    772             case ArabicNumber:
    773                 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
    774                     appendRun(m_runs);
    775                 break;
    776             case LeftToRight:
    777                 break;
    778             case EuropeanNumberSeparator:
    779             case EuropeanNumberTerminator:
    780             case CommonNumberSeparator:
    781             case BoundaryNeutral:
    782             case BlockSeparator:
    783             case SegmentSeparator:
    784             case WhiteSpaceNeutral:
    785             case OtherNeutral:
    786                 if (m_status.eor == EuropeanNumber) {
    787                     if (m_status.lastStrong != LeftToRight) {
    788                         // the numbers need to be on a higher embedding level, so let's close that run
    789                         m_direction = EuropeanNumber;
    790                         appendRun(m_runs);
    791                         if (context()->dir() != LeftToRight) {
    792                             // the neutrals take the embedding direction, which is R
    793                             m_eor = m_last;
    794                             m_direction = RightToLeft;
    795                             appendRun(m_runs);
    796                         }
    797                     }
    798                 } else if (m_status.eor == ArabicNumber) {
    799                     // Arabic numbers are always on a higher embedding level, so let's close that run
    800                     m_direction = ArabicNumber;
    801                     appendRun(m_runs);
    802                     if (context()->dir() != LeftToRight) {
    803                         // the neutrals take the embedding direction, which is R
    804                         m_eor = m_last;
    805                         m_direction = RightToLeft;
    806                         appendRun(m_runs);
    807                     }
    808                 } else if (m_status.lastStrong != LeftToRight) {
    809                     // last stuff takes embedding dir
    810                     if (context()->dir() == RightToLeft) {
    811                         m_eor = m_last;
    812                         m_direction = RightToLeft;
    813                     }
    814                     appendRun(m_runs);
    815                 }
    816             default:
    817                 break;
    818             }
    819             m_eor = m_current;
    820             m_status.eor = LeftToRight;
    821             m_status.lastStrong = LeftToRight;
    822             m_direction = LeftToRight;
    823             break;
    824         case RightToLeftArabic:
    825         case RightToLeft:
    826             switch (m_status.last) {
    827             case LeftToRight:
    828             case EuropeanNumber:
    829             case ArabicNumber:
    830                 appendRun(m_runs);
    831             case RightToLeft:
    832             case RightToLeftArabic:
    833                 break;
    834             case EuropeanNumberSeparator:
    835             case EuropeanNumberTerminator:
    836             case CommonNumberSeparator:
    837             case BoundaryNeutral:
    838             case BlockSeparator:
    839             case SegmentSeparator:
    840             case WhiteSpaceNeutral:
    841             case OtherNeutral:
    842                 if (m_status.eor == EuropeanNumber) {
    843                     if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
    844                         m_eor = m_last;
    845                     appendRun(m_runs);
    846                 } else if (m_status.eor == ArabicNumber) {
    847                     appendRun(m_runs);
    848                 } else if (m_status.lastStrong == LeftToRight) {
    849                     if (context()->dir() == LeftToRight)
    850                         m_eor = m_last;
    851                     appendRun(m_runs);
    852                 }
    853             default:
    854                 break;
    855             }
    856             m_eor = m_current;
    857             m_status.eor = RightToLeft;
    858             m_status.lastStrong = dirCurrent;
    859             m_direction = RightToLeft;
    860             break;
    861 
    862             // weak types:
    863 
    864         case EuropeanNumber:
    865             if (m_status.lastStrong != RightToLeftArabic) {
    866                 // if last strong was AL change EN to AN
    867                 switch (m_status.last) {
    868                 case EuropeanNumber:
    869                 case LeftToRight:
    870                     break;
    871                 case RightToLeft:
    872                 case RightToLeftArabic:
    873                 case ArabicNumber:
    874                     m_eor = m_last;
    875                     appendRun(m_runs);
    876                     m_direction = EuropeanNumber;
    877                     break;
    878                 case EuropeanNumberSeparator:
    879                 case CommonNumberSeparator:
    880                     if (m_status.eor == EuropeanNumber)
    881                         break;
    882                 case EuropeanNumberTerminator:
    883                 case BoundaryNeutral:
    884                 case BlockSeparator:
    885                 case SegmentSeparator:
    886                 case WhiteSpaceNeutral:
    887                 case OtherNeutral:
    888                     if (m_status.eor == EuropeanNumber) {
    889                         if (m_status.lastStrong == RightToLeft) {
    890                             // ENs on both sides behave like Rs, so the neutrals should be R.
    891                             // Terminate the EN run.
    892                             appendRun(m_runs);
    893                             // Make an R run.
    894                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    895                             m_direction = RightToLeft;
    896                             appendRun(m_runs);
    897                             // Begin a new EN run.
    898                             m_direction = EuropeanNumber;
    899                         }
    900                     } else if (m_status.eor == ArabicNumber) {
    901                         // Terminate the AN run.
    902                         appendRun(m_runs);
    903                         if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
    904                             // Make an R run.
    905                             m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    906                             m_direction = RightToLeft;
    907                             appendRun(m_runs);
    908                             // Begin a new EN run.
    909                             m_direction = EuropeanNumber;
    910                         }
    911                     } else if (m_status.lastStrong == RightToLeft) {
    912                         // Extend the R run to include the neutrals.
    913                         m_eor = m_status.last == EuropeanNumberTerminator ? m_lastBeforeET : m_last;
    914                         m_direction = RightToLeft;
    915                         appendRun(m_runs);
    916                         // Begin a new EN run.
    917                         m_direction = EuropeanNumber;
    918                     }
    919                 default:
    920                     break;
    921                 }
    922                 m_eor = m_current;
    923                 m_status.eor = EuropeanNumber;
    924                 if (m_direction == OtherNeutral)
    925                     m_direction = LeftToRight;
    926                 break;
    927             }
    928         case ArabicNumber:
    929             dirCurrent = ArabicNumber;
    930             switch (m_status.last) {
    931             case LeftToRight:
    932                 if (context()->dir() == LeftToRight)
    933                     appendRun(m_runs);
    934                 break;
    935             case ArabicNumber:
    936                 break;
    937             case RightToLeft:
    938             case RightToLeftArabic:
    939             case EuropeanNumber:
    940                 m_eor = m_last;
    941                 appendRun(m_runs);
    942                 break;
    943             case CommonNumberSeparator:
    944                 if (m_status.eor == ArabicNumber)
    945                     break;
    946             case EuropeanNumberSeparator:
    947             case EuropeanNumberTerminator:
    948             case BoundaryNeutral:
    949             case BlockSeparator:
    950             case SegmentSeparator:
    951             case WhiteSpaceNeutral:
    952             case OtherNeutral:
    953                 if (m_status.eor == ArabicNumber
    954                     || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
    955                     || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
    956                     // Terminate the run before the neutrals.
    957                     appendRun(m_runs);
    958                     // Begin an R run for the neutrals.
    959                     m_direction = RightToLeft;
    960                 } else if (m_direction == OtherNeutral) {
    961                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
    962                 }
    963                 m_eor = m_last;
    964                 appendRun(m_runs);
    965             default:
    966                 break;
    967             }
    968             m_eor = m_current;
    969             m_status.eor = ArabicNumber;
    970             if (m_direction == OtherNeutral)
    971                 m_direction = ArabicNumber;
    972             break;
    973         case EuropeanNumberSeparator:
    974         case CommonNumberSeparator:
    975             break;
    976         case EuropeanNumberTerminator:
    977             if (m_status.last == EuropeanNumber) {
    978                 dirCurrent = EuropeanNumber;
    979                 m_eor = m_current;
    980                 m_status.eor = dirCurrent;
    981             } else if (m_status.last != EuropeanNumberTerminator) {
    982                 m_lastBeforeET = m_emptyRun ? m_eor : m_last;
    983             }
    984             break;
    985 
    986         // boundary neutrals should be ignored
    987         case BoundaryNeutral:
    988             if (m_eor == m_last)
    989                 m_eor = m_current;
    990             break;
    991             // neutrals
    992         case BlockSeparator:
    993             // ### what do we do with newline and paragraph seperators that come to here?
    994             break;
    995         case SegmentSeparator:
    996             // ### implement rule L1
    997             break;
    998         case WhiteSpaceNeutral:
    999             break;
   1000         case OtherNeutral:
   1001             break;
   1002         default:
   1003             break;
   1004         }
   1005 
   1006         if (lastLineEnded && m_eor == m_current) {
   1007             if (!m_reachedEndOfLine) {
   1008                 m_eor = m_endOfRunAtEndOfLine;
   1009                 switch (m_status.eor) {
   1010                 case LeftToRight:
   1011                 case RightToLeft:
   1012                 case ArabicNumber:
   1013                     m_direction = m_status.eor;
   1014                     break;
   1015                 case EuropeanNumber:
   1016                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
   1017                     break;
   1018                 default:
   1019                     ASSERT_NOT_REACHED();
   1020                 }
   1021                 appendRun(m_runs);
   1022             }
   1023             m_current = end;
   1024             m_status = stateAtEnd.m_status;
   1025             m_sor = stateAtEnd.m_sor;
   1026             m_eor = stateAtEnd.m_eor;
   1027             m_last = stateAtEnd.m_last;
   1028             m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
   1029             m_lastBeforeET = stateAtEnd.m_lastBeforeET;
   1030             m_emptyRun = stateAtEnd.m_emptyRun;
   1031             m_direction = OtherNeutral;
   1032             break;
   1033         }
   1034 
   1035         updateStatusLastFromCurrentDirection(dirCurrent);
   1036         m_last = m_current;
   1037 
   1038         if (m_emptyRun) {
   1039             m_sor = m_current;
   1040             m_emptyRun = false;
   1041         }
   1042 
   1043         increment();
   1044         if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
   1045             bool committed = commitExplicitEmbedding(m_runs);
   1046             if (committed && lastLineEnded) {
   1047                 m_current = end;
   1048                 m_status = stateAtEnd.m_status;
   1049                 m_sor = stateAtEnd.m_sor;
   1050                 m_eor = stateAtEnd.m_eor;
   1051                 m_last = stateAtEnd.m_last;
   1052                 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
   1053                 m_lastBeforeET = stateAtEnd.m_lastBeforeET;
   1054                 m_emptyRun = stateAtEnd.m_emptyRun;
   1055                 m_direction = OtherNeutral;
   1056                 break;
   1057             }
   1058         }
   1059     }
   1060 
   1061     m_runs.setLogicallyLastRun(m_runs.lastRun());
   1062     if (reorderRuns)
   1063         reorderRunsFromLevels(m_runs);
   1064     m_endOfRunAtEndOfLine = Iterator();
   1065     m_endOfLine = Iterator();
   1066 
   1067     if (!hardLineBreak && m_runs.runCount())
   1068         applyL1Rule(m_runs);
   1069 }
   1070 
   1071 template <class Iterator, class Run>
   1072 void BidiResolver<Iterator, Run>::setMidpointStateForIsolatedRun(Run* run, const MidpointState<Iterator>& midpoint)
   1073 {
   1074     ASSERT(!m_midpointStateForIsolatedRun.contains(run));
   1075     m_midpointStateForIsolatedRun.add(run, midpoint);
   1076 }
   1077 
   1078 template<class Iterator, class Run>
   1079 MidpointState<Iterator> BidiResolver<Iterator, Run>::midpointStateForIsolatedRun(Run* run)
   1080 {
   1081     return m_midpointStateForIsolatedRun.take(run);
   1082 }
   1083 
   1084 
   1085 } // namespace blink
   1086 
   1087 #endif // BidiResolver_h
   1088