Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2013 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "platform/text/BidiResolver.h"
     33 
     34 #include "platform/text/BidiTestHarness.h"
     35 #include "platform/text/TextRunIterator.h"
     36 #include "wtf/OwnPtr.h"
     37 #include <fstream>
     38 #include <gtest/gtest.h>
     39 
     40 namespace {
     41 
     42 using namespace WTF;
     43 using namespace blink;
     44 
     45 TEST(BidiResolver, Basic)
     46 {
     47     bool hasStrongDirectionality;
     48     String value("foo");
     49     TextRun run(value);
     50     BidiResolver<TextRunIterator, BidiCharacterRun> bidiResolver;
     51     bidiResolver.setStatus(BidiStatus(run.direction(), run.directionalOverride()));
     52     bidiResolver.setPositionIgnoringNestedIsolates(TextRunIterator(&run, 0));
     53     TextDirection direction = bidiResolver.determineParagraphDirectionality(&hasStrongDirectionality);
     54     EXPECT_TRUE(hasStrongDirectionality);
     55     EXPECT_EQ(LTR, direction);
     56 }
     57 
     58 TextDirection determineParagraphDirectionality(const TextRun& textRun, bool* hasStrongDirectionality = 0)
     59 {
     60     BidiResolver<TextRunIterator, BidiCharacterRun> resolver;
     61     resolver.setStatus(BidiStatus(LTR, false));
     62     resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0));
     63     return resolver.determineParagraphDirectionality(hasStrongDirectionality);
     64 }
     65 
     66 struct TestData {
     67     UChar text[3];
     68     size_t length;
     69     TextDirection expectedDirection;
     70     bool expectedStrong;
     71 };
     72 
     73 void testDirectionality(const TestData& entry)
     74 {
     75     bool hasStrongDirectionality;
     76     String data(entry.text, entry.length);
     77     TextRun run(data);
     78     TextDirection direction = determineParagraphDirectionality(run, &hasStrongDirectionality);
     79     EXPECT_EQ(entry.expectedStrong, hasStrongDirectionality);
     80     EXPECT_EQ(entry.expectedDirection, direction);
     81 }
     82 
     83 TEST(BidiResolver, ParagraphDirectionSurrogates)
     84 {
     85     const TestData testData[] = {
     86         // Test strong RTL, non-BMP. (U+10858 Imperial Aramaic number one, strong RTL)
     87         { { 0xD802, 0xDC58 }, 2, RTL, true },
     88 
     89         // Test strong LTR, non-BMP. (U+1D15F Musical symbol quarter note, strong LTR)
     90         { { 0xD834, 0xDD5F }, 2, LTR, true },
     91 
     92         // Test broken surrogate: valid leading, invalid trail. (Lead of U+10858, space)
     93         { { 0xD802, ' ' }, 2, LTR, false },
     94 
     95         // Test broken surrogate: invalid leading. (Trail of U+10858, U+05D0 Hebrew Alef)
     96         { { 0xDC58, 0x05D0 }, 2, RTL, true },
     97 
     98         // Test broken surrogate: valid leading, invalid trail/valid lead, valid trail.
     99         { { 0xD802, 0xD802, 0xDC58 }, 3, RTL, true },
    100 
    101         // Test broken surrogate: valid leading, no trail (string too short). (Lead of U+10858)
    102         { { 0xD802, 0xDC58 }, 1, LTR, false },
    103 
    104         // Test broken surrogate: trail appearing before lead. (U+10858 units reversed)
    105         { { 0xDC58, 0xD802 }, 2, LTR, false }
    106     };
    107     for (size_t i = 0; i < WTF_ARRAY_LENGTH(testData); ++i)
    108         testDirectionality(testData[i]);
    109 }
    110 
    111 class BidiTestRunner {
    112 public:
    113     BidiTestRunner()
    114         : m_testsRun(0)
    115         , m_testsSkipped(0)
    116         , m_ignoredCharFailures(0)
    117         , m_levelFailures(0)
    118         , m_orderFailures(0)
    119     {
    120     }
    121 
    122     void skipTestsWith(UChar codepoint)
    123     {
    124         m_skippedCodePoints.insert(codepoint);
    125     }
    126 
    127     void runTest(const std::basic_string<UChar>& input, const std::vector<int>& reorder,
    128         const std::vector<int>& levels, bidi_test::ParagraphDirection,
    129         const std::string& line, size_t lineNumber);
    130 
    131     size_t m_testsRun;
    132     size_t m_testsSkipped;
    133     std::set<UChar> m_skippedCodePoints;
    134     size_t m_ignoredCharFailures;
    135     size_t m_levelFailures;
    136     size_t m_orderFailures;
    137 };
    138 
    139 // Blink's UBA does not filter out control characters, etc. Maybe it should?
    140 // Instead it depends on later layers of Blink to simply ignore them.
    141 // This function helps us emulate that to be compatible with BidiTest.txt expectations.
    142 static bool isNonRenderedCodePoint(UChar c)
    143 {
    144     // The tests also expect us to ignore soft-hyphen.
    145     if (c == 0xAD)
    146         return true;
    147     // Control characters are not rendered:
    148     return c >= 0x202A && c <= 0x202E;
    149     // But it seems to expect LRI, etc. to be rendered!?
    150 }
    151 
    152 std::string diffString(const std::vector<int>& actual, const std::vector<int>& expected)
    153 {
    154     std::ostringstream diff;
    155     diff << "actual: ";
    156     // This is the magical way to print a vector to a stream, clear, right?
    157     std::copy(actual.begin(), actual.end(), std::ostream_iterator<int>(diff, " "));
    158     diff << " expected: ";
    159     std::copy(expected.begin(), expected.end(), std::ostream_iterator<int>(diff, " "));
    160     return diff.str();
    161 }
    162 
    163 void BidiTestRunner::runTest(const std::basic_string<UChar>& input, const std::vector<int>& expectedOrder,
    164     const std::vector<int>& expectedLevels, bidi_test::ParagraphDirection paragraphDirection,
    165     const std::string& line, size_t lineNumber)
    166 {
    167     if (!m_skippedCodePoints.empty()) {
    168         for (size_t i = 0; i < input.size(); i++) {
    169             if (m_skippedCodePoints.count(input[i])) {
    170                 m_testsSkipped++;
    171                 return;
    172             }
    173         }
    174     }
    175 
    176     m_testsRun++;
    177 
    178     TextRun textRun(input.data(), input.size());
    179     switch (paragraphDirection) {
    180     case bidi_test::DirectionAutoLTR:
    181         textRun.setDirection(determineParagraphDirectionality(textRun));
    182         break;
    183     case bidi_test::DirectionLTR:
    184         textRun.setDirection(LTR);
    185         break;
    186     case bidi_test::DirectionRTL:
    187         textRun.setDirection(RTL);
    188         break;
    189     }
    190     BidiResolver<TextRunIterator, BidiCharacterRun> resolver;
    191     resolver.setStatus(BidiStatus(textRun.direction(), textRun.directionalOverride()));
    192     resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0));
    193 
    194     BidiRunList<BidiCharacterRun>& runs = resolver.runs();
    195     resolver.createBidiRunsForLine(TextRunIterator(&textRun, textRun.length()));
    196 
    197     std::ostringstream errorContext;
    198     errorContext << ", line " << lineNumber << " \"" << line << "\"";
    199     errorContext << " context: " << bidi_test::nameFromParagraphDirection(paragraphDirection);
    200 
    201     std::vector<int> actualOrder;
    202     std::vector<int> actualLevels;
    203     actualLevels.assign(input.size(), -1);
    204     BidiCharacterRun* run = runs.firstRun();
    205     while (run) {
    206         // Blink's UBA just makes runs, the actual ordering of the display of characters
    207         // is handled later in our pipeline, so we fake it here:
    208         bool reversed = run->reversed(false);
    209         ASSERT(run->stop() >= run->start());
    210         size_t length = run->stop() - run->start();
    211         for (size_t i = 0; i < length; i++) {
    212             int inputIndex = reversed ? run->stop() - i - 1 : run->start() + i;
    213             if (!isNonRenderedCodePoint(input[inputIndex]))
    214                 actualOrder.push_back(inputIndex);
    215             // BidiTest.txt gives expected level data in the order of the original input.
    216             actualLevels[inputIndex] = run->level();
    217         }
    218         run = run->next();
    219     }
    220 
    221     if (expectedOrder.size() != actualOrder.size()) {
    222         m_ignoredCharFailures++;
    223         EXPECT_EQ(expectedOrder.size(), actualOrder.size()) << errorContext.str();
    224     } else if (expectedOrder != actualOrder) {
    225         m_orderFailures++;
    226         printf("ORDER %s%s\n", diffString(actualOrder, expectedOrder).c_str(), errorContext.str().c_str());
    227     }
    228 
    229     if (expectedLevels.size() != actualLevels.size()) {
    230         m_ignoredCharFailures++;
    231         EXPECT_EQ(expectedLevels.size(), actualLevels.size()) << errorContext.str();
    232     } else {
    233         for (size_t i = 0; i < expectedLevels.size(); i++) {
    234             // level == -1 means the level should be ignored.
    235             if (expectedLevels[i] == actualLevels[i] || expectedLevels[i] == -1)
    236                 continue;
    237 
    238             printf("LEVELS %s%s\n", diffString(actualLevels, expectedLevels).c_str(), errorContext.str().c_str());
    239             m_levelFailures++;
    240             break;
    241         }
    242     }
    243     runs.deleteRuns();
    244 }
    245 
    246 
    247 TEST(BidiResolver, BidiTest_txt)
    248 {
    249     BidiTestRunner runner;
    250     // Blink's Unicode Bidi Algorithm (UBA) doesn't yet support the
    251     // new isolate directives from Unicode 6.3:
    252     // http://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates
    253     runner.skipTestsWith(0x2066); // LRI
    254     runner.skipTestsWith(0x2067); // RLI
    255     runner.skipTestsWith(0x2068); // FSI
    256     runner.skipTestsWith(0x2069); // PDI
    257 
    258     // This code wants to use PathService from base/path_service.h
    259     // but we aren't allowed to depend on base/ directly from Blink yet.
    260     // Alternatively we could use:
    261     // blink::Platform::current()->unitTestSupport()->webKitRootDir()
    262     // and a relative path, but that would require running inside
    263     // webkit_unit_tests (to have a functioning Platform object).
    264     // The file we want is:
    265     // src/third_party/icu/source/test/testdata/BidiTest.txt
    266     // but we don't have any good way to find it from this unittest.
    267     // Just assume we're running this test manually for now. On the
    268     // bots we just print a warning that we can't find the test file.
    269     std::string bidiTestPath = "BidiTest.txt";
    270     std::ifstream bidiTestFile(bidiTestPath.c_str());
    271     if (!bidiTestFile.is_open()) {
    272         printf("ERROR: Failed to open BidiTest.txt, cannot run tests.\n");
    273         return;
    274     }
    275 
    276     bidi_test::Harness<BidiTestRunner> harness(runner);
    277     harness.parse(bidiTestFile);
    278     bidiTestFile.close();
    279 
    280     if (runner.m_testsSkipped)
    281         printf("WARNING: Skipped %zu tests.\n", runner.m_testsSkipped);
    282     printf("Ran %zu tests: %zu level failures %zu order failures.\n",
    283         runner.m_testsRun, runner.m_levelFailures, runner.m_orderFailures);
    284 
    285     // The unittest harness only pays attention to GTest output, so we verify
    286     // that the tests behaved as expected:
    287     EXPECT_EQ(352098u, runner.m_testsRun);
    288     EXPECT_EQ(418143u, runner.m_testsSkipped);
    289     EXPECT_EQ(0u, runner.m_ignoredCharFailures);
    290     EXPECT_EQ(44882u, runner.m_levelFailures);
    291     EXPECT_EQ(19151u, runner.m_orderFailures);
    292 }
    293 
    294 }
    295