1 /* 2 * Copyright (C) 2013 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "platform/text/BidiResolver.h" 33 34 #include "platform/text/BidiTestHarness.h" 35 #include "platform/text/TextRunIterator.h" 36 #include "wtf/OwnPtr.h" 37 #include <fstream> 38 #include <gtest/gtest.h> 39 40 namespace { 41 42 using namespace WTF; 43 using namespace blink; 44 45 TEST(BidiResolver, Basic) 46 { 47 bool hasStrongDirectionality; 48 String value("foo"); 49 TextRun run(value); 50 BidiResolver<TextRunIterator, BidiCharacterRun> bidiResolver; 51 bidiResolver.setStatus(BidiStatus(run.direction(), run.directionalOverride())); 52 bidiResolver.setPositionIgnoringNestedIsolates(TextRunIterator(&run, 0)); 53 TextDirection direction = bidiResolver.determineParagraphDirectionality(&hasStrongDirectionality); 54 EXPECT_TRUE(hasStrongDirectionality); 55 EXPECT_EQ(LTR, direction); 56 } 57 58 TextDirection determineParagraphDirectionality(const TextRun& textRun, bool* hasStrongDirectionality = 0) 59 { 60 BidiResolver<TextRunIterator, BidiCharacterRun> resolver; 61 resolver.setStatus(BidiStatus(LTR, false)); 62 resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0)); 63 return resolver.determineParagraphDirectionality(hasStrongDirectionality); 64 } 65 66 struct TestData { 67 UChar text[3]; 68 size_t length; 69 TextDirection expectedDirection; 70 bool expectedStrong; 71 }; 72 73 void testDirectionality(const TestData& entry) 74 { 75 bool hasStrongDirectionality; 76 String data(entry.text, entry.length); 77 TextRun run(data); 78 TextDirection direction = determineParagraphDirectionality(run, &hasStrongDirectionality); 79 EXPECT_EQ(entry.expectedStrong, hasStrongDirectionality); 80 EXPECT_EQ(entry.expectedDirection, direction); 81 } 82 83 TEST(BidiResolver, ParagraphDirectionSurrogates) 84 { 85 const TestData testData[] = { 86 // Test strong RTL, non-BMP. (U+10858 Imperial Aramaic number one, strong RTL) 87 { { 0xD802, 0xDC58 }, 2, RTL, true }, 88 89 // Test strong LTR, non-BMP. (U+1D15F Musical symbol quarter note, strong LTR) 90 { { 0xD834, 0xDD5F }, 2, LTR, true }, 91 92 // Test broken surrogate: valid leading, invalid trail. (Lead of U+10858, space) 93 { { 0xD802, ' ' }, 2, LTR, false }, 94 95 // Test broken surrogate: invalid leading. (Trail of U+10858, U+05D0 Hebrew Alef) 96 { { 0xDC58, 0x05D0 }, 2, RTL, true }, 97 98 // Test broken surrogate: valid leading, invalid trail/valid lead, valid trail. 99 { { 0xD802, 0xD802, 0xDC58 }, 3, RTL, true }, 100 101 // Test broken surrogate: valid leading, no trail (string too short). (Lead of U+10858) 102 { { 0xD802, 0xDC58 }, 1, LTR, false }, 103 104 // Test broken surrogate: trail appearing before lead. (U+10858 units reversed) 105 { { 0xDC58, 0xD802 }, 2, LTR, false } 106 }; 107 for (size_t i = 0; i < WTF_ARRAY_LENGTH(testData); ++i) 108 testDirectionality(testData[i]); 109 } 110 111 class BidiTestRunner { 112 public: 113 BidiTestRunner() 114 : m_testsRun(0) 115 , m_testsSkipped(0) 116 , m_ignoredCharFailures(0) 117 , m_levelFailures(0) 118 , m_orderFailures(0) 119 { 120 } 121 122 void skipTestsWith(UChar codepoint) 123 { 124 m_skippedCodePoints.insert(codepoint); 125 } 126 127 void runTest(const std::basic_string<UChar>& input, const std::vector<int>& reorder, 128 const std::vector<int>& levels, bidi_test::ParagraphDirection, 129 const std::string& line, size_t lineNumber); 130 131 size_t m_testsRun; 132 size_t m_testsSkipped; 133 std::set<UChar> m_skippedCodePoints; 134 size_t m_ignoredCharFailures; 135 size_t m_levelFailures; 136 size_t m_orderFailures; 137 }; 138 139 // Blink's UBA does not filter out control characters, etc. Maybe it should? 140 // Instead it depends on later layers of Blink to simply ignore them. 141 // This function helps us emulate that to be compatible with BidiTest.txt expectations. 142 static bool isNonRenderedCodePoint(UChar c) 143 { 144 // The tests also expect us to ignore soft-hyphen. 145 if (c == 0xAD) 146 return true; 147 // Control characters are not rendered: 148 return c >= 0x202A && c <= 0x202E; 149 // But it seems to expect LRI, etc. to be rendered!? 150 } 151 152 std::string diffString(const std::vector<int>& actual, const std::vector<int>& expected) 153 { 154 std::ostringstream diff; 155 diff << "actual: "; 156 // This is the magical way to print a vector to a stream, clear, right? 157 std::copy(actual.begin(), actual.end(), std::ostream_iterator<int>(diff, " ")); 158 diff << " expected: "; 159 std::copy(expected.begin(), expected.end(), std::ostream_iterator<int>(diff, " ")); 160 return diff.str(); 161 } 162 163 void BidiTestRunner::runTest(const std::basic_string<UChar>& input, const std::vector<int>& expectedOrder, 164 const std::vector<int>& expectedLevels, bidi_test::ParagraphDirection paragraphDirection, 165 const std::string& line, size_t lineNumber) 166 { 167 if (!m_skippedCodePoints.empty()) { 168 for (size_t i = 0; i < input.size(); i++) { 169 if (m_skippedCodePoints.count(input[i])) { 170 m_testsSkipped++; 171 return; 172 } 173 } 174 } 175 176 m_testsRun++; 177 178 TextRun textRun(input.data(), input.size()); 179 switch (paragraphDirection) { 180 case bidi_test::DirectionAutoLTR: 181 textRun.setDirection(determineParagraphDirectionality(textRun)); 182 break; 183 case bidi_test::DirectionLTR: 184 textRun.setDirection(LTR); 185 break; 186 case bidi_test::DirectionRTL: 187 textRun.setDirection(RTL); 188 break; 189 } 190 BidiResolver<TextRunIterator, BidiCharacterRun> resolver; 191 resolver.setStatus(BidiStatus(textRun.direction(), textRun.directionalOverride())); 192 resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0)); 193 194 BidiRunList<BidiCharacterRun>& runs = resolver.runs(); 195 resolver.createBidiRunsForLine(TextRunIterator(&textRun, textRun.length())); 196 197 std::ostringstream errorContext; 198 errorContext << ", line " << lineNumber << " \"" << line << "\""; 199 errorContext << " context: " << bidi_test::nameFromParagraphDirection(paragraphDirection); 200 201 std::vector<int> actualOrder; 202 std::vector<int> actualLevels; 203 actualLevels.assign(input.size(), -1); 204 BidiCharacterRun* run = runs.firstRun(); 205 while (run) { 206 // Blink's UBA just makes runs, the actual ordering of the display of characters 207 // is handled later in our pipeline, so we fake it here: 208 bool reversed = run->reversed(false); 209 ASSERT(run->stop() >= run->start()); 210 size_t length = run->stop() - run->start(); 211 for (size_t i = 0; i < length; i++) { 212 int inputIndex = reversed ? run->stop() - i - 1 : run->start() + i; 213 if (!isNonRenderedCodePoint(input[inputIndex])) 214 actualOrder.push_back(inputIndex); 215 // BidiTest.txt gives expected level data in the order of the original input. 216 actualLevels[inputIndex] = run->level(); 217 } 218 run = run->next(); 219 } 220 221 if (expectedOrder.size() != actualOrder.size()) { 222 m_ignoredCharFailures++; 223 EXPECT_EQ(expectedOrder.size(), actualOrder.size()) << errorContext.str(); 224 } else if (expectedOrder != actualOrder) { 225 m_orderFailures++; 226 printf("ORDER %s%s\n", diffString(actualOrder, expectedOrder).c_str(), errorContext.str().c_str()); 227 } 228 229 if (expectedLevels.size() != actualLevels.size()) { 230 m_ignoredCharFailures++; 231 EXPECT_EQ(expectedLevels.size(), actualLevels.size()) << errorContext.str(); 232 } else { 233 for (size_t i = 0; i < expectedLevels.size(); i++) { 234 // level == -1 means the level should be ignored. 235 if (expectedLevels[i] == actualLevels[i] || expectedLevels[i] == -1) 236 continue; 237 238 printf("LEVELS %s%s\n", diffString(actualLevels, expectedLevels).c_str(), errorContext.str().c_str()); 239 m_levelFailures++; 240 break; 241 } 242 } 243 runs.deleteRuns(); 244 } 245 246 247 TEST(BidiResolver, BidiTest_txt) 248 { 249 BidiTestRunner runner; 250 // Blink's Unicode Bidi Algorithm (UBA) doesn't yet support the 251 // new isolate directives from Unicode 6.3: 252 // http://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates 253 runner.skipTestsWith(0x2066); // LRI 254 runner.skipTestsWith(0x2067); // RLI 255 runner.skipTestsWith(0x2068); // FSI 256 runner.skipTestsWith(0x2069); // PDI 257 258 // This code wants to use PathService from base/path_service.h 259 // but we aren't allowed to depend on base/ directly from Blink yet. 260 // Alternatively we could use: 261 // blink::Platform::current()->unitTestSupport()->webKitRootDir() 262 // and a relative path, but that would require running inside 263 // webkit_unit_tests (to have a functioning Platform object). 264 // The file we want is: 265 // src/third_party/icu/source/test/testdata/BidiTest.txt 266 // but we don't have any good way to find it from this unittest. 267 // Just assume we're running this test manually for now. On the 268 // bots we just print a warning that we can't find the test file. 269 std::string bidiTestPath = "BidiTest.txt"; 270 std::ifstream bidiTestFile(bidiTestPath.c_str()); 271 if (!bidiTestFile.is_open()) { 272 printf("ERROR: Failed to open BidiTest.txt, cannot run tests.\n"); 273 return; 274 } 275 276 bidi_test::Harness<BidiTestRunner> harness(runner); 277 harness.parse(bidiTestFile); 278 bidiTestFile.close(); 279 280 if (runner.m_testsSkipped) 281 printf("WARNING: Skipped %zu tests.\n", runner.m_testsSkipped); 282 printf("Ran %zu tests: %zu level failures %zu order failures.\n", 283 runner.m_testsRun, runner.m_levelFailures, runner.m_orderFailures); 284 285 // The unittest harness only pays attention to GTest output, so we verify 286 // that the tests behaved as expected: 287 EXPECT_EQ(352098u, runner.m_testsRun); 288 EXPECT_EQ(418143u, runner.m_testsSkipped); 289 EXPECT_EQ(0u, runner.m_ignoredCharFailures); 290 EXPECT_EQ(44882u, runner.m_levelFailures); 291 EXPECT_EQ(19151u, runner.m_orderFailures); 292 } 293 294 } 295