1 /* 2 * Copyright (C) 2013 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #ifndef BidiTestHarness_h 32 #define BidiTestHarness_h 33 34 #include <istream> 35 #include <map> 36 #include <stdio.h> 37 #include <string> 38 #include <vector> 39 40 // FIXME: We don't have any business owning this code. We should try to 41 // upstream this to unicode.org if possible (for other implementations to use). 42 // Unicode.org provides a reference implmentation, including parser: 43 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c 44 // But it, like the other implementations I've found, is rather tied to 45 // the algorithms it is testing. This file seeks to only implement the parser bits. 46 47 // Other C/C++ implementations of this parser: 48 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c 49 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp 50 // Both of those are too tied to their respective projects to be use to Blink. 51 52 // There are non-C implmentations to parse BidiTest.txt as well, including: 53 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb 54 55 // NOTE: None of this file is currently written to be thread-safe. 56 57 namespace bidi_test { 58 59 enum ParagraphDirection { 60 DirectionAutoLTR = 1, 61 DirectionLTR = 2, 62 DirectionRTL = 4, 63 }; 64 const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL; 65 66 // For error printing: 67 std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection) 68 { 69 switch (paragraphDirection) { 70 case bidi_test::DirectionAutoLTR: 71 return "Auto-LTR"; 72 case bidi_test::DirectionLTR: 73 return "LTR"; 74 case bidi_test::DirectionRTL: 75 return "RTL"; 76 } 77 // This should never be reached. 78 return ""; 79 } 80 81 template<class Runner> 82 class Harness { 83 public: 84 Harness(Runner& runner) 85 : m_runner(runner) 86 { 87 } 88 void parse(std::istream& bidiTestFile); 89 90 private: 91 Runner& m_runner; 92 }; 93 94 // We could use boost::trim, but no other part of Blink uses boost yet. 95 inline void ltrim(std::string& s) 96 { 97 static const std::string separators(" \t"); 98 s.erase(0, s.find_first_not_of(separators)); 99 } 100 101 inline void rtrim(std::string& s) 102 { 103 static const std::string separators(" \t"); 104 size_t lastNonSpace = s.find_last_not_of(separators); 105 if (lastNonSpace == std::string::npos) { 106 s.erase(); 107 return; 108 } 109 size_t firstSpaceAtEndOfString = lastNonSpace + 1; 110 if (firstSpaceAtEndOfString >= s.size()) 111 return; // lastNonSpace was the last char. 112 s.erase(firstSpaceAtEndOfString, std::string::npos); // erase to the end of the string. 113 } 114 115 inline void trim(std::string& s) 116 { 117 rtrim(s); 118 ltrim(s); 119 } 120 121 static std::vector<std::string> parseStringList(const std::string& str) 122 { 123 std::vector<std::string> strings; 124 static const std::string separators(" \t"); 125 size_t lastPos = str.find_first_not_of(separators); // skip leading spaces 126 size_t pos = str.find_first_of(separators, lastPos); // find next space 127 128 while (std::string::npos != pos || std::string::npos != lastPos) { 129 strings.push_back(str.substr(lastPos, pos - lastPos)); 130 lastPos = str.find_first_not_of(separators, pos); 131 pos = str.find_first_of(separators, lastPos); 132 } 133 return strings; 134 } 135 136 static std::vector<int> parseIntList(const std::string& str) 137 { 138 std::vector<int> ints; 139 std::vector<std::string> strings = parseStringList(str); 140 for (size_t x = 0; x < strings.size(); x++) { 141 int i = atoi(strings[x].c_str()); 142 ints.push_back(i); 143 } 144 return ints; 145 } 146 147 static std::vector<int> parseLevels(const std::string& line) 148 { 149 std::vector<int> levels; 150 std::vector<std::string> strings = parseStringList(line); 151 for (size_t x = 0; x < strings.size(); x++) { 152 const std::string& levelString = strings[x]; 153 int i; 154 if (levelString == "x") 155 i = -1; 156 else 157 i = atoi(levelString.c_str()); 158 levels.push_back(i); 159 } 160 return levels; 161 } 162 163 // This is not thread-safe as written. 164 static std::basic_string<UChar> parseTestString(const std::string& line) 165 { 166 std::basic_string<UChar> testString; 167 static std::map<std::string, UChar> charClassExamples; 168 if (charClassExamples.empty()) { 169 // FIXME: Explicit make_pair is ugly, but required for C++98 compat. 170 charClassExamples.insert(std::make_pair("L", 0x6c)); // 'l' for L 171 charClassExamples.insert(std::make_pair("R", 0x05D0)); // HEBREW ALEF 172 charClassExamples.insert(std::make_pair("EN", 0x33)); // '3' for EN 173 charClassExamples.insert(std::make_pair("ES", 0x2d)); // '-' for ES 174 charClassExamples.insert(std::make_pair("ET", 0x25)); // '%' for ET 175 charClassExamples.insert(std::make_pair("AN", 0x0660)); // arabic 0 176 charClassExamples.insert(std::make_pair("CS", 0x2c)); // ',' for CS 177 charClassExamples.insert(std::make_pair("B", 0x0A)); // <control-000A> 178 charClassExamples.insert(std::make_pair("S", 0x09)); // <control-0009> 179 charClassExamples.insert(std::make_pair("WS", 0x20)); // ' ' for WS 180 charClassExamples.insert(std::make_pair("ON", 0x3d)); // '=' for ON 181 charClassExamples.insert(std::make_pair("NSM", 0x05BF)); // HEBREW POINT RAFE 182 charClassExamples.insert(std::make_pair("AL", 0x0608)); // ARABIC RAY 183 charClassExamples.insert(std::make_pair("BN", 0x00AD)); // SOFT HYPHEN 184 charClassExamples.insert(std::make_pair("LRE", 0x202A)); 185 charClassExamples.insert(std::make_pair("RLE", 0x202B)); 186 charClassExamples.insert(std::make_pair("PDF", 0x202C)); 187 charClassExamples.insert(std::make_pair("LRO", 0x202D)); 188 charClassExamples.insert(std::make_pair("RLO", 0x202E)); 189 charClassExamples.insert(std::make_pair("LRI", 0x2066)); 190 charClassExamples.insert(std::make_pair("RLI", 0x2067)); 191 charClassExamples.insert(std::make_pair("FSI", 0x2068)); 192 charClassExamples.insert(std::make_pair("PDI", 0x2069)); 193 } 194 195 std::vector<std::string> charClasses = parseStringList(line); 196 for (size_t i = 0; i < charClasses.size(); i++) { 197 // FIXME: If the lookup failed we could return false for a parse error. 198 testString.push_back(charClassExamples.find(charClasses[i])->second); 199 } 200 return testString; 201 } 202 203 static bool parseParagraphDirectionMask(const std::string& line, int& modeMask) 204 { 205 modeMask = atoi(line.c_str()); 206 return modeMask >= 1 && modeMask <= kMaxParagraphDirection; 207 } 208 209 static void parseError(const std::string& line, size_t lineNumber) 210 { 211 // Use printf to avoid the expense of std::cout. 212 printf("Parse error, line %zu : %s\n", lineNumber, line.c_str()); 213 } 214 215 template<class Runner> 216 void Harness<Runner>::parse(std::istream& bidiTestFile) 217 { 218 static const std::string levelsPrefix("@Levels"); 219 static const std::string reorderPrefix("@Reorder"); 220 221 // FIXME: UChar is an ICU type and cheating a bit to use here. 222 // uint16_t might be more portable. 223 std::basic_string<UChar> testString; 224 std::vector<int> levels; 225 std::vector<int> reorder; 226 int paragraphDirectionMask; 227 228 std::string line; 229 size_t lineNumber = 0; 230 while (std::getline(bidiTestFile, line)) { 231 lineNumber++; 232 const std::string originalLine = line; 233 size_t commentStart = line.find_first_of('#'); 234 if (commentStart != std::string::npos) 235 line = line.substr(0, commentStart); 236 trim(line); 237 if (line.empty()) 238 continue; 239 if (line[0] == '@') { 240 if (!line.find(levelsPrefix)) { 241 levels = parseLevels(line.substr(levelsPrefix.length() + 1)); 242 continue; 243 } 244 if (!line.find(reorderPrefix)) { 245 reorder = parseIntList(line.substr(reorderPrefix.length() + 1)); 246 continue; 247 } 248 } else { 249 // Assume it's a data line. 250 size_t seperatorIndex = line.find_first_of(';'); 251 if (seperatorIndex == std::string::npos) { 252 parseError(originalLine, lineNumber); 253 continue; 254 } 255 testString = parseTestString(line.substr(0, seperatorIndex)); 256 if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), paragraphDirectionMask)) { 257 parseError(originalLine, lineNumber); 258 continue; 259 } 260 261 if (paragraphDirectionMask & DirectionAutoLTR) 262 m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber); 263 if (paragraphDirectionMask & DirectionLTR) 264 m_runner.runTest(testString, reorder, levels, DirectionLTR, originalLine, lineNumber); 265 if (paragraphDirectionMask & DirectionRTL) 266 m_runner.runTest(testString, reorder, levels, DirectionRTL, originalLine, lineNumber); 267 } 268 } 269 } 270 271 } // namespace bidi_test 272 273 #endif // BidiTestHarness_h 274