Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2013 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #ifndef BidiTestHarness_h
     32 #define BidiTestHarness_h
     33 
#include <istream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
     39 
     40 // FIXME: We don't have any business owning this code. We should try to
     41 // upstream this to unicode.org if possible (for other implementations to use).
// Unicode.org provides a reference implementation, including a parser:
     43 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c
     44 // But it, like the other implementations I've found, is rather tied to
     45 // the algorithms it is testing. This file seeks to only implement the parser bits.
     46 
     47 // Other C/C++ implementations of this parser:
     48 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c
     49 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp
// Both of those are too tied to their respective projects to be of use to Blink.
     51 
// There are non-C implementations to parse BidiTest.txt as well, including:
     53 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb
     54 
     55 // NOTE: None of this file is currently written to be thread-safe.
     56 
     57 namespace bidi_test {
     58 
     59 enum ParagraphDirection {
     60     DirectionAutoLTR = 1,
     61     DirectionLTR = 2,
     62     DirectionRTL = 4,
     63 };
     64 const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL;
     65 
     66 // For error printing:
     67 std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection)
     68 {
     69     switch (paragraphDirection) {
     70     case bidi_test::DirectionAutoLTR:
     71         return "Auto-LTR";
     72     case bidi_test::DirectionLTR:
     73         return "LTR";
     74     case bidi_test::DirectionRTL:
     75         return "RTL";
     76     }
     77     // This should never be reached.
     78     return "";
     79 }
     80 
     81 template<class Runner>
     82 class Harness {
     83 public:
     84     Harness(Runner& runner)
     85         : m_runner(runner)
     86     {
     87     }
     88     void parse(std::istream& bidiTestFile);
     89 
     90 private:
     91     Runner& m_runner;
     92 };
     93 
     94 // We could use boost::trim, but no other part of Blink uses boost yet.
     95 inline void ltrim(std::string& s)
     96 {
     97     static const std::string separators(" \t");
     98     s.erase(0, s.find_first_not_of(separators));
     99 }
    100 
    101 inline void rtrim(std::string& s)
    102 {
    103     static const std::string separators(" \t");
    104     size_t lastNonSpace = s.find_last_not_of(separators);
    105     if (lastNonSpace == std::string::npos) {
    106         s.erase();
    107         return;
    108     }
    109     size_t firstSpaceAtEndOfString = lastNonSpace + 1;
    110     if (firstSpaceAtEndOfString >= s.size())
    111         return; // lastNonSpace was the last char.
    112     s.erase(firstSpaceAtEndOfString, std::string::npos); // erase to the end of the string.
    113 }
    114 
    115 inline void trim(std::string& s)
    116 {
    117     rtrim(s);
    118     ltrim(s);
    119 }
    120 
    121 static std::vector<std::string> parseStringList(const std::string& str)
    122 {
    123     std::vector<std::string> strings;
    124     static const std::string separators(" \t");
    125     size_t lastPos = str.find_first_not_of(separators); // skip leading spaces
    126     size_t pos = str.find_first_of(separators, lastPos); // find next space
    127 
    128     while (std::string::npos != pos || std::string::npos != lastPos) {
    129         strings.push_back(str.substr(lastPos, pos - lastPos));
    130         lastPos = str.find_first_not_of(separators, pos);
    131         pos = str.find_first_of(separators, lastPos);
    132     }
    133     return strings;
    134 }
    135 
    136 static std::vector<int> parseIntList(const std::string& str)
    137 {
    138     std::vector<int> ints;
    139     std::vector<std::string> strings = parseStringList(str);
    140     for (size_t x = 0; x < strings.size(); x++) {
    141         int i = atoi(strings[x].c_str());
    142         ints.push_back(i);
    143     }
    144     return ints;
    145 }
    146 
    147 static std::vector<int> parseLevels(const std::string& line)
    148 {
    149     std::vector<int> levels;
    150     std::vector<std::string> strings = parseStringList(line);
    151     for (size_t x = 0; x < strings.size(); x++) {
    152         const std::string& levelString = strings[x];
    153         int i;
    154         if (levelString == "x")
    155             i = -1;
    156         else
    157             i = atoi(levelString.c_str());
    158         levels.push_back(i);
    159     }
    160     return levels;
    161 }
    162 
    163 // This is not thread-safe as written.
    164 static std::basic_string<UChar> parseTestString(const std::string& line)
    165 {
    166     std::basic_string<UChar> testString;
    167     static std::map<std::string, UChar> charClassExamples;
    168     if (charClassExamples.empty()) {
    169         // FIXME: Explicit make_pair is ugly, but required for C++98 compat.
    170         charClassExamples.insert(std::make_pair("L", 0x6c)); // 'l' for L
    171         charClassExamples.insert(std::make_pair("R", 0x05D0)); // HEBREW ALEF
    172         charClassExamples.insert(std::make_pair("EN", 0x33)); // '3' for EN
    173         charClassExamples.insert(std::make_pair("ES", 0x2d)); // '-' for ES
    174         charClassExamples.insert(std::make_pair("ET", 0x25)); // '%' for ET
    175         charClassExamples.insert(std::make_pair("AN", 0x0660)); // arabic 0
    176         charClassExamples.insert(std::make_pair("CS", 0x2c)); // ',' for CS
    177         charClassExamples.insert(std::make_pair("B", 0x0A)); // <control-000A>
    178         charClassExamples.insert(std::make_pair("S", 0x09)); // <control-0009>
    179         charClassExamples.insert(std::make_pair("WS", 0x20)); // ' ' for WS
    180         charClassExamples.insert(std::make_pair("ON", 0x3d)); // '=' for ON
    181         charClassExamples.insert(std::make_pair("NSM", 0x05BF)); // HEBREW POINT RAFE
    182         charClassExamples.insert(std::make_pair("AL", 0x0608)); // ARABIC RAY
    183         charClassExamples.insert(std::make_pair("BN", 0x00AD)); // SOFT HYPHEN
    184         charClassExamples.insert(std::make_pair("LRE", 0x202A));
    185         charClassExamples.insert(std::make_pair("RLE", 0x202B));
    186         charClassExamples.insert(std::make_pair("PDF", 0x202C));
    187         charClassExamples.insert(std::make_pair("LRO", 0x202D));
    188         charClassExamples.insert(std::make_pair("RLO", 0x202E));
    189         charClassExamples.insert(std::make_pair("LRI", 0x2066));
    190         charClassExamples.insert(std::make_pair("RLI", 0x2067));
    191         charClassExamples.insert(std::make_pair("FSI", 0x2068));
    192         charClassExamples.insert(std::make_pair("PDI", 0x2069));
    193     }
    194 
    195     std::vector<std::string> charClasses = parseStringList(line);
    196     for (size_t i = 0; i < charClasses.size(); i++) {
    197         // FIXME: If the lookup failed we could return false for a parse error.
    198         testString.push_back(charClassExamples.find(charClasses[i])->second);
    199     }
    200     return testString;
    201 }
    202 
    203 static bool parseParagraphDirectionMask(const std::string& line, int& modeMask)
    204 {
    205     modeMask = atoi(line.c_str());
    206     return modeMask >= 1 && modeMask <= kMaxParagraphDirection;
    207 }
    208 
    209 static void parseError(const std::string& line, size_t lineNumber)
    210 {
    211     // Use printf to avoid the expense of std::cout.
    212     printf("Parse error, line %zu : %s\n", lineNumber, line.c_str());
    213 }
    214 
    215 template<class Runner>
    216 void Harness<Runner>::parse(std::istream& bidiTestFile)
    217 {
    218     static const std::string levelsPrefix("@Levels");
    219     static const std::string reorderPrefix("@Reorder");
    220 
    221     // FIXME: UChar is an ICU type and cheating a bit to use here.
    222     // uint16_t might be more portable.
    223     std::basic_string<UChar> testString;
    224     std::vector<int> levels;
    225     std::vector<int> reorder;
    226     int paragraphDirectionMask;
    227 
    228     std::string line;
    229     size_t lineNumber = 0;
    230     while (std::getline(bidiTestFile, line)) {
    231         lineNumber++;
    232         const std::string originalLine = line;
    233         size_t commentStart = line.find_first_of('#');
    234         if (commentStart != std::string::npos)
    235             line = line.substr(0, commentStart);
    236         trim(line);
    237         if (line.empty())
    238             continue;
    239         if (line[0] == '@') {
    240             if (!line.find(levelsPrefix)) {
    241                 levels = parseLevels(line.substr(levelsPrefix.length() + 1));
    242                 continue;
    243             }
    244             if (!line.find(reorderPrefix)) {
    245                 reorder = parseIntList(line.substr(reorderPrefix.length() + 1));
    246                 continue;
    247             }
    248         } else {
    249             // Assume it's a data line.
    250             size_t seperatorIndex = line.find_first_of(';');
    251             if (seperatorIndex == std::string::npos) {
    252                 parseError(originalLine, lineNumber);
    253                 continue;
    254             }
    255             testString = parseTestString(line.substr(0, seperatorIndex));
    256             if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), paragraphDirectionMask)) {
    257                 parseError(originalLine, lineNumber);
    258                 continue;
    259             }
    260 
    261             if (paragraphDirectionMask & DirectionAutoLTR)
    262                 m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber);
    263             if (paragraphDirectionMask & DirectionLTR)
    264                 m_runner.runTest(testString, reorder, levels, DirectionLTR, originalLine, lineNumber);
    265             if (paragraphDirectionMask & DirectionRTL)
    266                 m_runner.runTest(testString, reorder, levels, DirectionRTL, originalLine, lineNumber);
    267         }
    268     }
    269 }
    270 
    271 } // namespace bidi_test
    272 
    273 #endif // BidiTestHarness_h
    274