Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2013, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  bidiconf.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009oct16
     14 *   created by: Markus W. Scherer
     15 *
     16 *   BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
     17 */
     18 
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include "unicode/utypes.h"
     23 #include "unicode/ubidi.h"
     24 #include "unicode/errorcode.h"
     25 #include "unicode/localpointer.h"
     26 #include "unicode/putil.h"
     27 #include "unicode/unistr.h"
     28 #include "intltest.h"
     29 #include "uparse.h"
     30 
     31 class BiDiConformanceTest : public IntlTest {
     32 public:
     33     BiDiConformanceTest() :
     34         directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
     35         errorCount(0) {}
     36 
     37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     38 
     39     void TestBidiTest();
     40     void TestBidiCharacterTest();
     41 private:
     42     char *getUnidataPath(char path[]);
     43 
     44     UBool parseLevels(const char *&start);
     45     UBool parseOrdering(const char *start);
     46     UBool parseInputStringFromBiDiClasses(const char *&start);
     47 
     48     UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
     49     UBool checkOrdering(UBiDi *ubidi);
     50 
     51     void printErrorLine();
     52 
     53     char line[10000];
     54     UBiDiLevel levels[1000];
     55     uint32_t directionBits;
     56     int32_t ordering[1000];
     57     int32_t lineNumber;
     58     int32_t levelsCount;
     59     int32_t orderingCount;
     60     int32_t errorCount;
     61     UnicodeString inputString;
     62     const char *paraLevelName;
     63     char levelNameString[12];
     64 };
     65 
     66 extern IntlTest *createBiDiConformanceTest() {
     67     return new BiDiConformanceTest();
     68 }
     69 
// Test dispatcher: logs the suite name when exec is true, then hands
// index/exec/name to the TESTCASE_AUTO macros, which list the test functions
// TestBidiTest and TestBidiCharacterTest.
// NOTE(review): the TESTCASE_AUTO macros are defined outside this file; presumably
// they set name for the given index and call the function when exec is true -- confirm.
// The par parameter is not used.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite BiDiConformanceTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestBidiTest);
    TESTCASE_AUTO(TestBidiCharacterTest);
    TESTCASE_AUTO_END;
}
     79 
     80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
     81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
     82     IcuTestErrorCode errorCode(*this, "getUnidataPath");
     83     const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
     84 
     85     // Look inside ICU_DATA first.
     86     strcpy(path, pathToDataDirectory());
     87     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
     88     FILE *f=fopen(path, "r");
     89     if(f!=NULL) {
     90         fclose(f);
     91         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
     92         return path;
     93     }
     94 
     95     // As a fallback, try to guess where the source data was located
     96     // at the time ICU was built, and look there.
     97 #   ifdef U_TOPSRCDIR
     98         strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
     99 #   else
    100         strcpy(path, loadTestData(errorCode));
    101         strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    102                      U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    103                      U_FILE_SEP_STRING "data");
    104 #   endif
    105     strcat(path, U_FILE_SEP_STRING);
    106     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
    107     f=fopen(path, "r");
    108     if(f!=NULL) {
    109         fclose(f);
    110         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
    111         return path;
    112     }
    113     return NULL;
    114 }
    115 
// Defines the class LocalStdioFilePointer via ICU's U_DEFINE_LOCAL_OPEN_POINTER macro,
// with FILE as the pointee type and fclose as the close function.
// The test functions below wrap their fopen() results in it.
// NOTE(review): presumably the wrapper calls fclose() when it goes out of scope -- confirm
// against unicode/localpointer.h.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
    117 
    118 UBool BiDiConformanceTest::parseLevels(const char *&start) {
    119     directionBits=0;
    120     levelsCount=0;
    121     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
    122         if(*start=='x') {
    123             levels[levelsCount++]=UBIDI_DEFAULT_LTR;
    124             ++start;
    125         } else {
    126             char *end;
    127             uint32_t value=(uint32_t)strtoul(start, &end, 10);
    128             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
    129                           || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
    130                 errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
    131                 printErrorLine();
    132                 return FALSE;
    133             }
    134             levels[levelsCount++]=(UBiDiLevel)value;
    135             directionBits|=(1<<(value&1));
    136             start=end;
    137         }
    138     }
    139     return TRUE;
    140 }
    141 
    142 UBool BiDiConformanceTest::parseOrdering(const char *start) {
    143     orderingCount=0;
    144     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
    145         char *end;
    146         uint32_t value=(uint32_t)strtoul(start, &end, 10);
    147         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
    148             errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
    149             printErrorLine();
    150             return FALSE;
    151         }
    152         ordering[orderingCount++]=(int32_t)value;
    153         start=end;
    154     }
    155     return TRUE;
    156 }
    157 
// Stand-in characters for the BiDi classes, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() appends charFromBiDiClass[biDiClass] to the
// input string for each class name it reads, and biDiConfUBiDiClassCallback()
// maps such a character back to its index, that is, to its BiDi class.
// The values are written as hex code units; the comments show the corresponding
// ASCII character and the short name of the class.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
    0x6c,   // 'l' for L
    0x52,   // 'R' for R
    0x33,   // '3' for EN
    0x2d,   // '-' for ES
    0x25,   // '%' for ET
    0x39,   // '9' for AN
    0x2c,   // ',' for CS
    0x2f,   // '/' for B
    0x5f,   // '_' for S
    0x20,   // ' ' for WS
    0x3d,   // '=' for ON
    0x65,   // 'e' for LRE
    0x6f,   // 'o' for LRO
    0x41,   // 'A' for AL
    0x45,   // 'E' for RLE
    0x4f,   // 'O' for RLO
    0x2a,   // '*' for PDF
    0x60,   // '`' for NSM
    0x7c,   // '|' for BN
    // new in Unicode 6.3/ICU 52
    0x53,   // 'S' for FSI
    0x69,   // 'i' for LRI
    0x49,   // 'I' for RLI
    0x2e    // '.' for PDI
};
    184 
    185 U_CDECL_BEGIN
    186 
    187 static UCharDirection U_CALLCONV
    188 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
    189     for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
    190         if(c==charFromBiDiClass[i]) {
    191             return (UCharDirection)i;
    192         }
    193     }
    194     // Character not in our hardcoded table.
    195     // Should not occur during testing.
    196     return U_BIDI_CLASS_DEFAULT;
    197 }
    198 
    199 U_CDECL_END
    200 
// Lengths of the short BiDi class names, indexed by UCharDirection value,
// in the same order as charFromBiDiClass[] (L, R, EN, ES, ET, AN, CS, B, S, WS, ON,
// LRE, LRO, AL, RLE, RLO, PDF, NSM, BN, FSI, LRI, RLI, PDI).
// The additional last entry (0) is for index U_CHAR_DIRECTION_COUNT, which
// parseInputStringFromBiDiClasses() uses when it does not recognize a class name.
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
};
    204 
    205 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
    206     inputString.remove();
    207     /*
    208      * Lengthy but fast BiDi class parser.
    209      * A simple parser could terminate or extract the name string and use
    210      *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
    211      * but that makes this test take significantly more time.
    212      */
    213     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
    214         UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
    215         // Compare each character once until we have a match on
    216         // a complete, short BiDi class name.
    217         if(start[0]=='L') {
    218             if(start[1]=='R') {
    219                 if(start[2]=='E') {
    220                     biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
    221                 } else if(start[2]=='I') {
    222                     biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
    223                 } else if(start[2]=='O') {
    224                     biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
    225                 }
    226             } else {
    227                 biDiClass=U_LEFT_TO_RIGHT;
    228             }
    229         } else if(start[0]=='R') {
    230             if(start[1]=='L') {
    231                 if(start[2]=='E') {
    232                     biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
    233                 } else if(start[2]=='I') {
    234                     biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
    235                 } else if(start[2]=='O') {
    236                     biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
    237                 }
    238             } else {
    239                 biDiClass=U_RIGHT_TO_LEFT;
    240             }
    241         } else if(start[0]=='E') {
    242             if(start[1]=='N') {
    243                 biDiClass=U_EUROPEAN_NUMBER;
    244             } else if(start[1]=='S') {
    245                 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
    246             } else if(start[1]=='T') {
    247                 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
    248             }
    249         } else if(start[0]=='A') {
    250             if(start[1]=='L') {
    251                 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
    252             } else if(start[1]=='N') {
    253                 biDiClass=U_ARABIC_NUMBER;
    254             }
    255         } else if(start[0]=='C' && start[1]=='S') {
    256             biDiClass=U_COMMON_NUMBER_SEPARATOR;
    257         } else if(start[0]=='B') {
    258             if(start[1]=='N') {
    259                 biDiClass=U_BOUNDARY_NEUTRAL;
    260             } else {
    261                 biDiClass=U_BLOCK_SEPARATOR;
    262             }
    263         } else if(start[0]=='S') {
    264             biDiClass=U_SEGMENT_SEPARATOR;
    265         } else if(start[0]=='W' && start[1]=='S') {
    266             biDiClass=U_WHITE_SPACE_NEUTRAL;
    267         } else if(start[0]=='O' && start[1]=='N') {
    268             biDiClass=U_OTHER_NEUTRAL;
    269         } else if(start[0]=='P' && start[1]=='D') {
    270             if(start[2]=='F') {
    271                 biDiClass=U_POP_DIRECTIONAL_FORMAT;
    272             } else if(start[2]=='I') {
    273                 biDiClass=U_POP_DIRECTIONAL_ISOLATE;
    274             }
    275         } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
    276             biDiClass=U_DIR_NON_SPACING_MARK;
    277         } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
    278             biDiClass=U_FIRST_STRONG_ISOLATE;
    279         }
    280         // Now we verify that the class name is terminated properly,
    281         // and not just the start of a longer word.
    282         int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
    283         char c=start[biDiClassNameLength];
    284         if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
    285             inputString.append(charFromBiDiClass[biDiClass]);
    286             start+=biDiClassNameLength;
    287             continue;
    288         }
    289         errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
    290         printErrorLine();
    291         return FALSE;
    292     }
    293     return TRUE;
    294 }
    295 
    296 void BiDiConformanceTest::TestBidiTest() {
    297     IcuTestErrorCode errorCode(*this, "TestBidiTest");
    298     const char *sourceTestDataPath=getSourceTestData(errorCode);
    299     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
    300                                       "folder (getSourceTestData())")) {
    301         return;
    302     }
    303     char bidiTestPath[400];
    304     strcpy(bidiTestPath, sourceTestDataPath);
    305     strcat(bidiTestPath, "BidiTest.txt");
    306     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    307     if(bidiTestFile.isNull()) {
    308         errln("unable to open %s", bidiTestPath);
    309         return;
    310     }
    311     LocalUBiDiPointer ubidi(ubidi_open());
    312     ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
    313                            NULL, NULL, errorCode);
    314     if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
    315         return;
    316     }
    317     lineNumber=0;
    318     levelsCount=0;
    319     orderingCount=0;
    320     errorCount=0;
    321     while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
    322         ++lineNumber;
    323         // Remove trailing comments and whitespace.
    324         char *commentStart=strchr(line, '#');
    325         if(commentStart!=NULL) {
    326             *commentStart=0;
    327         }
    328         u_rtrim(line);
    329         const char *start=u_skipWhitespace(line);
    330         if(*start==0) {
    331             continue;  // Skip empty and comment-only lines.
    332         }
    333         if(*start=='@') {
    334             ++start;
    335             if(0==strncmp(start, "Levels:", 7)) {
    336                 start+=7;
    337                 if(!parseLevels(start)) {
    338                     return;
    339                 }
    340             } else if(0==strncmp(start, "Reorder:", 8)) {
    341                 if(!parseOrdering(start+8)) {
    342                     return;
    343                 }
    344             }
    345             // Skip unknown @Xyz: ...
    346         } else {
    347             if(!parseInputStringFromBiDiClasses(start)) {
    348                 return;
    349             }
    350             start=u_skipWhitespace(start);
    351             if(*start!=';') {
    352                 errln("missing ; separator on input line %s", line);
    353                 return;
    354             }
    355             start=u_skipWhitespace(start+1);
    356             char *end;
    357             uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
    358             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
    359                 errln("input bitset parse error at %s", start);
    360                 return;
    361             }
    362             // Loop over the bitset.
    363             static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
    364             static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
    365             for(int i=0; i<=3; ++i) {
    366                 if(bitset&(1<<i)) {
    367                     ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
    368                                   paraLevels[i], NULL, errorCode);
    369                     const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
    370                     if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
    371                         errln("Input line %d: %s", (int)lineNumber, line);
    372                         return;
    373                     }
    374                     paraLevelName=paraLevelNames[i];
    375                     if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
    376                         // continue outerLoop;  does not exist in C++
    377                         // so just break out of the inner loop.
    378                         break;
    379                     }
    380                     if(!checkOrdering(ubidi.getAlias())) {
    381                         // continue outerLoop;  does not exist in C++
    382                         // so just break out of the inner loop.
    383                         break;
    384                     }
    385                 }
    386             }
    387         }
    388     }
    389 }
    390 
    391 /*
    392 *******************************************************************************
    393 *
    394 *   created on: 2013jul01
    395 *   created by: Matitiahu Allouche
    396 
    397 This function performs a conformance test for implementations of the
    398 Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
    399 Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
    400 
    401 Each test case is represented in a single line which is read from a file
named BidiCharacterTest.txt.  Empty, blank and comment lines may also appear
    403 in this file.
    404 
    405 The format of the test data is specified below.  Note that each test
    406 case constitutes a single line of text; reordering is applied within a
    407 single line and independently of a rendering engine, and rules L3 and L4
    408 are out of scope.
    409 
    410 The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line.
    412 Empty lines and lines containing only spaces and/or comments are ignored.
    413 
    414 Lines which represent test cases consist of 4 or 5 fields separated by a
    415 semicolon.  Each field consists of tokens separated by whitespace (space
    416 or Tab).  Whitespace before and after semicolons is optional.
    417 
    418 Field 0: A sequence of hexadecimal code point values separated by space
    419 
    420 Field 1: A value representing the paragraph direction, as follows:
    421     - 0 represents left-to-right
    422     - 1 represents right-to-left
    423     - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
    424     - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
    425     - a negative number whose absolute value is taken as paragraph level;
    426       this may be useful to test cases where the embedding level approaches
    427       or exceeds the maximum embedding level.
    428 
    429 Field 2: The resolved paragraph embedding level.  If the input (field 0)
    430          includes more than one paragraph, this field represents the
    431          resolved level of the first paragraph.
    432 
    433 Field 3: An ordered list of resulting levels for each token in field 0
    434          (each token represents one source character).
    435          The UBA does not assign levels to certain characters (e.g. LRO);
    436          characters removed in rule X9 are indicated with an 'x'.
    437 
    438 Field 4: An ordered list of indices showing the resulting visual ordering
    439          from left to right; characters with a resolved level of 'x' are
    440          skipped.  The number are zero-based.  Each index corresponds to
    441          a character in the reordered (visual) string. It represents the
    442          index of the source character in the input (field 0).
    443          This field is optional.  When it is absent, the visual ordering
    444          is not verified.
    445 
    446 Examples:
    447 
    448 # This is a comment line.
    449 L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
    450 L L ON R;0;0;0 0 0 1;0 1 2 3
    451 
    452 # Note: in the next line, 'B' represents a block separator, not the letter 'B'.
    453 LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
    454 # Note: in the next line, 'b' represents the letter 'b', not a block separator.
    455 a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
    456 
    457 a R R x ; 1 ; 1 ; 2 1 1 2
    458 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
    459 
    460 *
    461 *******************************************************************************
    462 */
    463 void BiDiConformanceTest::TestBidiCharacterTest() {
    464     IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
    465     const char *sourceTestDataPath=getSourceTestData(errorCode);
    466     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
    467                                       "folder (getSourceTestData())")) {
    468         return;
    469     }
    470     char bidiTestPath[400];
    471     strcpy(bidiTestPath, sourceTestDataPath);
    472     strcat(bidiTestPath, "BidiCharacterTest.txt");
    473     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    474     if(bidiTestFile.isNull()) {
    475         errln("unable to open %s", bidiTestPath);
    476         return;
    477     }
    478     LocalUBiDiPointer ubidi(ubidi_open());
    479     lineNumber=0;
    480     levelsCount=0;
    481     orderingCount=0;
    482     errorCount=0;
    483     while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
    484         ++lineNumber;
    485         paraLevelName="N/A";
    486         inputString="N/A";
    487         // Remove trailing comments and whitespace.
    488         char *commentStart=strchr(line, '#');
    489         if(commentStart!=NULL) {
    490             *commentStart=0;
    491         }
    492         u_rtrim(line);
    493         const char *start=u_skipWhitespace(line);
    494         if(*start==0) {
    495             continue;  // Skip empty and comment-only lines.
    496         }
    497         // Parse the code point string in field 0.
    498         UChar *buffer=inputString.getBuffer(200);
    499         int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
    500         if(errorCode.logIfFailureAndReset("Invalid string in field 0")) {
    501             errln("Input line %d: %s", (int)lineNumber, line);
    502             inputString.remove();
    503             continue;
    504         }
    505         inputString.releaseBuffer(length);
    506         start=strchr(start, ';');
    507         if(start==NULL) {
    508             errorCount++;
    509             errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
    510             continue;
    511         }
    512         start=u_skipWhitespace(start+1);
    513         char *end;
    514         int32_t paraDirection=(int32_t)strtol(start, &end, 10);
    515         UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
    516         if(paraDirection==0) {
    517             paraLevel=0;
    518             paraLevelName="LTR";
    519         }
    520         else if(paraDirection==1) {
    521             paraLevel=1;
    522             paraLevelName="RTL";
    523         }
    524         else if(paraDirection==2) {
    525             paraLevel=UBIDI_DEFAULT_LTR;
    526             paraLevelName="Auto/LTR";
    527         }
    528         else if(paraDirection==3) {
    529             paraLevel=UBIDI_DEFAULT_RTL;
    530             paraLevelName="Auto/RTL";
    531         }
    532         else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
    533             paraLevel=(UBiDiLevel)(-paraDirection);
    534             sprintf(levelNameString, "%d", (int)paraLevel);
    535             paraLevelName=levelNameString;
    536         }
    537         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
    538                          paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
    539             errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
    540             printErrorLine();
    541             continue;
    542         }
    543         start=u_skipWhitespace(end);
    544         if(*start!=';') {
    545             errorCount++;
    546             errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
    547             continue;
    548         }
    549         start++;
    550         uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
    551         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
    552            resolvedParaLevel>1) {
    553             errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
    554             printErrorLine();
    555             continue;
    556         }
    557         start=u_skipWhitespace(end);
    558         if(*start!=';') {
    559             errorCount++;
    560             errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
    561             return;
    562         }
    563         start++;
    564         if(!parseLevels(start)) {
    565             continue;
    566         }
    567         start=u_skipWhitespace(start);
    568         if(*start==';') {
    569             if(!parseOrdering(start+1)) {
    570                 continue;
    571             }
    572         }
    573         else
    574             orderingCount=-1;
    575 
    576         ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
    577                       paraLevel, NULL, errorCode);
    578         const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
    579         if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
    580             errln("Input line %d: %s", (int)lineNumber, line);
    581             continue;
    582         }
    583         UBiDiLevel actualLevel;
    584         if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
    585             printErrorLine();
    586             errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
    587                    (int)lineNumber, resolvedParaLevel, actualLevel);
    588             continue;
    589         }
    590         if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
    591             continue;
    592         }
    593         if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
    594             continue;
    595         }
    596     }
    597 }
    598 
    599 static UChar printLevel(UBiDiLevel level) {
    600     if(level<UBIDI_DEFAULT_LTR) {
    601         return 0x30+level;
    602     } else {
    603         return 0x78;  // 'x'
    604     }
    605 }
    606 
    607 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    608     uint32_t actualDirectionBits=0;
    609     for(int32_t i=0; i<actualCount; ++i) {
    610         actualDirectionBits|=(1<<(actualLevels[i]&1));
    611     }
    612     return actualDirectionBits;
    613 }
    614 
    615 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
    616     UBool isOk=TRUE;
    617     if(levelsCount!=actualCount) {
    618         errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
    619               (int)lineNumber, (int)levelsCount, (int)actualCount);
    620         isOk=FALSE;
    621     } else {
    622         for(int32_t i=0; i<actualCount; ++i) {
    623             if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
    624                 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
    625                     // ICU used a shortcut:
    626                     // Since the text is unidirectional, it did not store the resolved
    627                     // levels but just returns all levels as the paragraph level 0 or 1.
    628                     // The reordering result is the same, so this is fine.
    629                     break;
    630                 } else {
    631                     errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
    632                           (int)lineNumber, (int)i, levels[i], actualLevels[i]);
    633                     isOk=FALSE;
    634                     break;
    635                 }
    636             }
    637         }
    638     }
    639     if(!isOk) {
    640         printErrorLine();
    641         UnicodeString els("Expected levels:   ");
    642         int32_t i;
    643         for(i=0; i<levelsCount; ++i) {
    644             els.append((UChar)0x20).append(printLevel(levels[i]));
    645         }
    646         UnicodeString als("Actual   levels:   ");
    647         for(i=0; i<actualCount; ++i) {
    648             als.append((UChar)0x20).append(printLevel(actualLevels[i]));
    649         }
    650         errln(els);
    651         errln(als);
    652     }
    653     return isOk;
    654 }
    655 
    656 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
    657 // does not work for custom BiDi class assignments
    658 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
    659 // Therefore we just skip the indexes for BiDi controls while comparing
    660 // with the expected ordering that has them omitted.
    661 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
    662     UBool isOk=TRUE;
    663     IcuTestErrorCode errorCode(*this, "checkOrdering()");
    664     int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
    665     int32_t i, visualIndex;
    666     // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
    667     // and loop over each run's indexes, but that seems unnecessary for this test code.
    668     for(i=visualIndex=0; i<resultLength; ++i) {
    669         int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    670         if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
    671             errln("Input line %d: %s", (int)lineNumber, line);
    672             return FALSE;
    673         }
    674         if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
    675             continue;  // BiDi control, omitted from expected ordering.
    676         }
    677         if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
    678             errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
    679                   (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
    680             isOk=FALSE;
    681             break;
    682         }
    683         ++visualIndex;
    684     }
    685     // visualIndex is now the visual length minus the BiDi controls,
    686     // which should match the length of the BidiTest.txt ordering.
    687     if(isOk && orderingCount!=visualIndex) {
    688         errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
    689               (int)lineNumber, (int)orderingCount, (int)visualIndex);
    690         isOk=FALSE;
    691     }
    692     if(!isOk) {
    693         printErrorLine();
    694         UnicodeString eord("Expected ordering: ");
    695         for(i=0; i<orderingCount; ++i) {
    696             eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
    697         }
    698         UnicodeString aord("Actual   ordering: ");
    699         for(i=0; i<resultLength; ++i) {
    700             int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    701             if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
    702                 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
    703             }
    704         }
    705         errln(eord);
    706         errln(aord);
    707     }
    708     return isOk;
    709 }
    710 
    711 void BiDiConformanceTest::printErrorLine() {
    712     ++errorCount;
    713     errln("Input line %5d:   %s", (int)lineNumber, line);
    714     errln(UnicodeString("Input string:       ")+inputString);
    715     errln("Para level:         %s", paraLevelName);
    716 }
    717