Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  bidiconf.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009oct16
     14 *   created by: Markus W. Scherer
     15 *
     16 *   BiDi conformance test, using the Unicode BidiTest.txt file.
     17 */
     18 
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include "unicode/utypes.h"
     23 #include "unicode/ubidi.h"
     24 #include "unicode/errorcode.h"
     25 #include "unicode/localpointer.h"
     26 #include "unicode/putil.h"
     27 #include "unicode/unistr.h"
     28 #include "intltest.h"
     29 #include "uparse.h"
     30 
     31 class BiDiConformanceTest : public IntlTest {
     32 public:
     33     BiDiConformanceTest() :
     34         directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
     35         errorCount(0) {}
     36 
     37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     38 
     39     void TestBidiTest();
     40 private:
     41     char *getUnidataPath(char path[]);
     42 
     43     UBool parseLevels(const char *start);
     44     UBool parseOrdering(const char *start);
     45     UBool parseInputStringFromBiDiClasses(const char *&start);
     46 
     47     UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
     48                       const char *paraLevelName);
     49     UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
     50 
     51     void printErrorLine(const char *paraLevelName);
     52 
     53     char line[10000];
     54     UBiDiLevel levels[1000];
     55     uint32_t directionBits;
     56     int32_t ordering[1000];
     57     int32_t lineNumber;
     58     int32_t levelsCount;
     59     int32_t orderingCount;
     60     int32_t errorCount;
     61     UnicodeString inputString;
     62 };
     63 
     64 extern IntlTest *createBiDiConformanceTest() {
     65     return new BiDiConformanceTest();
     66 }
     67 
// Test dispatcher for this suite.
// Logs the suite name when exec is set, then selects the test by index:
// index 0 is handled by the TESTCASE macro for TestBidiTest,
// any other index sets name to the empty string.
// The par argument is not used.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite BiDiConformanceTest: ");
    }
    switch (index) {
        TESTCASE(0, TestBidiTest);
        default:
            // No more tests: the empty name tells the caller to stop iterating.
            name="";
            break; // needed to end the loop
    }
}
     79 
     80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
     81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
     82     IcuTestErrorCode errorCode(*this, "getUnidataPath");
     83     const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
     84 
     85     // Look inside ICU_DATA first.
     86     strcpy(path, pathToDataDirectory());
     87     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
     88     FILE *f=fopen(path, "r");
     89     if(f!=NULL) {
     90         fclose(f);
     91         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
     92         return path;
     93     }
     94 
     95     // As a fallback, try to guess where the source data was located
     96     // at the time ICU was built, and look there.
     97 #   ifdef U_TOPSRCDIR
     98         strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
     99 #   else
    100         strcpy(path, loadTestData(errorCode));
    101         strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    102                      U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    103                      U_FILE_SEP_STRING "data");
    104 #   endif
    105     strcat(path, U_FILE_SEP_STRING);
    106     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
    107     f=fopen(path, "r");
    108     if(f!=NULL) {
    109         fclose(f);
    110         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
    111         return path;
    112     }
    113     return NULL;
    114 }
    115 
// Defines the class LocalStdioFilePointer which pairs a FILE pointer with fclose.
// TestBidiTest() uses it to hold the opened BidiTest.txt file;
// presumably the file is closed when the object goes out of scope
// (see unicode/localpointer.h to confirm).
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
    117 
    118 UBool BiDiConformanceTest::parseLevels(const char *start) {
    119     directionBits=0;
    120     levelsCount=0;
    121     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
    122         if(*start=='x') {
    123             levels[levelsCount++]=UBIDI_DEFAULT_LTR;
    124             ++start;
    125         } else {
    126             char *end;
    127             uint32_t value=(uint32_t)strtoul(start, &end, 10);
    128             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
    129                 errln("@Levels: parse error at %s", start);
    130                 return FALSE;
    131             }
    132             levels[levelsCount++]=(UBiDiLevel)value;
    133             directionBits|=(1<<(value&1));
    134             start=end;
    135         }
    136     }
    137     return TRUE;
    138 }
    139 
    140 UBool BiDiConformanceTest::parseOrdering(const char *start) {
    141     orderingCount=0;
    142     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
    143         char *end;
    144         uint32_t value=(uint32_t)strtoul(start, &end, 10);
    145         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
    146             errln("@Reorder: parse error at %s", start);
    147             return FALSE;
    148         }
    149         ordering[orderingCount++]=(int32_t)value;
    150         start=end;
    151     }
    152     return TRUE;
    153 }
    154 
// Stand-in characters for the BiDi classes, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() appends charFromBiDiClass[biDiClass]
// for each parsed class name, and biDiConfUBiDiClassCallback() maps
// such a character back to its index, that is, to its BiDi class.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
    0x6c,   // 'l' for L
    0x52,   // 'R' for R
    0x33,   // '3' for EN
    0x2d,   // '-' for ES
    0x25,   // '%' for ET
    0x39,   // '9' for AN
    0x2c,   // ',' for CS
    0x2f,   // '/' for B
    0x5f,   // '_' for S
    0x20,   // ' ' for WS
    0x3d,   // '=' for ON
    0x65,   // 'e' for LRE
    0x6f,   // 'o' for LRO
    0x41,   // 'A' for AL
    0x45,   // 'E' for RLE
    0x4f,   // 'O' for RLO
    0x2a,   // '*' for PDF
    0x60,   // '`' for NSM
    0x7c    // '|' for BN
};
    176 
    177 U_CDECL_BEGIN
    178 
    179 static UCharDirection U_CALLCONV
    180 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
    181     for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
    182         if(c==charFromBiDiClass[i]) {
    183             return (UCharDirection)i;
    184         }
    185     }
    186     // Character not in our hardcoded table.
    187     // Should not occur during testing.
    188     return U_BIDI_CLASS_DEFAULT;
    189 }
    190 
    191 U_CDECL_END
    192 
    193 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    194     1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
    195 };
    196 
    197 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
    198     inputString.remove();
    199     /*
    200      * Lengthy but fast BiDi class parser.
    201      * A simple parser could terminate or extract the name string and use
    202      *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
    203      * but that makes this test take significantly more time.
    204      */
    205     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
    206         UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
    207         // Compare each character once until we have a match on
    208         // a complete, short BiDi class name.
    209         if(start[0]=='L') {
    210             if(start[1]=='R') {
    211                 if(start[2]=='E') {
    212                     biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
    213                 } else if(start[2]=='O') {
    214                     biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
    215                 }
    216             } else {
    217                 biDiClass=U_LEFT_TO_RIGHT;
    218             }
    219         } else if(start[0]=='R') {
    220             if(start[1]=='L') {
    221                 if(start[2]=='E') {
    222                     biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
    223                 } else if(start[2]=='O') {
    224                     biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
    225                 }
    226             } else {
    227                 biDiClass=U_RIGHT_TO_LEFT;
    228             }
    229         } else if(start[0]=='E') {
    230             if(start[1]=='N') {
    231                 biDiClass=U_EUROPEAN_NUMBER;
    232             } else if(start[1]=='S') {
    233                 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
    234             } else if(start[1]=='T') {
    235                 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
    236             }
    237         } else if(start[0]=='A') {
    238             if(start[1]=='L') {
    239                 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
    240             } else if(start[1]=='N') {
    241                 biDiClass=U_ARABIC_NUMBER;
    242             }
    243         } else if(start[0]=='C' && start[1]=='S') {
    244             biDiClass=U_COMMON_NUMBER_SEPARATOR;
    245         } else if(start[0]=='B') {
    246             if(start[1]=='N') {
    247                 biDiClass=U_BOUNDARY_NEUTRAL;
    248             } else {
    249                 biDiClass=U_BLOCK_SEPARATOR;
    250             }
    251         } else if(start[0]=='S') {
    252             biDiClass=U_SEGMENT_SEPARATOR;
    253         } else if(start[0]=='W' && start[1]=='S') {
    254             biDiClass=U_WHITE_SPACE_NEUTRAL;
    255         } else if(start[0]=='O' && start[1]=='N') {
    256             biDiClass=U_OTHER_NEUTRAL;
    257         } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
    258             biDiClass=U_POP_DIRECTIONAL_FORMAT;
    259         } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
    260             biDiClass=U_DIR_NON_SPACING_MARK;
    261         }
    262         // Now we verify that the class name is terminated properly,
    263         // and not just the start of a longer word.
    264         int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
    265         char c=start[biDiClassNameLength];
    266         if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
    267             errln("BiDi class string not recognized at %s", start);
    268             return FALSE;
    269         }
    270         inputString.append(charFromBiDiClass[biDiClass]);
    271         start+=biDiClassNameLength;
    272     }
    273     return TRUE;
    274 }
    275 
    276 void BiDiConformanceTest::TestBidiTest() {
    277     IcuTestErrorCode errorCode(*this, "TestBidiTest");
    278     const char *sourceTestDataPath=getSourceTestData(errorCode);
    279     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
    280                                       "folder (getSourceTestData())")) {
    281         return;
    282     }
    283     char bidiTestPath[400];
    284     strcpy(bidiTestPath, sourceTestDataPath);
    285     strcat(bidiTestPath, "BidiTest.txt");
    286     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    287     if(bidiTestFile.isNull()) {
    288         errln("unable to open %s", bidiTestPath);
    289         return;
    290     }
    291     LocalUBiDiPointer ubidi(ubidi_open());
    292     ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
    293                            NULL, NULL, errorCode);
    294     if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
    295         return;
    296     }
    297     lineNumber=0;
    298     levelsCount=0;
    299     orderingCount=0;
    300     errorCount=0;
    301     while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
    302         ++lineNumber;
    303         // Remove trailing comments and whitespace.
    304         char *commentStart=strchr(line, '#');
    305         if(commentStart!=NULL) {
    306             *commentStart=0;
    307         }
    308         u_rtrim(line);
    309         const char *start=u_skipWhitespace(line);
    310         if(*start==0) {
    311             continue;  // Skip empty and comment-only lines.
    312         }
    313         if(*start=='@') {
    314             ++start;
    315             if(0==strncmp(start, "Levels:", 7)) {
    316                 if(!parseLevels(start+7)) {
    317                     return;
    318                 }
    319             } else if(0==strncmp(start, "Reorder:", 8)) {
    320                 if(!parseOrdering(start+8)) {
    321                     return;
    322                 }
    323             }
    324             // Skip unknown @Xyz: ...
    325         } else {
    326             if(!parseInputStringFromBiDiClasses(start)) {
    327                 return;
    328             }
    329             start=u_skipWhitespace(start);
    330             if(*start!=';') {
    331                 errln("missing ; separator on input line %s", line);
    332                 return;
    333             }
    334             start=u_skipWhitespace(start+1);
    335             char *end;
    336             uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
    337             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
    338                 errln("input bitset parse error at %s", start);
    339                 return;
    340             }
    341             // Loop over the bitset.
    342             static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
    343             static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
    344             for(int i=0; i<=3; ++i) {
    345                 if(bitset&(1<<i)) {
    346                     ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
    347                                   paraLevels[i], NULL, errorCode);
    348                     const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
    349                     if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
    350                         errln("Input line %d: %s", (int)lineNumber, line);
    351                         return;
    352                     }
    353                     if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
    354                                     paraLevelNames[i])) {
    355                         // continue outerLoop;  does not exist in C++
    356                         // so just break out of the inner loop.
    357                         break;
    358                     }
    359                     if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
    360                         // continue outerLoop;  does not exist in C++
    361                         // so just break out of the inner loop.
    362                         break;
    363                     }
    364                 }
    365             }
    366         }
    367     }
    368 }
    369 
    370 static UChar printLevel(UBiDiLevel level) {
    371     if(level<UBIDI_DEFAULT_LTR) {
    372         return 0x30+level;
    373     } else {
    374         return 0x78;  // 'x'
    375     }
    376 }
    377 
    378 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    379     uint32_t actualDirectionBits=0;
    380     for(int32_t i=0; i<actualCount; ++i) {
    381         actualDirectionBits|=(1<<(actualLevels[i]&1));
    382     }
    383     return actualDirectionBits;
    384 }
    385 
    386 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
    387                                        const char *paraLevelName) {
    388     UBool isOk=TRUE;
    389     if(levelsCount!=actualCount) {
    390         errln("Wrong number of level values; expected %d actual %d",
    391               (int)levelsCount, (int)actualCount);
    392         isOk=FALSE;
    393     } else {
    394         for(int32_t i=0; i<actualCount; ++i) {
    395             if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
    396                 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
    397                     // ICU used a shortcut:
    398                     // Since the text is unidirectional, it did not store the resolved
    399                     // levels but just returns all levels as the paragraph level 0 or 1.
    400                     // The reordering result is the same, so this is fine.
    401                     break;
    402                 } else {
    403                     errln("Wrong level value at index %d; expected %d actual %d",
    404                           (int)i, levels[i], actualLevels[i]);
    405                     isOk=FALSE;
    406                     break;
    407                 }
    408             }
    409         }
    410     }
    411     if(!isOk) {
    412         printErrorLine(paraLevelName);
    413         UnicodeString els("Expected levels:   ");
    414         int32_t i;
    415         for(i=0; i<levelsCount; ++i) {
    416             els.append((UChar)0x20).append(printLevel(levels[i]));
    417         }
    418         UnicodeString als("Actual   levels:   ");
    419         for(i=0; i<actualCount; ++i) {
    420             als.append((UChar)0x20).append(printLevel(actualLevels[i]));
    421         }
    422         errln(els);
    423         errln(als);
    424     }
    425     return isOk;
    426 }
    427 
    428 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
    429 // does not work for custom BiDi class assignments
    430 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
    431 // Therefore we just skip the indexes for BiDi controls while comparing
    432 // with the expected ordering that has them omitted.
    433 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
    434     UBool isOk=TRUE;
    435     IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
    436     int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
    437     int32_t i, visualIndex;
    438     // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
    439     // and loop over each run's indexes, but that seems unnecessary for this test code.
    440     for(i=visualIndex=0; i<resultLength; ++i) {
    441         int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    442         if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
    443             errln("Input line %d: %s", (int)lineNumber, line);
    444             return FALSE;
    445         }
    446         if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
    447             continue;  // BiDi control, omitted from expected ordering.
    448         }
    449         if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
    450             errln("Wrong ordering value at visual index %d; expected %d actual %d",
    451                   (int)visualIndex, ordering[visualIndex], logicalIndex);
    452             isOk=FALSE;
    453             break;
    454         }
    455         ++visualIndex;
    456     }
    457     // visualIndex is now the visual length minus the BiDi controls,
    458     // which should match the length of the BidiTest.txt ordering.
    459     if(isOk && orderingCount!=visualIndex) {
    460         errln("Wrong number of ordering values; expected %d actual %d",
    461               (int)orderingCount, (int)visualIndex);
    462         isOk=FALSE;
    463     }
    464     if(!isOk) {
    465         printErrorLine(paraLevelName);
    466         UnicodeString eord("Expected ordering: ");
    467         for(i=0; i<orderingCount; ++i) {
    468             eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
    469         }
    470         UnicodeString aord("Actual   ordering: ");
    471         for(i=0; i<resultLength; ++i) {
    472             int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    473             if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
    474                 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
    475             }
    476         }
    477         errln(eord);
    478         errln(aord);
    479     }
    480     return isOk;
    481 }
    482 
    483 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
    484     ++errorCount;
    485     errln("Input line %5d:   %s", (int)lineNumber, line);
    486     errln(UnicodeString("Input string:       ")+inputString);
    487     errln("Para level:         %s", paraLevelName);
    488 }
    489