Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  bidiconf.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009oct16
     14 *   created by: Markus W. Scherer
     15 *
     16 *   BiDi conformance test, using the Unicode BidiTest.txt file.
     17 */
     18 
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include "unicode/utypes.h"
     23 #include "unicode/ubidi.h"
     24 #include "unicode/errorcode.h"
     25 #include "unicode/localpointer.h"
     26 #include "unicode/putil.h"
     27 #include "unicode/unistr.h"
     28 #include "intltest.h"
     29 #include "uparse.h"
     30 
     31 class BiDiConformanceTest : public IntlTest {
     32 public:
     33     BiDiConformanceTest() :
     34         directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
     35         errorCount(0) {}
     36 
     37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     38 
     39     void TestBidiTest();
     40 private:
     41     char *getUnidataPath(char path[]);
     42 
     43     UBool parseLevels(const char *start);
     44     UBool parseOrdering(const char *start);
     45     UBool parseInputStringFromBiDiClasses(const char *&start);
     46 
     47     UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
     48                       const char *paraLevelName);
     49     UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
     50 
     51     void printErrorLine(const char *paraLevelName);
     52 
     53     char line[10000];
     54     UBiDiLevel levels[1000];
     55     uint32_t directionBits;
     56     int32_t ordering[1000];
     57     int32_t lineNumber;
     58     int32_t levelsCount;
     59     int32_t orderingCount;
     60     int32_t errorCount;
     61     UnicodeString inputString;
     62 };
     63 
     64 extern IntlTest *createBiDiConformanceTest() {
     65     return new BiDiConformanceTest();
     66 }
     67 
     68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
     69     if(exec) {
     70         logln("TestSuite BiDiConformanceTest: ");
     71     }
     72     switch (index) {
     73         TESTCASE(0, TestBidiTest);
     74         default:
     75             name="";
     76             break; // needed to end the loop
     77     }
     78 }
     79 
     80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
     81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
     82     IcuTestErrorCode errorCode(*this, "getUnidataPath");
     83     const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
     84 
     85     // Look inside ICU_DATA first.
     86     strcpy(path, pathToDataDirectory());
     87     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
     88     FILE *f=fopen(path, "r");
     89     if(f!=NULL) {
     90         fclose(f);
     91         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
     92         return path;
     93     }
     94 
     95     // As a fallback, try to guess where the source data was located
     96     // at the time ICU was built, and look there.
     97 #   ifdef U_TOPSRCDIR
     98         strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
     99 #   else
    100         strcpy(path, loadTestData(errorCode));
    101         strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    102                      U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
    103                      U_FILE_SEP_STRING "data");
    104 #   endif
    105     strcat(path, U_FILE_SEP_STRING);
    106     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
    107     f=fopen(path, "r");
    108     if(f!=NULL) {
    109         fclose(f);
    110         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
    111         return path;
    112     }
    113     return NULL;
    114 }
    115 
// Declares LocalStdioFilePointer through the U_DEFINE_LOCAL_OPEN_POINTER macro,
// naming FILE as the pointee type and fclose as the close function.
// TestBidiTest() wraps the result of fopen() in it and uses isNull()/getAlias().
// NOTE(review): presumably fclose() runs when the object goes out of scope --
// confirm against unicode/localpointer.h.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
    117 
    118 // TODO: Make "public" in uparse.h.
    119 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
    120 
    121 UBool BiDiConformanceTest::parseLevels(const char *start) {
    122     directionBits=0;
    123     levelsCount=0;
    124     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
    125         if(*start=='x') {
    126             levels[levelsCount++]=UBIDI_DEFAULT_LTR;
    127             ++start;
    128         } else {
    129             char *end;
    130             uint32_t value=(uint32_t)strtoul(start, &end, 10);
    131             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
    132                 errln("@Levels: parse error at %s", start);
    133                 return FALSE;
    134             }
    135             levels[levelsCount++]=(UBiDiLevel)value;
    136             directionBits|=(1<<(value&1));
    137             start=end;
    138         }
    139     }
    140     return TRUE;
    141 }
    142 
    143 UBool BiDiConformanceTest::parseOrdering(const char *start) {
    144     orderingCount=0;
    145     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
    146         char *end;
    147         uint32_t value=(uint32_t)strtoul(start, &end, 10);
    148         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
    149             errln("@Reorder: parse error at %s", start);
    150             return FALSE;
    151         }
    152         ordering[orderingCount++]=(int32_t)value;
    153         start=end;
    154     }
    155     return TRUE;
    156 }
    157 
    158 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
    159     0x6c,   // 'l' for L
    160     0x52,   // 'R' for R
    161     0x33,   // '3' for EN
    162     0x2d,   // '-' for ES
    163     0x25,   // '%' for ET
    164     0x39,   // '9' for AN
    165     0x2c,   // ',' for CS
    166     0x2f,   // '/' for B
    167     0x5f,   // '_' for S
    168     0x20,   // ' ' for WS
    169     0x3d,   // '=' for ON
    170     0x65,   // 'e' for LRE
    171     0x6f,   // 'o' for LRO
    172     0x41,   // 'A' for AL
    173     0x45,   // 'E' for RLE
    174     0x4f,   // 'O' for RLO
    175     0x2a,   // '*' for PDF
    176     0x60,   // '`' for NSM
    177     0x7c    // '|' for BN
    178 };
    179 
    180 U_CDECL_BEGIN
    181 
    182 static UCharDirection U_CALLCONV
    183 biDiConfUBiDiClassCallback(const void *context, UChar32 c) {
    184     for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
    185         if(c==charFromBiDiClass[i]) {
    186             return (UCharDirection)i;
    187         }
    188     }
    189     // Character not in our hardcoded table.
    190     // Should not occur during testing.
    191     return U_BIDI_CLASS_DEFAULT;
    192 }
    193 
    194 U_CDECL_END
    195 
    196 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    197     1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
    198 };
    199 
    200 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
    201     inputString.remove();
    202     /*
    203      * Lengthy but fast BiDi class parser.
    204      * A simple parser could terminate or extract the name string and use
    205      *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
    206      * but that makes this test take significantly more time.
    207      */
    208     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
    209         UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
    210         // Compare each character once until we have a match on
    211         // a complete, short BiDi class name.
    212         if(start[0]=='L') {
    213             if(start[1]=='R') {
    214                 if(start[2]=='E') {
    215                     biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
    216                 } else if(start[2]=='O') {
    217                     biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
    218                 }
    219             } else {
    220                 biDiClass=U_LEFT_TO_RIGHT;
    221             }
    222         } else if(start[0]=='R') {
    223             if(start[1]=='L') {
    224                 if(start[2]=='E') {
    225                     biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
    226                 } else if(start[2]=='O') {
    227                     biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
    228                 }
    229             } else {
    230                 biDiClass=U_RIGHT_TO_LEFT;
    231             }
    232         } else if(start[0]=='E') {
    233             if(start[1]=='N') {
    234                 biDiClass=U_EUROPEAN_NUMBER;
    235             } else if(start[1]=='S') {
    236                 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
    237             } else if(start[1]=='T') {
    238                 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
    239             }
    240         } else if(start[0]=='A') {
    241             if(start[1]=='L') {
    242                 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
    243             } else if(start[1]=='N') {
    244                 biDiClass=U_ARABIC_NUMBER;
    245             }
    246         } else if(start[0]=='C' && start[1]=='S') {
    247             biDiClass=U_COMMON_NUMBER_SEPARATOR;
    248         } else if(start[0]=='B') {
    249             if(start[1]=='N') {
    250                 biDiClass=U_BOUNDARY_NEUTRAL;
    251             } else {
    252                 biDiClass=U_BLOCK_SEPARATOR;
    253             }
    254         } else if(start[0]=='S') {
    255             biDiClass=U_SEGMENT_SEPARATOR;
    256         } else if(start[0]=='W' && start[1]=='S') {
    257             biDiClass=U_WHITE_SPACE_NEUTRAL;
    258         } else if(start[0]=='O' && start[1]=='N') {
    259             biDiClass=U_OTHER_NEUTRAL;
    260         } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
    261             biDiClass=U_POP_DIRECTIONAL_FORMAT;
    262         } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
    263             biDiClass=U_DIR_NON_SPACING_MARK;
    264         }
    265         // Now we verify that the class name is terminated properly,
    266         // and not just the start of a longer word.
    267         int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
    268         char c=start[biDiClassNameLength];
    269         if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
    270             errln("BiDi class string not recognized at %s", start);
    271             return FALSE;
    272         }
    273         inputString.append(charFromBiDiClass[biDiClass]);
    274         start+=biDiClassNameLength;
    275     }
    276     return TRUE;
    277 }
    278 
    279 void BiDiConformanceTest::TestBidiTest() {
    280     IcuTestErrorCode errorCode(*this, "TestBidiTest");
    281     const char *sourceTestDataPath=getSourceTestData(errorCode);
    282     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
    283                                       "folder (getSourceTestData())")) {
    284         return;
    285     }
    286     char bidiTestPath[400];
    287     strcpy(bidiTestPath, sourceTestDataPath);
    288     strcat(bidiTestPath, "BidiTest.txt");
    289     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    290     if(bidiTestFile.isNull()) {
    291         errln("unable to open %s", bidiTestPath);
    292         return;
    293     }
    294     LocalUBiDiPointer ubidi(ubidi_open());
    295     ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
    296                            NULL, NULL, errorCode);
    297     if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
    298         return;
    299     }
    300     lineNumber=0;
    301     levelsCount=0;
    302     orderingCount=0;
    303     errorCount=0;
    304     while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
    305         ++lineNumber;
    306         // Remove trailing comments and whitespace.
    307         char *commentStart=strchr(line, '#');
    308         if(commentStart!=NULL) {
    309             *commentStart=0;
    310         }
    311         u_rtrim(line);
    312         const char *start=u_skipWhitespace(line);
    313         if(*start==0) {
    314             continue;  // Skip empty and comment-only lines.
    315         }
    316         if(*start=='@') {
    317             ++start;
    318             if(0==strncmp(start, "Levels:", 7)) {
    319                 if(!parseLevels(start+7)) {
    320                     return;
    321                 }
    322             } else if(0==strncmp(start, "Reorder:", 8)) {
    323                 if(!parseOrdering(start+8)) {
    324                     return;
    325                 }
    326             }
    327             // Skip unknown @Xyz: ...
    328         } else {
    329             if(!parseInputStringFromBiDiClasses(start)) {
    330                 return;
    331             }
    332             start=u_skipWhitespace(start);
    333             if(*start!=';') {
    334                 errln("missing ; separator on input line %s", line);
    335                 return;
    336             }
    337             start=u_skipWhitespace(start+1);
    338             char *end;
    339             uint32_t bitset=(uint32_t)strtoul(start, &end, 10);
    340             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
    341                 errln("input bitset parse error at %s", start);
    342                 return;
    343             }
    344             // Loop over the bitset.
    345             static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1 };
    346             static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL" };
    347             for(int i=0; i<=2; ++i) {
    348                 if(bitset&(1<<i)) {
    349                     ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
    350                                   paraLevels[i], NULL, errorCode);
    351                     const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
    352                     if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
    353                         errln("Input line %d: %s", (int)lineNumber, line);
    354                         return;
    355                     }
    356                     if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
    357                                     paraLevelNames[i])) {
    358                         // continue outerLoop;  does not exist in C++
    359                         // so just break out of the inner loop.
    360                         break;
    361                     }
    362                     if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
    363                         // continue outerLoop;  does not exist in C++
    364                         // so just break out of the inner loop.
    365                         break;
    366                     }
    367                 }
    368             }
    369         }
    370     }
    371 }
    372 
    373 static UChar printLevel(UBiDiLevel level) {
    374     if(level<UBIDI_DEFAULT_LTR) {
    375         return 0x30+level;
    376     } else {
    377         return 0x78;  // 'x'
    378     }
    379 }
    380 
    381 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    382     uint32_t actualDirectionBits=0;
    383     for(int32_t i=0; i<actualCount; ++i) {
    384         actualDirectionBits|=(1<<(actualLevels[i]&1));
    385     }
    386     return actualDirectionBits;
    387 }
    388 
    389 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
    390                                        const char *paraLevelName) {
    391     UBool isOk=TRUE;
    392     if(levelsCount!=actualCount) {
    393         errln("Wrong number of level values; expected %d actual %d",
    394               (int)levelsCount, (int)actualCount);
    395         isOk=FALSE;
    396     } else {
    397         for(int32_t i=0; i<actualCount; ++i) {
    398             if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
    399                 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
    400                     // ICU used a shortcut:
    401                     // Since the text is unidirectional, it did not store the resolved
    402                     // levels but just returns all levels as the paragraph level 0 or 1.
    403                     // The reordering result is the same, so this is fine.
    404                     break;
    405                 } else {
    406                     errln("Wrong level value at index %d; expected %d actual %d",
    407                           (int)i, levels[i], actualLevels[i]);
    408                     isOk=FALSE;
    409                     break;
    410                 }
    411             }
    412         }
    413     }
    414     if(!isOk) {
    415         printErrorLine(paraLevelName);
    416         UnicodeString els("Expected levels:   ");
    417         int32_t i;
    418         for(i=0; i<levelsCount; ++i) {
    419             els.append((UChar)0x20).append(printLevel(levels[i]));
    420         }
    421         UnicodeString als("Actual   levels:   ");
    422         for(i=0; i<actualCount; ++i) {
    423             als.append((UChar)0x20).append(printLevel(actualLevels[i]));
    424         }
    425         errln(els);
    426         errln(als);
    427     }
    428     return isOk;
    429 }
    430 
    431 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
    432 // does not work for custom BiDi class assignments
    433 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
    434 // Therefore we just skip the indexes for BiDi controls while comparing
    435 // with the expected ordering that has them omitted.
    436 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
    437     UBool isOk=TRUE;
    438     IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
    439     int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
    440     int32_t i, visualIndex;
    441     // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
    442     // and loop over each run's indexes, but that seems unnecessary for this test code.
    443     for(i=visualIndex=0; i<resultLength; ++i) {
    444         int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    445         if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
    446             errln("Input line %d: %s", (int)lineNumber, line);
    447             return FALSE;
    448         }
    449         if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
    450             continue;  // BiDi control, omitted from expected ordering.
    451         }
    452         if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
    453             errln("Wrong ordering value at visual index %d; expected %d actual %d",
    454                   (int)visualIndex, ordering[visualIndex], logicalIndex);
    455             isOk=FALSE;
    456             break;
    457         }
    458         ++visualIndex;
    459     }
    460     // visualIndex is now the visual length minus the BiDi controls,
    461     // which should match the length of the BidiTest.txt ordering.
    462     if(isOk && orderingCount!=visualIndex) {
    463         errln("Wrong number of ordering values; expected %d actual %d",
    464               (int)orderingCount, (int)visualIndex);
    465         isOk=FALSE;
    466     }
    467     if(!isOk) {
    468         printErrorLine(paraLevelName);
    469         UnicodeString eord("Expected ordering: ");
    470         for(i=0; i<orderingCount; ++i) {
    471             eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
    472         }
    473         UnicodeString aord("Actual   ordering: ");
    474         for(i=0; i<resultLength; ++i) {
    475             int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
    476             if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
    477                 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
    478             }
    479         }
    480         errln(eord);
    481         errln(aord);
    482     }
    483     return isOk;
    484 }
    485 
    486 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
    487     ++errorCount;
    488     errln("Input line %5d:   %s", (int)lineNumber, line);
    489     errln(UnicodeString("Input string:       ")+inputString);
    490     errln("Para level:         %s", paraLevelName);
    491 }
    492