1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: bidiconf.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009oct16 14 * created by: Markus W. Scherer 15 * 16 * BiDi conformance test, using the Unicode BidiTest.txt file. 17 */ 18 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "unicode/utypes.h" 23 #include "unicode/ubidi.h" 24 #include "unicode/errorcode.h" 25 #include "unicode/localpointer.h" 26 #include "unicode/putil.h" 27 #include "unicode/unistr.h" 28 #include "intltest.h" 29 #include "uparse.h" 30 31 class BiDiConformanceTest : public IntlTest { 32 public: 33 BiDiConformanceTest() : 34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), 35 errorCount(0) {} 36 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 38 39 void TestBidiTest(); 40 private: 41 char *getUnidataPath(char path[]); 42 43 UBool parseLevels(const char *start); 44 UBool parseOrdering(const char *start); 45 UBool parseInputStringFromBiDiClasses(const char *&start); 46 47 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, 48 const char *paraLevelName); 49 UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName); 50 51 void printErrorLine(const char *paraLevelName); 52 53 char line[10000]; 54 UBiDiLevel levels[1000]; 55 uint32_t directionBits; 56 int32_t ordering[1000]; 57 int32_t lineNumber; 58 int32_t levelsCount; 59 int32_t orderingCount; 60 int32_t errorCount; 61 UnicodeString inputString; 62 }; 63 64 extern IntlTest *createBiDiConformanceTest() { 65 return new BiDiConformanceTest(); 66 } 67 68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 69 if(exec) { 70 logln("TestSuite BiDiConformanceTest: "); 71 } 72 switch (index) { 73 TESTCASE(0, TestBidiTest); 74 default: 75 name=""; 76 break; // needed to end the loop 77 } 78 } 79 80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp). 81 char *BiDiConformanceTest::getUnidataPath(char path[]) { 82 IcuTestErrorCode errorCode(*this, "getUnidataPath"); 83 const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt") 84 85 // Look inside ICU_DATA first. 86 strcpy(path, pathToDataDirectory()); 87 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 88 FILE *f=fopen(path, "r"); 89 if(f!=NULL) { 90 fclose(f); 91 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 92 return path; 93 } 94 95 // As a fallback, try to guess where the source data was located 96 // at the time ICU was built, and look there. 97 # ifdef U_TOPSRCDIR 98 strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 99 # else 100 strcpy(path, loadTestData(errorCode)); 101 strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 102 U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 103 U_FILE_SEP_STRING "data"); 104 # endif 105 strcat(path, U_FILE_SEP_STRING); 106 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 107 f=fopen(path, "r"); 108 if(f!=NULL) { 109 fclose(f); 110 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 111 return path; 112 } 113 return NULL; 114 } 115 116 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); 117 118 UBool BiDiConformanceTest::parseLevels(const char *start) { 119 directionBits=0; 120 levelsCount=0; 121 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { 122 if(*start=='x') { 123 levels[levelsCount++]=UBIDI_DEFAULT_LTR; 124 ++start; 125 } else { 126 char *end; 127 uint32_t value=(uint32_t)strtoul(start, &end, 10); 128 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { 129 errln("@Levels: parse error at %s", start); 130 return FALSE; 131 } 132 levels[levelsCount++]=(UBiDiLevel)value; 133 directionBits|=(1<<(value&1)); 134 start=end; 135 } 136 } 137 return TRUE; 138 } 139 140 UBool BiDiConformanceTest::parseOrdering(const char *start) { 141 orderingCount=0; 142 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { 143 char *end; 144 uint32_t value=(uint32_t)strtoul(start, &end, 10); 145 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) { 146 errln("@Reorder: parse error at %s", start); 147 return FALSE; 148 } 149 ordering[orderingCount++]=(int32_t)value; 150 start=end; 151 } 152 return TRUE; 153 } 154 155 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ 156 0x6c, // 'l' for L 157 0x52, // 'R' for R 158 0x33, // '3' for EN 159 0x2d, // '-' for ES 160 0x25, // '%' for ET 161 0x39, // '9' for AN 162 0x2c, // ',' for CS 163 0x2f, // '/' for B 164 0x5f, // '_' for S 165 0x20, // ' ' for WS 166 0x3d, // '=' for ON 167 0x65, // 'e' for LRE 168 0x6f, // 'o' for LRO 169 0x41, // 'A' for AL 170 0x45, // 'E' for RLE 171 0x4f, // 'O' for RLO 172 0x2a, // '*' for PDF 173 0x60, // '`' for NSM 174 0x7c // '|' for BN 175 }; 176 177 U_CDECL_BEGIN 178 179 static UCharDirection U_CALLCONV 180 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { 181 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { 182 if(c==charFromBiDiClass[i]) { 183 return (UCharDirection)i; 184 } 185 } 186 // Character not in our hardcoded table. 187 // Should not occur during testing. 188 return U_BIDI_CLASS_DEFAULT; 189 } 190 191 U_CDECL_END 192 193 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ 194 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0 195 }; 196 197 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { 198 inputString.remove(); 199 /* 200 * Lengthy but fast BiDi class parser. 201 * A simple parser could terminate or extract the name string and use 202 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); 203 * but that makes this test take significantly more time. 204 */ 205 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { 206 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; 207 // Compare each character once until we have a match on 208 // a complete, short BiDi class name. 209 if(start[0]=='L') { 210 if(start[1]=='R') { 211 if(start[2]=='E') { 212 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; 213 } else if(start[2]=='O') { 214 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; 215 } 216 } else { 217 biDiClass=U_LEFT_TO_RIGHT; 218 } 219 } else if(start[0]=='R') { 220 if(start[1]=='L') { 221 if(start[2]=='E') { 222 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; 223 } else if(start[2]=='O') { 224 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; 225 } 226 } else { 227 biDiClass=U_RIGHT_TO_LEFT; 228 } 229 } else if(start[0]=='E') { 230 if(start[1]=='N') { 231 biDiClass=U_EUROPEAN_NUMBER; 232 } else if(start[1]=='S') { 233 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; 234 } else if(start[1]=='T') { 235 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; 236 } 237 } else if(start[0]=='A') { 238 if(start[1]=='L') { 239 biDiClass=U_RIGHT_TO_LEFT_ARABIC; 240 } else if(start[1]=='N') { 241 biDiClass=U_ARABIC_NUMBER; 242 } 243 } else if(start[0]=='C' && start[1]=='S') { 244 biDiClass=U_COMMON_NUMBER_SEPARATOR; 245 } else if(start[0]=='B') { 246 if(start[1]=='N') { 247 biDiClass=U_BOUNDARY_NEUTRAL; 248 } else { 249 biDiClass=U_BLOCK_SEPARATOR; 250 } 251 } else if(start[0]=='S') { 252 biDiClass=U_SEGMENT_SEPARATOR; 253 } else if(start[0]=='W' && start[1]=='S') { 254 biDiClass=U_WHITE_SPACE_NEUTRAL; 255 } else if(start[0]=='O' && start[1]=='N') { 256 biDiClass=U_OTHER_NEUTRAL; 257 } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') { 258 biDiClass=U_POP_DIRECTIONAL_FORMAT; 259 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { 260 biDiClass=U_DIR_NON_SPACING_MARK; 261 } 262 // Now we verify that the class name is terminated properly, 263 // and not just the start of a longer word. 264 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; 265 char c=start[biDiClassNameLength]; 266 if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) { 267 errln("BiDi class string not recognized at %s", start); 268 return FALSE; 269 } 270 inputString.append(charFromBiDiClass[biDiClass]); 271 start+=biDiClassNameLength; 272 } 273 return TRUE; 274 } 275 276 void BiDiConformanceTest::TestBidiTest() { 277 IcuTestErrorCode errorCode(*this, "TestBidiTest"); 278 const char *sourceTestDataPath=getSourceTestData(errorCode); 279 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " 280 "folder (getSourceTestData())")) { 281 return; 282 } 283 char bidiTestPath[400]; 284 strcpy(bidiTestPath, sourceTestDataPath); 285 strcat(bidiTestPath, "BidiTest.txt"); 286 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); 287 if(bidiTestFile.isNull()) { 288 errln("unable to open %s", bidiTestPath); 289 return; 290 } 291 LocalUBiDiPointer ubidi(ubidi_open()); 292 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, 293 NULL, NULL, errorCode); 294 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { 295 return; 296 } 297 lineNumber=0; 298 levelsCount=0; 299 orderingCount=0; 300 errorCount=0; 301 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { 302 ++lineNumber; 303 // Remove trailing comments and whitespace. 304 char *commentStart=strchr(line, '#'); 305 if(commentStart!=NULL) { 306 *commentStart=0; 307 } 308 u_rtrim(line); 309 const char *start=u_skipWhitespace(line); 310 if(*start==0) { 311 continue; // Skip empty and comment-only lines. 312 } 313 if(*start=='@') { 314 ++start; 315 if(0==strncmp(start, "Levels:", 7)) { 316 if(!parseLevels(start+7)) { 317 return; 318 } 319 } else if(0==strncmp(start, "Reorder:", 8)) { 320 if(!parseOrdering(start+8)) { 321 return; 322 } 323 } 324 // Skip unknown @Xyz: ... 325 } else { 326 if(!parseInputStringFromBiDiClasses(start)) { 327 return; 328 } 329 start=u_skipWhitespace(start); 330 if(*start!=';') { 331 errln("missing ; separator on input line %s", line); 332 return; 333 } 334 start=u_skipWhitespace(start+1); 335 char *end; 336 uint32_t bitset=(uint32_t)strtoul(start, &end, 16); 337 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { 338 errln("input bitset parse error at %s", start); 339 return; 340 } 341 // Loop over the bitset. 342 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; 343 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; 344 for(int i=0; i<=3; ++i) { 345 if(bitset&(1<<i)) { 346 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), 347 paraLevels[i], NULL, errorCode); 348 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); 349 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { 350 errln("Input line %d: %s", (int)lineNumber, line); 351 return; 352 } 353 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()), 354 paraLevelNames[i])) { 355 // continue outerLoop; does not exist in C++ 356 // so just break out of the inner loop. 357 break; 358 } 359 if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) { 360 // continue outerLoop; does not exist in C++ 361 // so just break out of the inner loop. 362 break; 363 } 364 } 365 } 366 } 367 } 368 } 369 370 static UChar printLevel(UBiDiLevel level) { 371 if(level<UBIDI_DEFAULT_LTR) { 372 return 0x30+level; 373 } else { 374 return 0x78; // 'x' 375 } 376 } 377 378 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { 379 uint32_t actualDirectionBits=0; 380 for(int32_t i=0; i<actualCount; ++i) { 381 actualDirectionBits|=(1<<(actualLevels[i]&1)); 382 } 383 return actualDirectionBits; 384 } 385 386 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, 387 const char *paraLevelName) { 388 UBool isOk=TRUE; 389 if(levelsCount!=actualCount) { 390 errln("Wrong number of level values; expected %d actual %d", 391 (int)levelsCount, (int)actualCount); 392 isOk=FALSE; 393 } else { 394 for(int32_t i=0; i<actualCount; ++i) { 395 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { 396 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { 397 // ICU used a shortcut: 398 // Since the text is unidirectional, it did not store the resolved 399 // levels but just returns all levels as the paragraph level 0 or 1. 400 // The reordering result is the same, so this is fine. 401 break; 402 } else { 403 errln("Wrong level value at index %d; expected %d actual %d", 404 (int)i, levels[i], actualLevels[i]); 405 isOk=FALSE; 406 break; 407 } 408 } 409 } 410 } 411 if(!isOk) { 412 printErrorLine(paraLevelName); 413 UnicodeString els("Expected levels: "); 414 int32_t i; 415 for(i=0; i<levelsCount; ++i) { 416 els.append((UChar)0x20).append(printLevel(levels[i])); 417 } 418 UnicodeString als("Actual levels: "); 419 for(i=0; i<actualCount; ++i) { 420 als.append((UChar)0x20).append(printLevel(actualLevels[i])); 421 } 422 errln(els); 423 errln(als); 424 } 425 return isOk; 426 } 427 428 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); 429 // does not work for custom BiDi class assignments 430 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. 431 // Therefore we just skip the indexes for BiDi controls while comparing 432 // with the expected ordering that has them omitted. 433 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) { 434 UBool isOk=TRUE; 435 IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()"); 436 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls 437 int32_t i, visualIndex; 438 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() 439 // and loop over each run's indexes, but that seems unnecessary for this test code. 440 for(i=visualIndex=0; i<resultLength; ++i) { 441 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 442 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { 443 errln("Input line %d: %s", (int)lineNumber, line); 444 return FALSE; 445 } 446 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { 447 continue; // BiDi control, omitted from expected ordering. 448 } 449 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { 450 errln("Wrong ordering value at visual index %d; expected %d actual %d", 451 (int)visualIndex, ordering[visualIndex], logicalIndex); 452 isOk=FALSE; 453 break; 454 } 455 ++visualIndex; 456 } 457 // visualIndex is now the visual length minus the BiDi controls, 458 // which should match the length of the BidiTest.txt ordering. 459 if(isOk && orderingCount!=visualIndex) { 460 errln("Wrong number of ordering values; expected %d actual %d", 461 (int)orderingCount, (int)visualIndex); 462 isOk=FALSE; 463 } 464 if(!isOk) { 465 printErrorLine(paraLevelName); 466 UnicodeString eord("Expected ordering: "); 467 for(i=0; i<orderingCount; ++i) { 468 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); 469 } 470 UnicodeString aord("Actual ordering: "); 471 for(i=0; i<resultLength; ++i) { 472 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 473 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { 474 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); 475 } 476 } 477 errln(eord); 478 errln(aord); 479 } 480 return isOk; 481 } 482 483 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) { 484 ++errorCount; 485 errln("Input line %5d: %s", (int)lineNumber, line); 486 errln(UnicodeString("Input string: ")+inputString); 487 errln("Para level: %s", paraLevelName); 488 } 489