1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: bidiconf.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009oct16 14 * created by: Markus W. Scherer 15 * 16 * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files. 17 */ 18 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "unicode/utypes.h" 23 #include "unicode/ubidi.h" 24 #include "unicode/errorcode.h" 25 #include "unicode/localpointer.h" 26 #include "unicode/putil.h" 27 #include "unicode/unistr.h" 28 #include "intltest.h" 29 #include "uparse.h" 30 31 class BiDiConformanceTest : public IntlTest { 32 public: 33 BiDiConformanceTest() : 34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), 35 errorCount(0) {} 36 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 38 39 void TestBidiTest(); 40 void TestBidiCharacterTest(); 41 private: 42 char *getUnidataPath(char path[]); 43 44 UBool parseLevels(const char *&start); 45 UBool parseOrdering(const char *start); 46 UBool parseInputStringFromBiDiClasses(const char *&start); 47 48 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); 49 UBool checkOrdering(UBiDi *ubidi); 50 51 void printErrorLine(); 52 53 char line[10000]; 54 UBiDiLevel levels[1000]; 55 uint32_t directionBits; 56 int32_t ordering[1000]; 57 int32_t lineNumber; 58 int32_t levelsCount; 59 int32_t orderingCount; 60 int32_t errorCount; 61 UnicodeString inputString; 62 const char *paraLevelName; 63 char levelNameString[12]; 64 }; 65 66 extern IntlTest *createBiDiConformanceTest() { 67 return new BiDiConformanceTest(); 68 } 69 70 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 71 if(exec) { 72 logln("TestSuite BiDiConformanceTest: "); 73 } 74 TESTCASE_AUTO_BEGIN; 75 TESTCASE_AUTO(TestBidiTest); 76 TESTCASE_AUTO(TestBidiCharacterTest); 77 TESTCASE_AUTO_END; 78 } 79 80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp). 81 char *BiDiConformanceTest::getUnidataPath(char path[]) { 82 IcuTestErrorCode errorCode(*this, "getUnidataPath"); 83 const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt") 84 85 // Look inside ICU_DATA first. 86 strcpy(path, pathToDataDirectory()); 87 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 88 FILE *f=fopen(path, "r"); 89 if(f!=NULL) { 90 fclose(f); 91 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 92 return path; 93 } 94 95 // As a fallback, try to guess where the source data was located 96 // at the time ICU was built, and look there. 97 # ifdef U_TOPSRCDIR 98 strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 99 # else 100 strcpy(path, loadTestData(errorCode)); 101 strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 102 U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 103 U_FILE_SEP_STRING "data"); 104 # endif 105 strcat(path, U_FILE_SEP_STRING); 106 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 107 f=fopen(path, "r"); 108 if(f!=NULL) { 109 fclose(f); 110 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 111 return path; 112 } 113 return NULL; 114 } 115 116 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); 117 118 UBool BiDiConformanceTest::parseLevels(const char *&start) { 119 directionBits=0; 120 levelsCount=0; 121 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { 122 if(*start=='x') { 123 levels[levelsCount++]=UBIDI_DEFAULT_LTR; 124 ++start; 125 } else { 126 char *end; 127 uint32_t value=(uint32_t)strtoul(start, &end, 10); 128 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') 129 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { 130 errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start); 131 printErrorLine(); 132 return FALSE; 133 } 134 levels[levelsCount++]=(UBiDiLevel)value; 135 directionBits|=(1<<(value&1)); 136 start=end; 137 } 138 } 139 return TRUE; 140 } 141 142 UBool BiDiConformanceTest::parseOrdering(const char *start) { 143 orderingCount=0; 144 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { 145 char *end; 146 uint32_t value=(uint32_t)strtoul(start, &end, 10); 147 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) { 148 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start); 149 printErrorLine(); 150 return FALSE; 151 } 152 ordering[orderingCount++]=(int32_t)value; 153 start=end; 154 } 155 return TRUE; 156 } 157 158 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ 159 0x6c, // 'l' for L 160 0x52, // 'R' for R 161 0x33, // '3' for EN 162 0x2d, // '-' for ES 163 0x25, // '%' for ET 164 0x39, // '9' for AN 165 0x2c, // ',' for CS 166 0x2f, // '/' for B 167 0x5f, // '_' for S 168 0x20, // ' ' for WS 169 0x3d, // '=' for ON 170 0x65, // 'e' for LRE 171 0x6f, // 'o' for LRO 172 0x41, // 'A' for AL 173 0x45, // 'E' for RLE 174 0x4f, // 'O' for RLO 175 0x2a, // '*' for PDF 176 0x60, // '`' for NSM 177 0x7c, // '|' for BN 178 // new in Unicode 6.3/ICU 52 179 0x53, // 'S' for FSI 180 0x69, // 'i' for LRI 181 0x49, // 'I' for RLI 182 0x2e // '.' for PDI 183 }; 184 185 U_CDECL_BEGIN 186 187 static UCharDirection U_CALLCONV 188 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { 189 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { 190 if(c==charFromBiDiClass[i]) { 191 return (UCharDirection)i; 192 } 193 } 194 // Character not in our hardcoded table. 195 // Should not occur during testing. 196 return U_BIDI_CLASS_DEFAULT; 197 } 198 199 U_CDECL_END 200 201 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ 202 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 203 }; 204 205 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { 206 inputString.remove(); 207 /* 208 * Lengthy but fast BiDi class parser. 209 * A simple parser could terminate or extract the name string and use 210 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); 211 * but that makes this test take significantly more time. 212 */ 213 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { 214 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; 215 // Compare each character once until we have a match on 216 // a complete, short BiDi class name. 217 if(start[0]=='L') { 218 if(start[1]=='R') { 219 if(start[2]=='E') { 220 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; 221 } else if(start[2]=='I') { 222 biDiClass=U_LEFT_TO_RIGHT_ISOLATE; 223 } else if(start[2]=='O') { 224 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; 225 } 226 } else { 227 biDiClass=U_LEFT_TO_RIGHT; 228 } 229 } else if(start[0]=='R') { 230 if(start[1]=='L') { 231 if(start[2]=='E') { 232 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; 233 } else if(start[2]=='I') { 234 biDiClass=U_RIGHT_TO_LEFT_ISOLATE; 235 } else if(start[2]=='O') { 236 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; 237 } 238 } else { 239 biDiClass=U_RIGHT_TO_LEFT; 240 } 241 } else if(start[0]=='E') { 242 if(start[1]=='N') { 243 biDiClass=U_EUROPEAN_NUMBER; 244 } else if(start[1]=='S') { 245 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; 246 } else if(start[1]=='T') { 247 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; 248 } 249 } else if(start[0]=='A') { 250 if(start[1]=='L') { 251 biDiClass=U_RIGHT_TO_LEFT_ARABIC; 252 } else if(start[1]=='N') { 253 biDiClass=U_ARABIC_NUMBER; 254 } 255 } else if(start[0]=='C' && start[1]=='S') { 256 biDiClass=U_COMMON_NUMBER_SEPARATOR; 257 } else if(start[0]=='B') { 258 if(start[1]=='N') { 259 biDiClass=U_BOUNDARY_NEUTRAL; 260 } else { 261 biDiClass=U_BLOCK_SEPARATOR; 262 } 263 } else if(start[0]=='S') { 264 biDiClass=U_SEGMENT_SEPARATOR; 265 } else if(start[0]=='W' && start[1]=='S') { 266 biDiClass=U_WHITE_SPACE_NEUTRAL; 267 } else if(start[0]=='O' && start[1]=='N') { 268 biDiClass=U_OTHER_NEUTRAL; 269 } else if(start[0]=='P' && start[1]=='D') { 270 if(start[2]=='F') { 271 biDiClass=U_POP_DIRECTIONAL_FORMAT; 272 } else if(start[2]=='I') { 273 biDiClass=U_POP_DIRECTIONAL_ISOLATE; 274 } 275 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { 276 biDiClass=U_DIR_NON_SPACING_MARK; 277 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { 278 biDiClass=U_FIRST_STRONG_ISOLATE; 279 } 280 // Now we verify that the class name is terminated properly, 281 // and not just the start of a longer word. 282 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; 283 char c=start[biDiClassNameLength]; 284 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) { 285 inputString.append(charFromBiDiClass[biDiClass]); 286 start+=biDiClassNameLength; 287 continue; 288 } 289 errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start); 290 printErrorLine(); 291 return FALSE; 292 } 293 return TRUE; 294 } 295 296 void BiDiConformanceTest::TestBidiTest() { 297 IcuTestErrorCode errorCode(*this, "TestBidiTest"); 298 const char *sourceTestDataPath=getSourceTestData(errorCode); 299 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " 300 "folder (getSourceTestData())")) { 301 return; 302 } 303 char bidiTestPath[400]; 304 strcpy(bidiTestPath, sourceTestDataPath); 305 strcat(bidiTestPath, "BidiTest.txt"); 306 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); 307 if(bidiTestFile.isNull()) { 308 errln("unable to open %s", bidiTestPath); 309 return; 310 } 311 LocalUBiDiPointer ubidi(ubidi_open()); 312 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, 313 NULL, NULL, errorCode); 314 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { 315 return; 316 } 317 lineNumber=0; 318 levelsCount=0; 319 orderingCount=0; 320 errorCount=0; 321 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { 322 ++lineNumber; 323 // Remove trailing comments and whitespace. 324 char *commentStart=strchr(line, '#'); 325 if(commentStart!=NULL) { 326 *commentStart=0; 327 } 328 u_rtrim(line); 329 const char *start=u_skipWhitespace(line); 330 if(*start==0) { 331 continue; // Skip empty and comment-only lines. 332 } 333 if(*start=='@') { 334 ++start; 335 if(0==strncmp(start, "Levels:", 7)) { 336 start+=7; 337 if(!parseLevels(start)) { 338 return; 339 } 340 } else if(0==strncmp(start, "Reorder:", 8)) { 341 if(!parseOrdering(start+8)) { 342 return; 343 } 344 } 345 // Skip unknown @Xyz: ... 346 } else { 347 if(!parseInputStringFromBiDiClasses(start)) { 348 return; 349 } 350 start=u_skipWhitespace(start); 351 if(*start!=';') { 352 errln("missing ; separator on input line %s", line); 353 return; 354 } 355 start=u_skipWhitespace(start+1); 356 char *end; 357 uint32_t bitset=(uint32_t)strtoul(start, &end, 16); 358 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { 359 errln("input bitset parse error at %s", start); 360 return; 361 } 362 // Loop over the bitset. 363 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; 364 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; 365 for(int i=0; i<=3; ++i) { 366 if(bitset&(1<<i)) { 367 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), 368 paraLevels[i], NULL, errorCode); 369 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); 370 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { 371 errln("Input line %d: %s", (int)lineNumber, line); 372 return; 373 } 374 paraLevelName=paraLevelNames[i]; 375 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { 376 // continue outerLoop; does not exist in C++ 377 // so just break out of the inner loop. 378 break; 379 } 380 if(!checkOrdering(ubidi.getAlias())) { 381 // continue outerLoop; does not exist in C++ 382 // so just break out of the inner loop. 383 break; 384 } 385 } 386 } 387 } 388 } 389 } 390 391 /* 392 ******************************************************************************* 393 * 394 * created on: 2013jul01 395 * created by: Matitiahu Allouche 396 397 This function performs a conformance test for implementations of the 398 Unicode Bidirectional Algorithm, specified in UAX #9: Unicode 399 Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ 400 401 Each test case is represented in a single line which is read from a file 402 named BidiCharacter.txt. Empty, blank and comment lines may also appear 403 in this file. 404 405 The format of the test data is specified below. Note that each test 406 case constitutes a single line of text; reordering is applied within a 407 single line and independently of a rendering engine, and rules L3 and L4 408 are out of scope. 409 410 The number sign '#' is the comment character: everything is ignored from 411 the occurrence of '#' until the end of the line, 412 Empty lines and lines containing only spaces and/or comments are ignored. 413 414 Lines which represent test cases consist of 4 or 5 fields separated by a 415 semicolon. Each field consists of tokens separated by whitespace (space 416 or Tab). Whitespace before and after semicolons is optional. 417 418 Field 0: A sequence of hexadecimal code point values separated by space 419 420 Field 1: A value representing the paragraph direction, as follows: 421 - 0 represents left-to-right 422 - 1 represents right-to-left 423 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm 424 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm 425 - a negative number whose absolute value is taken as paragraph level; 426 this may be useful to test cases where the embedding level approaches 427 or exceeds the maximum embedding level. 428 429 Field 2: The resolved paragraph embedding level. If the input (field 0) 430 includes more than one paragraph, this field represents the 431 resolved level of the first paragraph. 432 433 Field 3: An ordered list of resulting levels for each token in field 0 434 (each token represents one source character). 435 The UBA does not assign levels to certain characters (e.g. LRO); 436 characters removed in rule X9 are indicated with an 'x'. 437 438 Field 4: An ordered list of indices showing the resulting visual ordering 439 from left to right; characters with a resolved level of 'x' are 440 skipped. The number are zero-based. Each index corresponds to 441 a character in the reordered (visual) string. It represents the 442 index of the source character in the input (field 0). 443 This field is optional. When it is absent, the visual ordering 444 is not verified. 445 446 Examples: 447 448 # This is a comment line. 449 L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 450 L L ON R;0;0;0 0 0 1;0 1 2 3 451 452 # Note: in the next line, 'B' represents a block separator, not the letter 'B'. 453 LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 454 # Note: in the next line, 'b' represents the letter 'b', not a block separator. 455 a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 456 457 a R R x ; 1 ; 1 ; 2 1 1 2 458 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 459 460 * 461 ******************************************************************************* 462 */ 463 void BiDiConformanceTest::TestBidiCharacterTest() { 464 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); 465 const char *sourceTestDataPath=getSourceTestData(errorCode); 466 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " 467 "folder (getSourceTestData())")) { 468 return; 469 } 470 char bidiTestPath[400]; 471 strcpy(bidiTestPath, sourceTestDataPath); 472 strcat(bidiTestPath, "BidiCharacterTest.txt"); 473 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); 474 if(bidiTestFile.isNull()) { 475 errln("unable to open %s", bidiTestPath); 476 return; 477 } 478 LocalUBiDiPointer ubidi(ubidi_open()); 479 lineNumber=0; 480 levelsCount=0; 481 orderingCount=0; 482 errorCount=0; 483 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { 484 ++lineNumber; 485 paraLevelName="N/A"; 486 inputString="N/A"; 487 // Remove trailing comments and whitespace. 488 char *commentStart=strchr(line, '#'); 489 if(commentStart!=NULL) { 490 *commentStart=0; 491 } 492 u_rtrim(line); 493 const char *start=u_skipWhitespace(line); 494 if(*start==0) { 495 continue; // Skip empty and comment-only lines. 496 } 497 // Parse the code point string in field 0. 498 UChar *buffer=inputString.getBuffer(200); 499 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode); 500 if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { 501 errln("Input line %d: %s", (int)lineNumber, line); 502 inputString.remove(); 503 continue; 504 } 505 inputString.releaseBuffer(length); 506 start=strchr(start, ';'); 507 if(start==NULL) { 508 errorCount++; 509 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); 510 continue; 511 } 512 start=u_skipWhitespace(start+1); 513 char *end; 514 int32_t paraDirection=(int32_t)strtol(start, &end, 10); 515 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; 516 if(paraDirection==0) { 517 paraLevel=0; 518 paraLevelName="LTR"; 519 } 520 else if(paraDirection==1) { 521 paraLevel=1; 522 paraLevelName="RTL"; 523 } 524 else if(paraDirection==2) { 525 paraLevel=UBIDI_DEFAULT_LTR; 526 paraLevelName="Auto/LTR"; 527 } 528 else if(paraDirection==3) { 529 paraLevel=UBIDI_DEFAULT_RTL; 530 paraLevelName="Auto/RTL"; 531 } 532 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) { 533 paraLevel=(UBiDiLevel)(-paraDirection); 534 sprintf(levelNameString, "%d", (int)paraLevel); 535 paraLevelName=levelNameString; 536 } 537 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || 538 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { 539 errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start); 540 printErrorLine(); 541 continue; 542 } 543 start=u_skipWhitespace(end); 544 if(*start!=';') { 545 errorCount++; 546 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); 547 continue; 548 } 549 start++; 550 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); 551 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || 552 resolvedParaLevel>1) { 553 errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start); 554 printErrorLine(); 555 continue; 556 } 557 start=u_skipWhitespace(end); 558 if(*start!=';') { 559 errorCount++; 560 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); 561 return; 562 } 563 start++; 564 if(!parseLevels(start)) { 565 continue; 566 } 567 start=u_skipWhitespace(start); 568 if(*start==';') { 569 if(!parseOrdering(start+1)) { 570 continue; 571 } 572 } 573 else 574 orderingCount=-1; 575 576 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), 577 paraLevel, NULL, errorCode); 578 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); 579 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { 580 errln("Input line %d: %s", (int)lineNumber, line); 581 continue; 582 } 583 UBiDiLevel actualLevel; 584 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { 585 printErrorLine(); 586 errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d", 587 (int)lineNumber, resolvedParaLevel, actualLevel); 588 continue; 589 } 590 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { 591 continue; 592 } 593 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { 594 continue; 595 } 596 } 597 } 598 599 static UChar printLevel(UBiDiLevel level) { 600 if(level<UBIDI_DEFAULT_LTR) { 601 return 0x30+level; 602 } else { 603 return 0x78; // 'x' 604 } 605 } 606 607 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { 608 uint32_t actualDirectionBits=0; 609 for(int32_t i=0; i<actualCount; ++i) { 610 actualDirectionBits|=(1<<(actualLevels[i]&1)); 611 } 612 return actualDirectionBits; 613 } 614 615 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) { 616 UBool isOk=TRUE; 617 if(levelsCount!=actualCount) { 618 errln("\nError on line %d: Wrong number of level values; expected %d actual %d", 619 (int)lineNumber, (int)levelsCount, (int)actualCount); 620 isOk=FALSE; 621 } else { 622 for(int32_t i=0; i<actualCount; ++i) { 623 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { 624 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { 625 // ICU used a shortcut: 626 // Since the text is unidirectional, it did not store the resolved 627 // levels but just returns all levels as the paragraph level 0 or 1. 628 // The reordering result is the same, so this is fine. 629 break; 630 } else { 631 errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d", 632 (int)lineNumber, (int)i, levels[i], actualLevels[i]); 633 isOk=FALSE; 634 break; 635 } 636 } 637 } 638 } 639 if(!isOk) { 640 printErrorLine(); 641 UnicodeString els("Expected levels: "); 642 int32_t i; 643 for(i=0; i<levelsCount; ++i) { 644 els.append((UChar)0x20).append(printLevel(levels[i])); 645 } 646 UnicodeString als("Actual levels: "); 647 for(i=0; i<actualCount; ++i) { 648 als.append((UChar)0x20).append(printLevel(actualLevels[i])); 649 } 650 errln(els); 651 errln(als); 652 } 653 return isOk; 654 } 655 656 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); 657 // does not work for custom BiDi class assignments 658 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. 659 // Therefore we just skip the indexes for BiDi controls while comparing 660 // with the expected ordering that has them omitted. 661 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { 662 UBool isOk=TRUE; 663 IcuTestErrorCode errorCode(*this, "checkOrdering()"); 664 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls 665 int32_t i, visualIndex; 666 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() 667 // and loop over each run's indexes, but that seems unnecessary for this test code. 668 for(i=visualIndex=0; i<resultLength; ++i) { 669 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 670 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { 671 errln("Input line %d: %s", (int)lineNumber, line); 672 return FALSE; 673 } 674 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { 675 continue; // BiDi control, omitted from expected ordering. 676 } 677 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { 678 errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d", 679 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex); 680 isOk=FALSE; 681 break; 682 } 683 ++visualIndex; 684 } 685 // visualIndex is now the visual length minus the BiDi controls, 686 // which should match the length of the BidiTest.txt ordering. 687 if(isOk && orderingCount!=visualIndex) { 688 errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d", 689 (int)lineNumber, (int)orderingCount, (int)visualIndex); 690 isOk=FALSE; 691 } 692 if(!isOk) { 693 printErrorLine(); 694 UnicodeString eord("Expected ordering: "); 695 for(i=0; i<orderingCount; ++i) { 696 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); 697 } 698 UnicodeString aord("Actual ordering: "); 699 for(i=0; i<resultLength; ++i) { 700 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 701 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { 702 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); 703 } 704 } 705 errln(eord); 706 errln(aord); 707 } 708 return isOk; 709 } 710 711 void BiDiConformanceTest::printErrorLine() { 712 ++errorCount; 713 errln("Input line %5d: %s", (int)lineNumber, line); 714 errln(UnicodeString("Input string: ")+inputString); 715 errln("Para level: %s", paraLevelName); 716 } 717