1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/ustring.h" 8 #include "unicode/uchar.h" 9 #include "unicode/uniset.h" 10 #include "unicode/putil.h" 11 #include "cstring.h" 12 #include "uparse.h" 13 #include "ucdtest.h" 14 15 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) 16 17 UnicodeTest::UnicodeTest() 18 { 19 } 20 21 UnicodeTest::~UnicodeTest() 22 { 23 } 24 25 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 26 { 27 if (exec) logln("TestSuite UnicodeTest: "); 28 switch (index) { 29 case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break; 30 case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break; 31 default: name = ""; break; //needed to end loop 32 } 33 } 34 35 //==================================================== 36 // private data used by the tests 37 //==================================================== 38 39 // test DerivedCoreProperties.txt ------------------------------------------- 40 41 // copied from genprops.c 42 static int32_t 43 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { 44 const char *t, *z; 45 int32_t i, j; 46 47 s=u_skipWhitespace(s); 48 for(i=0; i<countTokens; ++i) { 49 t=tokens[i]; 50 if(t!=NULL) { 51 for(j=0;; ++j) { 52 if(t[j]!=0) { 53 if(s[j]!=t[j]) { 54 break; 55 } 56 } else { 57 z=u_skipWhitespace(s+j); 58 if(*z==';' || *z==0) { 59 return i; 60 } else { 61 break; 62 } 63 } 64 } 65 } 66 } 67 return -1; 68 } 69 70 static const char *const 71 derivedCorePropsNames[]={ 72 "Math", 73 "Alphabetic", 74 "Lowercase", 75 "Uppercase", 76 "ID_Start", 77 "ID_Continue", 78 "XID_Start", 79 "XID_Continue", 80 "Default_Ignorable_Code_Point", 81 "Grapheme_Extend", 82 "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */ 83 "Grapheme_Base" 84 }; 85 86 static const UProperty 87 derivedCorePropsIndex[]={ 88 UCHAR_MATH, 89 UCHAR_ALPHABETIC, 90 UCHAR_LOWERCASE, 91 UCHAR_UPPERCASE, 92 UCHAR_ID_START, 93 UCHAR_ID_CONTINUE, 94 UCHAR_XID_START, 95 UCHAR_XID_CONTINUE, 96 UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 97 UCHAR_GRAPHEME_EXTEND, 98 UCHAR_GRAPHEME_LINK, 99 UCHAR_GRAPHEME_BASE 100 }; 101 102 U_CFUNC void U_CALLCONV 103 derivedCorePropsLineFn(void *context, 104 char *fields[][2], int32_t /* fieldCount */, 105 UErrorCode *pErrorCode) 106 { 107 UnicodeTest *me=(UnicodeTest *)context; 108 uint32_t start, end; 109 int32_t i; 110 111 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); 112 if(U_FAILURE(*pErrorCode)) { 113 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]); 114 return; 115 } 116 117 /* parse derived binary property name, ignore unknown names */ 118 i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]); 119 if(i<0) { 120 me->errln("UnicodeTest warning: unknown property name '%s' in \n", fields[1][0]); 121 return; 122 } 123 124 me->derivedCoreProps[i].add(start, end); 125 } 126 127 void UnicodeTest::TestAdditionalProperties() { 128 // test DerivedCoreProperties.txt 129 if(LENGTHOF(derivedCoreProps)<LENGTHOF(derivedCorePropsNames)) { 130 errln("error: UnicodeTest::derivedCoreProps[] too short, need at least %d UnicodeSets\n", 131 LENGTHOF(derivedCorePropsNames)); 132 return; 133 } 134 if(LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)) { 135 errln("error in ucdtest.cpp: LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)\n"); 136 return; 137 } 138 139 char newPath[256]; 140 char backupPath[256]; 141 char *fields[2][2]; 142 UErrorCode errorCode=U_ZERO_ERROR; 143 144 /* Look inside ICU_DATA first */ 145 strcpy(newPath, pathToDataDirectory()); 146 strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); 147 148 // As a fallback, try to guess where the source data was located 149 // at the time ICU was built, and look there. 150 # ifdef U_TOPSRCDIR 151 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 152 # else 153 strcpy(backupPath, loadTestData(errorCode)); 154 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); 155 # endif 156 strcat(backupPath, U_FILE_SEP_STRING); 157 strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); 158 159 u_parseDelimitedFile(newPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode); 160 161 if(errorCode==U_FILE_ACCESS_ERROR) { 162 errorCode=U_ZERO_ERROR; 163 u_parseDelimitedFile(backupPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode); 164 } 165 if(U_FAILURE(errorCode)) { 166 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode)); 167 return; 168 } 169 170 // now we have all derived core properties in the UnicodeSets 171 // run them all through the API 172 int32_t rangeCount, range; 173 uint32_t i; 174 UChar32 start, end; 175 int32_t noErrors = 0; 176 177 // test all TRUE properties 178 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) { 179 rangeCount=derivedCoreProps[i].getRangeCount(); 180 for(range=0; range<rangeCount; ++range) { 181 start=derivedCoreProps[i].getRangeStart(range); 182 end=derivedCoreProps[i].getRangeEnd(range); 183 for(; start<=end; ++start) { 184 if(!u_hasBinaryProperty(start, derivedCorePropsIndex[i])) { 185 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedCorePropsNames[i]); 186 if(noErrors++ > 100) { 187 errln("Too many errors, moving to the next test"); 188 break; 189 } 190 } 191 } 192 } 193 } 194 195 noErrors = 0; 196 // invert all properties 197 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) { 198 derivedCoreProps[i].complement(); 199 } 200 201 // test all FALSE properties 202 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) { 203 rangeCount=derivedCoreProps[i].getRangeCount(); 204 for(range=0; range<rangeCount; ++range) { 205 start=derivedCoreProps[i].getRangeStart(range); 206 end=derivedCoreProps[i].getRangeEnd(range); 207 for(; start<=end; ++start) { 208 if(u_hasBinaryProperty(start, derivedCorePropsIndex[i])) { 209 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedCorePropsNames[i]); 210 if(noErrors++ > 100) { 211 errln("Too many errors, moving to the next test"); 212 break; 213 } 214 } 215 } 216 } 217 } 218 } 219 220 void UnicodeTest::TestBinaryValues() { 221 /* 222 * Unicode 5.1 explicitly defines binary property value aliases. 223 * Verify that they are all recognized. 224 */ 225 UErrorCode errorCode=U_ZERO_ERROR; 226 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); 227 if(U_FAILURE(errorCode)) { 228 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode)); 229 return; 230 } 231 232 static const char *const falseValues[]={ "N", "No", "F", "False" }; 233 static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; 234 int32_t i; 235 for(i=0; i<LENGTHOF(falseValues); ++i) { 236 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 237 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV)); 238 errorCode=U_ZERO_ERROR; 239 UnicodeSet set(pattern, errorCode); 240 if(U_FAILURE(errorCode)) { 241 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode)); 242 continue; 243 } 244 set.complement(); 245 if(set!=alpha) { 246 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]); 247 } 248 } 249 for(i=0; i<LENGTHOF(trueValues); ++i) { 250 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 251 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV)); 252 errorCode=U_ZERO_ERROR; 253 UnicodeSet set(pattern, errorCode); 254 if(U_FAILURE(errorCode)) { 255 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode)); 256 continue; 257 } 258 if(set!=alpha) { 259 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]); 260 } 261 } 262 } 263