1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: testidn.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003-02-06 14 * created by: Ram Viswanadha 15 * 16 * This program reads the rfc3454_*.txt files, 17 * parses them, and extracts the data for Nameprep conformance. 18 * It then preprocesses it and writes a binary file for efficient use 19 * in various IDNA conversion processes. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 25 26 #define USPREP_TYPE_NAMES_ARRAY 27 28 #include "unicode/uchar.h" 29 #include "unicode/putil.h" 30 #include "cmemory.h" 31 #include "cstring.h" 32 #include "unicode/udata.h" 33 #include "unicode/utf16.h" 34 #include "unewdata.h" 35 #include "uoptions.h" 36 #include "uparse.h" 37 #include "utrie.h" 38 #include "umutex.h" 39 #include "sprpimpl.h" 40 #include "testidna.h" 41 #include "punyref.h" 42 #include <stdlib.h> 43 44 UBool beVerbose=FALSE, haveCopyright=TRUE; 45 46 /* prototypes --------------------------------------------------------------- */ 47 48 49 static void 50 parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode); 51 52 static void 53 compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength, 54 UStringPrepType option); 55 56 static void 57 compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option); 58 59 static void 60 testAllCodepoints(TestIDNA& test); 61 62 static TestIDNA* pTestIDNA =NULL; 63 64 static const char* fileNames[] = { 65 "rfc3491.txt" 66 }; 67 static const UTrie *idnTrie = NULL; 68 static const int32_t *indexes = NULL; 69 static const uint16_t *mappingData = NULL; 70 /* -------------------------------------------------------------------------- */ 71 72 /* file definitions */ 73 #define DATA_TYPE "icu" 74 75 #define SPREP_DIR "sprep" 76 77 extern int 78 testData(TestIDNA& test) { 79 char *basename=NULL; 80 UErrorCode errorCode=U_ZERO_ERROR; 81 char *saveBasename =NULL; 82 83 LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode)); 84 if(U_FAILURE(errorCode)){ 85 test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode))); 86 return errorCode; 87 } 88 89 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024); 90 //TODO get the srcDir dynamically 91 const char *srcDir=IntlTest::pathToDataDirectory(); 92 93 idnTrie = &profile->sprepTrie; 94 indexes = profile->indexes; 95 mappingData = profile->mappingData; 96 97 //initialize 98 pTestIDNA = &test; 99 100 /* prepare the filename beginning with the source dir */ 101 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){ 102 filename[0] = 0x2E; 103 filename[1] = U_FILE_SEP_CHAR; 104 uprv_strcpy(filename+2,srcDir); 105 }else{ 106 uprv_strcpy(filename, srcDir); 107 } 108 basename=filename+uprv_strlen(filename); 109 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 110 *basename++=U_FILE_SEP_CHAR; 111 } 112 113 /* process unassigned */ 114 basename=filename+uprv_strlen(filename); 115 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 116 *basename++=U_FILE_SEP_CHAR; 117 } 118 119 /* first copy misc directory */ 120 saveBasename = basename; 121 (void)saveBasename; // Suppress set but not used warning. 122 uprv_strcpy(basename,SPREP_DIR); 123 basename = basename + uprv_strlen(SPREP_DIR); 124 *basename++=U_FILE_SEP_CHAR; 125 126 /* process unassigned */ 127 uprv_strcpy(basename,fileNames[0]); 128 parseMappings(filename,TRUE, test,&errorCode); 129 if(U_FAILURE(errorCode)) { 130 test.errln( "Could not open file %s for reading \n", filename); 131 return errorCode; 132 } 133 134 testAllCodepoints(test); 135 136 pTestIDNA = NULL; 137 free(filename); 138 return errorCode; 139 } 140 U_CDECL_BEGIN 141 142 static void U_CALLCONV 143 strprepProfileLineFn(void * /*context*/, 144 char *fields[][2], int32_t fieldCount, 145 UErrorCode *pErrorCode) { 146 uint32_t mapping[40]; 147 char *end, *map; 148 uint32_t code; 149 int32_t length; 150 /*UBool* mapWithNorm = (UBool*) context;*/ 151 const char* typeName; 152 uint32_t rangeStart=0,rangeEnd =0; 153 const char *s; 154 155 s = u_skipWhitespace(fields[0][0]); 156 if (*s == '@') { 157 /* a special directive introduced in 4.2 */ 158 return; 159 } 160 161 if(fieldCount != 3){ 162 *pErrorCode = U_INVALID_FORMAT_ERROR; 163 return; 164 } 165 166 typeName = fields[2][0]; 167 map = fields[1][0]; 168 169 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ 170 171 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 172 173 /* store the range */ 174 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED); 175 176 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ 177 178 u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode); 179 180 /* store the range */ 181 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED); 182 183 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ 184 /* get the character code, field 0 */ 185 code=(uint32_t)uprv_strtoul(s, &end, 16); 186 187 /* parse the mapping string */ 188 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); 189 190 /* store the mapping */ 191 compareMapping(code,mapping, length,USPREP_MAP); 192 193 }else{ 194 *pErrorCode = U_INVALID_FORMAT_ERROR; 195 } 196 197 } 198 199 U_CDECL_END 200 201 static void 202 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) { 203 char *fields[3][2]; 204 205 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 206 return; 207 } 208 209 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode); 210 211 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len); 212 213 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { 214 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); 215 } 216 } 217 218 219 static inline UStringPrepType 220 getValues(uint32_t result, int32_t& value, UBool& isIndex){ 221 222 UStringPrepType type; 223 224 if(result == 0){ 225 /* 226 * Initial value stored in the mapping table 227 * just return USPREP_TYPE_LIMIT .. so that 228 * the source codepoint is copied to the destination 229 */ 230 type = USPREP_TYPE_LIMIT; 231 isIndex =FALSE; 232 value = 0; 233 }else if(result >= _SPREP_TYPE_THRESHOLD){ 234 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); 235 isIndex =FALSE; 236 value = 0; 237 }else{ 238 /* get the state */ 239 type = USPREP_MAP; 240 /* ascertain if the value is index or delta */ 241 if(result & 0x02){ 242 isIndex = TRUE; 243 value = result >> 2; //mask off the lower 2 bits and shift 244 245 }else{ 246 isIndex = FALSE; 247 value = (int16_t)result; 248 value = (value >> 2); 249 250 } 251 if((result>>2) == _SPREP_MAX_INDEX_VALUE){ 252 type = USPREP_DELETE; 253 isIndex =FALSE; 254 value = 0; 255 } 256 } 257 return type; 258 } 259 260 261 262 static void 263 testAllCodepoints(TestIDNA& test){ 264 /* 265 { 266 UChar str[19] = { 267 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 268 0x070F,//prohibited 269 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74 270 }; 271 uint32_t in[19] = {0}; 272 UErrorCode status = U_ZERO_ERROR; 273 int32_t inLength=0, outLength=100; 274 char output[100] = {0}; 275 punycode_status error; 276 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status); 277 278 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output); 279 printf(output); 280 281 } 282 */ 283 284 uint32_t i = 0; 285 int32_t unassigned = 0; 286 int32_t prohibited = 0; 287 int32_t mappedWithNorm = 0; 288 int32_t mapped = 0; 289 int32_t noValueInTrie = 0; 290 291 UStringPrepType type; 292 int32_t value; 293 UBool isIndex = FALSE; 294 295 for(i=0;i<=0x10FFFF;i++){ 296 uint32_t result = 0; 297 UTRIE_GET16(idnTrie,i, result); 298 type = getValues(result,value, isIndex); 299 if(type != USPREP_TYPE_LIMIT ){ 300 if(type == USPREP_UNASSIGNED){ 301 unassigned++; 302 } 303 if(type == USPREP_PROHIBITED){ 304 prohibited++; 305 } 306 if(type == USPREP_MAP){ 307 mapped++; 308 } 309 }else{ 310 noValueInTrie++; 311 if(result > 0){ 312 test.errln("The return value for 0x%06X is wrong. %i\n",i,result); 313 } 314 } 315 } 316 317 test.logln("Number of Unassinged code points : %i \n",unassigned); 318 test.logln("Number of Prohibited code points : %i \n",prohibited); 319 test.logln("Number of Mapped code points : %i \n",mapped); 320 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm); 321 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie); 322 323 324 } 325 326 static void 327 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength, 328 UStringPrepType type){ 329 uint32_t result = 0; 330 UTRIE_GET16(idnTrie,codepoint, result); 331 332 int32_t length=0; 333 UBool isIndex; 334 UStringPrepType retType; 335 int32_t value, index=0, delta=0; 336 337 retType = getValues(result,value,isIndex); 338 339 340 if(type != retType && retType != USPREP_DELETE){ 341 342 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); 343 344 } 345 346 if(isIndex){ 347 index = value; 348 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 349 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 350 length = 1; 351 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 352 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 353 length = 2; 354 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 355 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 356 length = 3; 357 }else{ 358 length = mappingData[index++]; 359 } 360 }else{ 361 delta = value; 362 length = (retType == USPREP_DELETE)? 0 : 1; 363 } 364 365 int32_t realLength =0; 366 /* figure out the real length */ 367 for(int32_t j=0; j<mapLength; j++){ 368 if(mapping[j] > 0xFFFF){ 369 realLength +=2; 370 }else{ 371 realLength++; 372 } 373 } 374 375 if(realLength != length){ 376 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); 377 } 378 379 if(isIndex){ 380 for(int8_t i =0; i< mapLength; i++){ 381 if(mapping[i] <= 0xFFFF){ 382 if(mappingData[index+i] != (uint16_t)mapping[i]){ 383 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]); 384 } 385 }else{ 386 UChar lead = U16_LEAD(mapping[i]); 387 UChar trail = U16_TRAIL(mapping[i]); 388 if(mappingData[index+i] != lead || 389 mappingData[index+i+1] != trail){ 390 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]); 391 } 392 } 393 } 394 }else{ 395 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ 396 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); 397 } 398 } 399 400 } 401 402 static void 403 compareFlagsForRange(uint32_t start, uint32_t end, 404 UStringPrepType type){ 405 406 uint32_t result =0 ; 407 UStringPrepType retType; 408 UBool isIndex=FALSE; 409 int32_t value=0; 410 /* 411 // supplementary code point 412 UChar __lead16=U16_LEAD(0x2323E); 413 int32_t __offset; 414 415 // get data for lead surrogate 416 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); 417 __offset=(&idnTrie)->getFoldingOffset(result); 418 419 // get the real data from the folded lead/trail units 420 if(__offset>0) { 421 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); 422 } else { 423 (result)=(uint32_t)((&idnTrie)->initialValue); 424 } 425 426 UTRIE_GET16(&idnTrie,0x2323E, result); 427 */ 428 while(start < end+1){ 429 UTRIE_GET16(idnTrie,start, result); 430 retType = getValues(result,value,isIndex); 431 if(result > _SPREP_TYPE_THRESHOLD){ 432 if(retType != type){ 433 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 434 } 435 }else{ 436 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ 437 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); 438 } 439 } 440 441 start++; 442 } 443 444 } 445 446 447 #endif /* #if !UCONFIG_NO_IDNA */ 448 449 /* 450 * Hey, Emacs, please set the following: 451 * 452 * Local Variables: 453 * indent-tabs-mode: nil 454 * End: 455 * 456 */ 457