1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: store.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003-02-06 14 * created by: Ram Viswanadha 15 * 16 */ 17 18 #include <stdio.h> 19 #include <stdlib.h> 20 #include "unicode/utypes.h" 21 #include "cmemory.h" 22 #include "cstring.h" 23 #include "filestrm.h" 24 #include "unicode/udata.h" 25 #include "utrie.h" 26 #include "unewdata.h" 27 #include "gensprep.h" 28 #include "uhash.h" 29 30 31 #define DO_DEBUG_OUT 0 32 33 34 /* 35 * StringPrep profile file format ------------------------------------ 36 * 37 * The file format prepared and written here contains a 16-bit trie and a mapping table. 38 * 39 * Before the data contents described below, there are the headers required by 40 * the udata API for loading ICU data. Especially, a UDataInfo structure 41 * precedes the actual data. It contains platform properties values and the 42 * file format version. 43 * 44 * The following is a description of format version 2. 45 * 46 * Data contents: 47 * 48 * The contents is a parsed, binary form of RFC3454 and possibly 49 * NormalizationCorrections.txt depending on the options specified on the profile. 50 * 51 * Any Unicode code point from 0 to 0x10ffff can be looked up to get 52 * the trie-word, if any, for that code point. This means that the input 53 * to the lookup are 21-bit unsigned integers, with not all of the 54 * 21-bit range used. 55 * 56 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c. 57 * After that there are the following structures: 58 * 59 * int32_t indexes[_SPREP_INDEX_TOP]; -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file 60 * 61 * UTrie stringPrepTrie; -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE] 62 * 63 * uint16_t mappingTable[]; -- Contains the sequecence of code units that the code point maps to 64 * size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE] 65 * 66 * The indexes array contains the following values: 67 * indexes[_SPREP_INDEX_TRIE_SIZE] -- The size of the StringPrep trie in bytes 68 * indexes[_SPREP_INDEX_MAPPING_DATA_SIZE] -- The size of the mappingTable in bytes 69 * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] -- The index of Unicode version of last entry in NormalizationCorrections.txt 70 * indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] -- The starting index of 1 UChar mapping index in the mapping table 71 * indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] -- The starting index of 2 UChars mapping index in the mapping table 72 * indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table 73 * indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] -- The starting index of 4 UChars mapping index in the mapping table 74 * indexes[_SPREP_OPTIONS] -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON 75 * 76 * 77 * StringPrep Trie : 78 * 79 * The StringPrep tries is a 16-bit trie that contains data for the profile. 80 * Each code point is associated with a value (trie-word) in the trie. 81 * 82 * - structure of data words from the trie 83 * 84 * i) A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0) 85 * represents the type associated with the code point 86 * if(trieWord >= _SPREP_TYPE_THRESHOLD){ 87 * type = trieWord - 0xFFF0; 88 * } 89 * The type can be : 90 * USPREP_UNASSIGNED 91 * USPREP_PROHIBITED 92 * USPREP_DELETE 93 * 94 * ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and 95 * contains distribution described below 96 * 97 * 0 - ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped. 98 * 1 - ON : The value in the next 14 bits is an index into the mapping table 99 * OFF: The value in the next 14 bits is an delta value from the code point 100 * 2..15 - Contains data as described by bit 1. If all bits are set 101 * (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE 102 * 103 * 104 * Mapping Table: 105 * The data in mapping table is sorted according to the length of the mapping sequence. 106 * If the type of the code point is USPREP_MAP and value in trie word is an index, the index 107 * is compared with start indexes of sequence length start to figure out the length according to 108 * the following algorithm: 109 * 110 * if( index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 111 * index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 112 * length = 1; 113 * }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 114 * index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 115 * length = 2; 116 * }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 117 * index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 118 * length = 3; 119 * }else{ 120 * // The first position in the mapping table contains the length 121 * // of the sequence 122 * length = mappingTable[index++]; 123 * 124 * } 125 * 126 */ 127 128 /* file data ---------------------------------------------------------------- */ 129 /* indexes[] value names */ 130 131 #if UCONFIG_NO_IDNA 132 133 /* dummy UDataInfo cf. udata.h */ 134 static UDataInfo dataInfo = { 135 sizeof(UDataInfo), 136 0, 137 138 U_IS_BIG_ENDIAN, 139 U_CHARSET_FAMILY, 140 U_SIZEOF_UCHAR, 141 0, 142 143 { 0, 0, 0, 0 }, /* dummy dataFormat */ 144 { 0, 0, 0, 0 }, /* dummy formatVersion */ 145 { 0, 0, 0, 0 } /* dummy dataVersion */ 146 }; 147 148 #else 149 150 static int32_t indexes[_SPREP_INDEX_TOP]={ 0 }; 151 152 static uint16_t* mappingData= NULL; 153 static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */ 154 static int16_t currentIndex = 0; /* the current index into the data trie */ 155 static int32_t maxLength = 0; /* maximum length of mapping string */ 156 157 158 /* UDataInfo cf. udata.h */ 159 static UDataInfo dataInfo={ 160 sizeof(UDataInfo), 161 0, 162 163 U_IS_BIG_ENDIAN, 164 U_CHARSET_FAMILY, 165 U_SIZEOF_UCHAR, 166 0, 167 168 { 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */ 169 { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ 170 { 3, 2, 0, 0 } /* dataVersion (Unicode version) */ 171 }; 172 void 173 setUnicodeVersion(const char *v) { 174 UVersionInfo version; 175 u_versionFromString(version, v); 176 uprv_memcpy(dataInfo.dataVersion, version, 4); 177 } 178 179 void 180 setUnicodeVersionNC(UVersionInfo version){ 181 uint32_t univer = version[0] << 24; 182 univer += version[1] << 16; 183 univer += version[2] << 8; 184 univer += version[3]; 185 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer; 186 } 187 static UNewTrie *sprepTrie; 188 189 #define MAX_DATA_LENGTH 11500 190 191 192 #define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191 193 #define SPREP_DELTA_RANGE_NEGATIVE_LIMIT -8192 194 195 196 extern void 197 init() { 198 199 sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie)); 200 uprv_memset(sprepTrie, 0, sizeof(UNewTrie)); 201 202 /* initialize the two tries */ 203 if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) { 204 fprintf(stderr, "error: failed to initialize tries\n"); 205 exit(U_MEMORY_ALLOCATION_ERROR); 206 } 207 } 208 209 static UHashtable* hashTable = NULL; 210 211 212 typedef struct ValueStruct { 213 UChar* mapping; 214 int16_t length; 215 UStringPrepType type; 216 } ValueStruct; 217 218 /* Callback for deleting the value from the hashtable */ 219 static void U_CALLCONV valueDeleter(void* obj){ 220 ValueStruct* value = (ValueStruct*) obj; 221 uprv_free(value->mapping); 222 uprv_free(value); 223 } 224 225 /* Callback for hashing the entry */ 226 static int32_t U_CALLCONV hashEntry(const UHashTok parm) { 227 return parm.integer; 228 } 229 230 /* Callback for comparing two entries */ 231 static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) { 232 return (UBool)(p1.integer != p2.integer); 233 } 234 235 236 static void 237 storeMappingData(){ 238 239 int32_t pos = -1; 240 const UHashElement* element = NULL; 241 ValueStruct* value = NULL; 242 int32_t codepoint = 0; 243 int32_t elementCount = 0; 244 int32_t writtenElementCount = 0; 245 int32_t mappingLength = 1; /* minimum mapping length */ 246 int32_t oldMappingLength = 0; 247 uint16_t trieWord =0; 248 int32_t limitIndex = 0; 249 250 if (hashTable == NULL) { 251 return; 252 } 253 elementCount = uhash_count(hashTable); 254 255 /*initialize the mapping data */ 256 mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity)); 257 258 uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity); 259 260 while(writtenElementCount < elementCount){ 261 262 while( (element = uhash_nextElement(hashTable, &pos))!=NULL){ 263 264 codepoint = element->key.integer; 265 value = (ValueStruct*)element->value.pointer; 266 267 /* store the start of indexes */ 268 if(oldMappingLength != mappingLength){ 269 /* Assume that index[] is used according to the enums defined */ 270 if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){ 271 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex; 272 } 273 if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH && 274 mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){ 275 276 limitIndex = currentIndex; 277 278 } 279 oldMappingLength = mappingLength; 280 } 281 282 if(value->length == mappingLength){ 283 uint32_t savedTrieWord = 0; 284 trieWord = currentIndex << 2; 285 /* turn on the 2nd bit to signal that the following bits contain an index */ 286 trieWord += 0x02; 287 288 if(trieWord > _SPREP_TYPE_THRESHOLD){ 289 fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 290 exit(U_ILLEGAL_CHAR_FOUND); 291 } 292 /* figure out if the code point has type already stored */ 293 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 294 if(savedTrieWord!=0){ 295 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 296 /* turn on the first bit in trie word */ 297 trieWord += 0x01; 298 }else{ 299 /* 300 * the codepoint has value something other than prohibited 301 * and a mapping .. error! 302 */ 303 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 304 exit(U_ILLEGAL_ARGUMENT_ERROR); 305 } 306 } 307 308 /* now set the value in the trie */ 309 if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 310 fprintf(stderr,"Could not set the value for code point.\n"); 311 exit(U_ILLEGAL_ARGUMENT_ERROR); 312 } 313 314 /* written the trie word for the codepoint... increment the count*/ 315 writtenElementCount++; 316 317 /* sanity check are we exceeding the max number allowed */ 318 if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){ 319 fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE); 320 exit(U_INDEX_OUTOFBOUNDS_ERROR); 321 } 322 323 /* copy the mapping data */ 324 if(currentIndex+value->length+1 <= mappingDataCapacity){ 325 /* write the length */ 326 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ 327 /* the cast here is safe since we donot expect the length to be > 65535 */ 328 mappingData[currentIndex++] = (uint16_t) mappingLength; 329 } 330 /* copy the contents to mappindData array */ 331 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); 332 currentIndex += value->length; 333 334 }else{ 335 /* realloc */ 336 UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2); 337 if(newMappingData == NULL){ 338 fprintf(stderr, "Could not realloc the mapping data!\n"); 339 exit(U_MEMORY_ALLOCATION_ERROR); 340 } 341 uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity); 342 mappingDataCapacity *= 2; 343 uprv_free(mappingData); 344 mappingData = newMappingData; 345 /* write the length */ 346 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ 347 /* the cast here is safe since we donot expect the length to be > 65535 */ 348 mappingData[currentIndex++] = (uint16_t) mappingLength; 349 } 350 /* continue copying */ 351 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); 352 currentIndex += value->length; 353 } 354 355 } 356 } 357 mappingLength++; 358 pos = -1; 359 } 360 /* set the last length for range check */ 361 if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){ 362 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1; 363 }else{ 364 indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex; 365 } 366 367 } 368 369 extern void setOptions(int32_t options){ 370 indexes[_SPREP_OPTIONS] = options; 371 } 372 extern void 373 storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, 374 UStringPrepType type, UErrorCode* status){ 375 376 377 UChar* map = NULL; 378 int16_t adjustedLen=0, i; 379 uint16_t trieWord = 0; 380 ValueStruct *value = NULL; 381 uint32_t savedTrieWord = 0; 382 383 /* initialize the hashtable */ 384 if(hashTable==NULL){ 385 hashTable = uhash_open(hashEntry, compareEntries, NULL, status); 386 uhash_setValueDeleter(hashTable, valueDeleter); 387 } 388 389 /* figure out if the code point has type already stored */ 390 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 391 if(savedTrieWord!=0){ 392 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 393 /* turn on the first bit in trie word */ 394 trieWord += 0x01; 395 }else{ 396 /* 397 * the codepoint has value something other than prohibited 398 * and a mapping .. error! 399 */ 400 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 401 exit(U_ILLEGAL_ARGUMENT_ERROR); 402 } 403 } 404 405 /* figure out the real length */ 406 for(i=0; i<length; i++){ 407 if(mapping[i] > 0xFFFF){ 408 adjustedLen +=2; 409 }else{ 410 adjustedLen++; 411 } 412 } 413 414 if(adjustedLen == 0){ 415 trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2); 416 /* make sure that the value of trieWord is less than the threshold */ 417 if(trieWord < _SPREP_TYPE_THRESHOLD){ 418 /* now set the value in the trie */ 419 if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 420 fprintf(stderr,"Could not set the value for code point.\n"); 421 exit(U_ILLEGAL_ARGUMENT_ERROR); 422 } 423 /* value is set so just return */ 424 return; 425 }else{ 426 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 427 exit(U_ILLEGAL_CHAR_FOUND); 428 } 429 } 430 431 if(adjustedLen == 1){ 432 /* calculate the delta */ 433 int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]); 434 if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){ 435 436 trieWord = delta << 2; 437 438 439 /* make sure that the second bit is OFF */ 440 if((trieWord & 0x02) != 0 ){ 441 fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n"); 442 exit(U_INTERNAL_PROGRAM_ERROR); 443 } 444 /* make sure that the value of trieWord is less than the threshold */ 445 if(trieWord < _SPREP_TYPE_THRESHOLD){ 446 /* now set the value in the trie */ 447 if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 448 fprintf(stderr,"Could not set the value for code point.\n"); 449 exit(U_ILLEGAL_ARGUMENT_ERROR); 450 } 451 /* value is set so just return */ 452 return; 453 } 454 } 455 /* 456 * if the delta is not in the given range or if the trieWord is larger than the threshold 457 * just fall through for storing the mapping in the mapping table 458 */ 459 } 460 461 map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1)); 462 uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1)); 463 464 i=0; 465 466 while(i<length){ 467 if(mapping[i] <= 0xFFFF){ 468 map[i] = (uint16_t)mapping[i]; 469 }else{ 470 map[i] = UTF16_LEAD(mapping[i]); 471 map[i+1] = UTF16_TRAIL(mapping[i]); 472 } 473 i++; 474 } 475 476 value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct)); 477 value->mapping = map; 478 value->type = type; 479 value->length = adjustedLen; 480 if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){ 481 mappingDataCapacity++; 482 } 483 if(maxLength < value->length){ 484 maxLength = value->length; 485 } 486 uhash_iput(hashTable,codepoint,value,status); 487 mappingDataCapacity += adjustedLen; 488 489 if(U_FAILURE(*status)){ 490 fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status)); 491 exit(*status); 492 } 493 } 494 495 496 extern void 497 storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){ 498 uint16_t trieWord = 0; 499 500 if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){ 501 fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n"); 502 exit(U_ILLEGAL_CHAR_FOUND); 503 } 504 trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */ 505 if(start == end){ 506 uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL); 507 if(savedTrieWord>0){ 508 if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){ 509 /* 510 * A mapping is stored in the trie word 511 * and the only other possible type that a 512 * code point can have is USPREP_PROHIBITED 513 * 514 */ 515 516 /* turn on the 0th bit in the savedTrieWord */ 517 savedTrieWord += 0x01; 518 519 /* the downcast is safe since we only save 16 bit values */ 520 trieWord = (uint16_t)savedTrieWord; 521 522 /* make sure that the value of trieWord is less than the threshold */ 523 if(trieWord < _SPREP_TYPE_THRESHOLD){ 524 /* now set the value in the trie */ 525 if(!utrie_set32(sprepTrie,start,trieWord)){ 526 fprintf(stderr,"Could not set the value for code point.\n"); 527 exit(U_ILLEGAL_ARGUMENT_ERROR); 528 } 529 /* value is set so just return */ 530 return; 531 }else{ 532 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 533 exit(U_ILLEGAL_CHAR_FOUND); 534 } 535 536 }else if(savedTrieWord != trieWord){ 537 fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start); 538 exit(U_ILLEGAL_ARGUMENT_ERROR); 539 } 540 /* if savedTrieWord == trieWord .. fall through and set the value */ 541 } 542 if(!utrie_set32(sprepTrie,start,trieWord)){ 543 fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start); 544 exit(U_ILLEGAL_ARGUMENT_ERROR); 545 } 546 }else{ 547 if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){ 548 fprintf(stderr,"Value for certain codepoint already set.\n"); 549 exit(U_ILLEGAL_CHAR_FOUND); 550 } 551 } 552 553 } 554 555 /* folding value: just store the offset (16 bits) if there is any non-0 entry */ 556 static uint32_t U_CALLCONV 557 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { 558 uint32_t foldedValue, value; 559 UChar32 limit=0; 560 UBool inBlockZero; 561 562 foldedValue=0; 563 564 limit=start+0x400; 565 while(start<limit) { 566 value=utrie_get32(trie, start, &inBlockZero); 567 if(inBlockZero) { 568 start+=UTRIE_DATA_BLOCK_LENGTH; 569 } else if(value!=0) { 570 return (uint32_t)offset; 571 } else { 572 ++start; 573 } 574 } 575 return 0; 576 577 } 578 579 #endif /* #if !UCONFIG_NO_IDNA */ 580 581 extern void 582 generateData(const char *dataDir, const char* bundleName) { 583 static uint8_t sprepTrieBlock[100000]; 584 585 UNewDataMemory *pData; 586 UErrorCode errorCode=U_ZERO_ERROR; 587 int32_t size, dataLength; 588 char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100); 589 590 #if UCONFIG_NO_IDNA 591 592 size=0; 593 594 #else 595 596 int32_t sprepTrieSize; 597 598 /* sort and add mapping data */ 599 storeMappingData(); 600 601 sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode); 602 if(U_FAILURE(errorCode)) { 603 fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode)); 604 exit(errorCode); 605 } 606 607 size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes); 608 if(beVerbose) { 609 printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize); 610 printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size); 611 printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR); 612 printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex); 613 printf("Maximum length of the mapping string is : %i \n", (int)maxLength); 614 } 615 616 #endif 617 618 fileName[0]=0; 619 uprv_strcat(fileName,bundleName); 620 /* write the data */ 621 pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo, 622 haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); 623 if(U_FAILURE(errorCode)) { 624 fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode); 625 exit(errorCode); 626 } 627 628 #if !UCONFIG_NO_IDNA 629 630 indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize; 631 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR; 632 633 udata_writeBlock(pData, indexes, sizeof(indexes)); 634 udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize); 635 udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]); 636 637 638 #endif 639 640 /* finish up */ 641 dataLength=udata_finish(pData, &errorCode); 642 if(U_FAILURE(errorCode)) { 643 fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode); 644 exit(errorCode); 645 } 646 647 if(dataLength!=size) { 648 fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n", 649 (long)dataLength, (long)size); 650 exit(U_INTERNAL_PROGRAM_ERROR); 651 } 652 653 #if !UCONFIG_NO_IDNA 654 /* done with writing the data .. close the hashtable */ 655 if (hashTable != NULL) { 656 uhash_close(hashTable); 657 } 658 #endif 659 } 660 661 #if !UCONFIG_NO_IDNA 662 663 extern void 664 cleanUpData(void) { 665 666 utrie_close(sprepTrie); 667 uprv_free(sprepTrie); 668 } 669 670 #endif /* #if !UCONFIG_NO_IDNA */ 671 672 /* 673 * Hey, Emacs, please set the following: 674 * 675 * Local Variables: 676 * indent-tabs-mode: nil 677 * End: 678 * 679 */ 680