Home | History | Annotate | Download | only in gensprep
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  store.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003-02-06
     14 *   created by: Ram Viswanadha
     15 *
     16 */
     17 
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include "unicode/utypes.h"
     21 #include "cmemory.h"
     22 #include "cstring.h"
     23 #include "filestrm.h"
     24 #include "unicode/udata.h"
     25 #include "utrie.h"
     26 #include "unewdata.h"
     27 #include "gensprep.h"
     28 #include "uhash.h"
     29 
     30 
     31 #define DO_DEBUG_OUT 0
     32 
     33 
     34 /*
     35  * StringPrep profile file format ------------------------------------
     36  *
     37  * The file format prepared and written here contains a 16-bit trie and a mapping table.
     38  *
     39  * Before the data contents described below, there are the headers required by
     40  * the udata API for loading ICU data. Especially, a UDataInfo structure
     41  * precedes the actual data. It contains platform properties values and the
     42  * file format version.
     43  *
     44  * The following is a description of format version 2.
     45  *
     46  * Data contents:
     47  *
 * The contents are a parsed, binary form of RFC 3454 and, depending on the
 * options specified in the profile, of NormalizationCorrections.txt.
     50  *
     51  * Any Unicode code point from 0 to 0x10ffff can be looked up to get
     52  * the trie-word, if any, for that code point. This means that the input
     53  * to the lookup are 21-bit unsigned integers, with not all of the
     54  * 21-bit range used.
     55  *
     56  * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
     57  * After that there are the following structures:
     58  *
     59  * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
     60  *
     61  * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
     62  *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that each code point maps to
 *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
     65  *
     66  * The indexes array contains the following values:
     67  *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
     68  *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
     69  *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
     70  *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
     71  *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
     72  *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
     73  *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
     74  *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
     75  *
     76  *
     77  * StringPrep Trie :
     78  *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
     80  * Each code point is associated with a value (trie-word) in the trie.
     81  *
     82  * - structure of data words from the trie
     83  *
     84  *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
     85  *      represents the type associated with the code point
     86  *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
     87  *          type = trieWord - 0xFFF0;
     88  *      }
     89  *      The type can be :
     90  *             USPREP_UNASSIGNED
     91  *             USPREP_PROHIBITED
     92  *             USPREP_DELETE
     93  *
     94  *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
     95  *      contains distribution described below
     96  *
 *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This allows for code points that are both prohibited and mapped.
 *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
 *      2..15   -  Contains data as described by bit 1. If all bits are set
 *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
    102  *
    103  *
    104  * Mapping Table:
    105  * The data in mapping table is sorted according to the length of the mapping sequence.
    106  * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
    107  * is compared with start indexes of sequence length start to figure out the length according to
    108  * the following algorithm:
    109  *
    110  *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    111  *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    112  *                   length = 1;
    113  *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    114  *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    115  *                   length = 2;
    116  *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    117  *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    118  *                   length = 3;
    119  *               }else{
    120  *                   // The first position in the mapping table contains the length
    121  *                   // of the sequence
    122  *                   length = mappingTable[index++];
    123  *
    124  *               }
    125  *
    126  */
    127 
    128 /* file data ---------------------------------------------------------------- */
    129 /* indexes[] value names */
    130 
    131 #if UCONFIG_NO_IDNA
    132 
    133 /* dummy UDataInfo cf. udata.h */
    134 static UDataInfo dataInfo = {
    135     sizeof(UDataInfo),
    136     0,
    137 
    138     U_IS_BIG_ENDIAN,
    139     U_CHARSET_FAMILY,
    140     U_SIZEOF_UCHAR,
    141     0,
    142 
    143     { 0, 0, 0, 0 },                 /* dummy dataFormat */
    144     { 0, 0, 0, 0 },                 /* dummy formatVersion */
    145     { 0, 0, 0, 0 }                  /* dummy dataVersion */
    146 };
    147 
    148 #else
    149 
    150 static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
    151 
    152 static uint16_t* mappingData= NULL;
    153 static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
    154 static int16_t currentIndex = 0; /* the current index into the data trie */
    155 static int32_t maxLength = 0;  /* maximum length of mapping string */
    156 
    157 
    158 /* UDataInfo cf. udata.h */
    159 static UDataInfo dataInfo={
    160     sizeof(UDataInfo),
    161     0,
    162 
    163     U_IS_BIG_ENDIAN,
    164     U_CHARSET_FAMILY,
    165     U_SIZEOF_UCHAR,
    166     0,
    167 
    168     { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
    169     { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    170     { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
    171 };
    172 void
    173 setUnicodeVersion(const char *v) {
    174     UVersionInfo version;
    175     u_versionFromString(version, v);
    176     uprv_memcpy(dataInfo.dataVersion, version, 4);
    177 }
    178 
    179 void
    180 setUnicodeVersionNC(UVersionInfo version){
    181     uint32_t univer = version[0] << 24;
    182     univer += version[1] << 16;
    183     univer += version[2] << 8;
    184     univer += version[3];
    185     indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
    186 }
    187 static UNewTrie *sprepTrie;
    188 
    189 #define MAX_DATA_LENGTH 11500
    190 
    191 
    192 #define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
    193 #define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              -8192
    194 
    195 
    196 extern void
    197 init() {
    198 
    199     sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
    200     uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
    201 
    202     /* initialize the two tries */
    203     if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
    204         fprintf(stderr, "error: failed to initialize tries\n");
    205         exit(U_MEMORY_ALLOCATION_ERROR);
    206     }
    207 }
    208 
    209 static UHashtable* hashTable = NULL;
    210 
    211 
    212 typedef struct ValueStruct {
    213     UChar* mapping;
    214     int16_t length;
    215     UStringPrepType type;
    216 } ValueStruct;
    217 
    218 /* Callback for deleting the value from the hashtable */
    219 static void U_CALLCONV valueDeleter(void* obj){
    220     ValueStruct* value = (ValueStruct*) obj;
    221     uprv_free(value->mapping);
    222     uprv_free(value);
    223 }
    224 
    225 /* Callback for hashing the entry */
    226 static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
    227     return  parm.integer;
    228 }
    229 
    230 /* Callback for comparing two entries */
    231 static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
    232     return (UBool)(p1.integer != p2.integer);
    233 }
    234 
    235 
    236 static void
    237 storeMappingData(){
    238 
    239     int32_t pos = -1;
    240     const UHashElement* element = NULL;
    241     ValueStruct* value  = NULL;
    242     int32_t codepoint = 0;
    243     int32_t elementCount = 0;
    244     int32_t writtenElementCount = 0;
    245     int32_t mappingLength = 1; /* minimum mapping length */
    246     int32_t oldMappingLength = 0;
    247     uint16_t trieWord =0;
    248     int32_t limitIndex = 0;
    249 
    250     if (hashTable == NULL) {
    251         return;
    252     }
    253     elementCount = uhash_count(hashTable);
    254 
    255 	/*initialize the mapping data */
    256     mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
    257 
    258     uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
    259 
    260     while(writtenElementCount < elementCount){
    261 
    262         while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
    263 
    264             codepoint = element->key.integer;
    265             value = (ValueStruct*)element->value.pointer;
    266 
    267             /* store the start of indexes */
    268             if(oldMappingLength != mappingLength){
    269                 /* Assume that index[] is used according to the enums defined */
    270                 if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
    271                     indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
    272                 }
    273                 if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
    274                    mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
    275 
    276                     limitIndex = currentIndex;
    277 
    278                 }
    279                 oldMappingLength = mappingLength;
    280             }
    281 
    282             if(value->length == mappingLength){
    283                 uint32_t savedTrieWord = 0;
    284                 trieWord = currentIndex << 2;
    285                 /* turn on the 2nd bit to signal that the following bits contain an index */
    286                 trieWord += 0x02;
    287 
    288                 if(trieWord > _SPREP_TYPE_THRESHOLD){
    289                     fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
    290                     exit(U_ILLEGAL_CHAR_FOUND);
    291                 }
    292                 /* figure out if the code point has type already stored */
    293                 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    294                 if(savedTrieWord!=0){
    295                     if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
    296                         /* turn on the first bit in trie word */
    297                         trieWord += 0x01;
    298                     }else{
    299                         /*
    300                          * the codepoint has value something other than prohibited
    301                          * and a mapping .. error!
    302                          */
    303                         fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
    304                         exit(U_ILLEGAL_ARGUMENT_ERROR);
    305                     }
    306                 }
    307 
    308                 /* now set the value in the trie */
    309                 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
    310                     fprintf(stderr,"Could not set the value for code point.\n");
    311                     exit(U_ILLEGAL_ARGUMENT_ERROR);
    312                 }
    313 
    314                 /* written the trie word for the codepoint... increment the count*/
    315                 writtenElementCount++;
    316 
    317                 /* sanity check are we exceeding the max number allowed */
    318                 if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
    319                     fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
    320                     exit(U_INDEX_OUTOFBOUNDS_ERROR);
    321                 }
    322 
    323                 /* copy the mapping data */
    324                 if(currentIndex+value->length+1 <= mappingDataCapacity){
    325                     /* write the length */
    326                     if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
    327                          /* the cast here is safe since we donot expect the length to be > 65535 */
    328                          mappingData[currentIndex++] = (uint16_t) mappingLength;
    329                     }
    330                     /* copy the contents to mappindData array */
    331                     uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
    332                     currentIndex += value->length;
    333 
    334                 }else{
    335                     /* realloc */
    336                     UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
    337                     if(newMappingData == NULL){
    338                         fprintf(stderr, "Could not realloc the mapping data!\n");
    339                         exit(U_MEMORY_ALLOCATION_ERROR);
    340                     }
    341                     uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
    342                     mappingDataCapacity *= 2;
    343                     uprv_free(mappingData);
    344                     mappingData = newMappingData;
    345                     /* write the length */
    346                     if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
    347                          /* the cast here is safe since we donot expect the length to be > 65535 */
    348                          mappingData[currentIndex++] = (uint16_t) mappingLength;
    349                     }
    350                     /* continue copying */
    351                     uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
    352                     currentIndex += value->length;
    353                 }
    354 
    355             }
    356         }
    357         mappingLength++;
    358         pos = -1;
    359     }
    360     /* set the last length for range check */
    361     if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
    362         indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
    363     }else{
    364         indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
    365     }
    366 
    367 }
    368 
    369 extern void setOptions(int32_t options){
    370     indexes[_SPREP_OPTIONS] = options;
    371 }
    372 extern void
    373 storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
    374              UStringPrepType type, UErrorCode* status){
    375 
    376 
    377     UChar* map = NULL;
    378     int16_t adjustedLen=0, i;
    379     uint16_t trieWord = 0;
    380     ValueStruct *value = NULL;
    381     uint32_t savedTrieWord = 0;
    382 
    383     /* initialize the hashtable */
    384     if(hashTable==NULL){
    385         hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
    386         uhash_setValueDeleter(hashTable, valueDeleter);
    387     }
    388 
    389     /* figure out if the code point has type already stored */
    390     savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    391     if(savedTrieWord!=0){
    392         if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
    393             /* turn on the first bit in trie word */
    394             trieWord += 0x01;
    395         }else{
    396             /*
    397              * the codepoint has value something other than prohibited
    398              * and a mapping .. error!
    399              */
    400             fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
    401             exit(U_ILLEGAL_ARGUMENT_ERROR);
    402         }
    403     }
    404 
    405     /* figure out the real length */
    406     for(i=0; i<length; i++){
    407         if(mapping[i] > 0xFFFF){
    408             adjustedLen +=2;
    409         }else{
    410             adjustedLen++;
    411         }
    412     }
    413 
    414     if(adjustedLen == 0){
    415         trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
    416         /* make sure that the value of trieWord is less than the threshold */
    417         if(trieWord < _SPREP_TYPE_THRESHOLD){
    418             /* now set the value in the trie */
    419             if(!utrie_set32(sprepTrie,codepoint,trieWord)){
    420                 fprintf(stderr,"Could not set the value for code point.\n");
    421                 exit(U_ILLEGAL_ARGUMENT_ERROR);
    422             }
    423             /* value is set so just return */
    424             return;
    425         }else{
    426             fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
    427             exit(U_ILLEGAL_CHAR_FOUND);
    428         }
    429     }
    430 
    431     if(adjustedLen == 1){
    432         /* calculate the delta */
    433         int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
    434         if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
    435 
    436             trieWord = delta << 2;
    437 
    438 
    439             /* make sure that the second bit is OFF */
    440             if((trieWord & 0x02) != 0 ){
    441                 fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
    442                 exit(U_INTERNAL_PROGRAM_ERROR);
    443             }
    444             /* make sure that the value of trieWord is less than the threshold */
    445             if(trieWord < _SPREP_TYPE_THRESHOLD){
    446                 /* now set the value in the trie */
    447                 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
    448                     fprintf(stderr,"Could not set the value for code point.\n");
    449                     exit(U_ILLEGAL_ARGUMENT_ERROR);
    450                 }
    451                 /* value is set so just return */
    452                 return;
    453             }
    454         }
    455         /*
    456          * if the delta is not in the given range or if the trieWord is larger than the threshold
    457          * just fall through for storing the mapping in the mapping table
    458          */
    459     }
    460 
    461     map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
    462     uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
    463 
    464     i=0;
    465 
    466     while(i<length){
    467         if(mapping[i] <= 0xFFFF){
    468             map[i] = (uint16_t)mapping[i];
    469         }else{
    470             map[i]   = UTF16_LEAD(mapping[i]);
    471             map[i+1] = UTF16_TRAIL(mapping[i]);
    472         }
    473         i++;
    474     }
    475 
    476     value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    477     value->mapping = map;
    478     value->type   = type;
    479     value->length  = adjustedLen;
    480     if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
    481         mappingDataCapacity++;
    482     }
    483     if(maxLength < value->length){
    484         maxLength = value->length;
    485     }
    486     uhash_iput(hashTable,codepoint,value,status);
    487     mappingDataCapacity += adjustedLen;
    488 
    489     if(U_FAILURE(*status)){
    490         fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
    491         exit(*status);
    492     }
    493 }
    494 
    495 
    496 extern void
    497 storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
    498     uint16_t trieWord = 0;
    499 
    500     if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
    501         fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
    502         exit(U_ILLEGAL_CHAR_FOUND);
    503     }
    504     trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
    505     if(start == end){
    506         uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
    507         if(savedTrieWord>0){
    508             if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
    509                 /*
    510                  * A mapping is stored in the trie word
    511                  * and the only other possible type that a
    512                  * code point can have is USPREP_PROHIBITED
    513                  *
    514                  */
    515 
    516                 /* turn on the 0th bit in the savedTrieWord */
    517                 savedTrieWord += 0x01;
    518 
    519                 /* the downcast is safe since we only save 16 bit values */
    520                 trieWord = (uint16_t)savedTrieWord;
    521 
    522                 /* make sure that the value of trieWord is less than the threshold */
    523                 if(trieWord < _SPREP_TYPE_THRESHOLD){
    524                     /* now set the value in the trie */
    525                     if(!utrie_set32(sprepTrie,start,trieWord)){
    526                         fprintf(stderr,"Could not set the value for code point.\n");
    527                         exit(U_ILLEGAL_ARGUMENT_ERROR);
    528                     }
    529                     /* value is set so just return */
    530                     return;
    531                 }else{
    532                     fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
    533                     exit(U_ILLEGAL_CHAR_FOUND);
    534                 }
    535 
    536             }else if(savedTrieWord != trieWord){
    537                 fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
    538                 exit(U_ILLEGAL_ARGUMENT_ERROR);
    539             }
    540             /* if savedTrieWord == trieWord .. fall through and set the value */
    541         }
    542         if(!utrie_set32(sprepTrie,start,trieWord)){
    543             fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
    544             exit(U_ILLEGAL_ARGUMENT_ERROR);
    545         }
    546     }else{
    547         if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
    548             fprintf(stderr,"Value for certain codepoint already set.\n");
    549             exit(U_ILLEGAL_CHAR_FOUND);
    550         }
    551     }
    552 
    553 }
    554 
    555 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
    556 static uint32_t U_CALLCONV
    557 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    558     uint32_t foldedValue, value;
    559     UChar32 limit=0;
    560     UBool inBlockZero;
    561 
    562     foldedValue=0;
    563 
    564     limit=start+0x400;
    565     while(start<limit) {
    566         value=utrie_get32(trie, start, &inBlockZero);
    567         if(inBlockZero) {
    568             start+=UTRIE_DATA_BLOCK_LENGTH;
    569         } else if(value!=0) {
    570             return (uint32_t)offset;
    571         } else {
    572             ++start;
    573         }
    574     }
    575     return 0;
    576 
    577 }
    578 
    579 #endif /* #if !UCONFIG_NO_IDNA */
    580 
    581 extern void
    582 generateData(const char *dataDir, const char* bundleName) {
    583     static uint8_t sprepTrieBlock[100000];
    584 
    585     UNewDataMemory *pData;
    586     UErrorCode errorCode=U_ZERO_ERROR;
    587     int32_t size, dataLength;
    588     char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
    589 
    590 #if UCONFIG_NO_IDNA
    591 
    592     size=0;
    593 
    594 #else
    595 
    596     int32_t sprepTrieSize;
    597 
    598     /* sort and add mapping data */
    599     storeMappingData();
    600 
    601     sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
    602     if(U_FAILURE(errorCode)) {
    603         fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
    604         exit(errorCode);
    605     }
    606 
    607     size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
    608     if(beVerbose) {
    609         printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
    610         printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
    611         printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
    612         printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
    613         printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
    614     }
    615 
    616 #endif
    617 
    618     fileName[0]=0;
    619     uprv_strcat(fileName,bundleName);
    620     /* write the data */
    621     pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
    622                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    623     if(U_FAILURE(errorCode)) {
    624         fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
    625         exit(errorCode);
    626     }
    627 
    628 #if !UCONFIG_NO_IDNA
    629 
    630     indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
    631     indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
    632 
    633     udata_writeBlock(pData, indexes, sizeof(indexes));
    634     udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
    635     udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
    636 
    637 
    638 #endif
    639 
    640     /* finish up */
    641     dataLength=udata_finish(pData, &errorCode);
    642     if(U_FAILURE(errorCode)) {
    643         fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
    644         exit(errorCode);
    645     }
    646 
    647     if(dataLength!=size) {
    648         fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
    649             (long)dataLength, (long)size);
    650         exit(U_INTERNAL_PROGRAM_ERROR);
    651     }
    652 
    653 #if !UCONFIG_NO_IDNA
    654     /* done with writing the data .. close the hashtable */
    655     if (hashTable != NULL) {
    656         uhash_close(hashTable);
    657     }
    658 #endif
    659 }
    660 
    661 #if !UCONFIG_NO_IDNA
    662 
    663 extern void
    664 cleanUpData(void) {
    665 
    666     utrie_close(sprepTrie);
    667     uprv_free(sprepTrie);
    668 }
    669 
    670 #endif /* #if !UCONFIG_NO_IDNA */
    671 
    672 /*
    673  * Hey, Emacs, please set the following:
    674  *
    675  * Local Variables:
    676  * indent-tabs-mode: nil
    677  * End:
    678  *
    679  */
    680