Home | History | Annotate | Download | only in common
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2009, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  usprep.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003jul2
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_IDNA
     20 
     21 #include "unicode/usprep.h"
     22 
     23 #include "unicode/unorm.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uversion.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "sprpimpl.h"
     30 #include "ustr_imp.h"
     31 #include "uhash.h"
     32 #include "cstring.h"
     33 #include "udataswp.h"
     34 #include "ucln_cmn.h"
     35 #include "unormimp.h"
     36 #include "ubidi_props.h"
     37 
     38 U_CDECL_BEGIN
     39 
/*
 * Static cache for already opened StringPrep profiles.
 * Entries are stored with UStringPrepKey* keys and UStringPrepProfile* values
 * (see usprep_getProfile); the table itself is created lazily by initCache.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;

/* Mutex taken around accesses to SHARED_DATA_HASHTABLE and profile reference counts. */
static UMTX usprepMutex = NULL;

/* format version of spp file; overwritten by isSPrepAcceptable each time it accepts a data file */
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; overwritten by isSPrepAcceptable each time it accepts a data file */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     52 
     53 /* Profile names must be aligned to UStringPrepProfileType */
     54 static const char *PROFILE_NAMES[] = {
     55     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
     56     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
     57     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
     58     "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
     59     "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
     60     "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
     61     "rfc3722",      /* USPREP_RFC3722_ISCSI */
     62     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
     63     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
     64     "rfc4011",      /* USPREP_RFC4011_MIB */
     65     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
     66     "rfc4505",      /* USPREP_RFC4505_TRACE */
     67     "rfc4518",      /* USPREP_RFC4518_LDAP */
     68     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
     69 };
     70 
     71 static UBool U_CALLCONV
     72 isSPrepAcceptable(void * /* context */,
     73              const char * /* type */,
     74              const char * /* name */,
     75              const UDataInfo *pInfo) {
     76     if(
     77         pInfo->size>=20 &&
     78         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     79         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     80         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     81         pInfo->dataFormat[1]==0x50 &&
     82         pInfo->dataFormat[2]==0x52 &&
     83         pInfo->dataFormat[3]==0x50 &&
     84         pInfo->formatVersion[0]==3 &&
     85         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     86         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     87     ) {
     88         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     89         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     90         return TRUE;
     91     } else {
     92         return FALSE;
     93     }
     94 }
     95 
     96 static int32_t U_CALLCONV
     97 getSPrepFoldingOffset(uint32_t data) {
     98 
     99     return (int32_t)data;
    100 
    101 }
    102 
    103 /* hashes an entry  */
    104 static int32_t U_CALLCONV
    105 hashEntry(const UHashTok parm) {
    106     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    107     UHashTok namekey, pathkey;
    108     namekey.pointer = b->name;
    109     pathkey.pointer = b->path;
    110     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    111 }
    112 
    113 /* compares two entries */
    114 static UBool U_CALLCONV
    115 compareEntries(const UHashTok p1, const UHashTok p2) {
    116     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    117     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    118     UHashTok name1, name2, path1, path2;
    119     name1.pointer = b1->name;
    120     name2.pointer = b2->name;
    121     path1.pointer = b1->path;
    122     path2.pointer = b2->path;
    123     return ((UBool)(uhash_compareChars(name1, name2) &
    124         uhash_compareChars(path1, path2)));
    125 }
    126 
    127 static void
    128 usprep_unload(UStringPrepProfile* data){
    129     udata_close(data->sprepData);
    130 }
    131 
    132 static int32_t
    133 usprep_internal_flushCache(UBool noRefCount){
    134     UStringPrepProfile *profile = NULL;
    135     UStringPrepKey  *key  = NULL;
    136     int32_t pos = -1;
    137     int32_t deletedNum = 0;
    138     const UHashElement *e;
    139 
    140     /*
    141      * if shared data hasn't even been lazy evaluated yet
    142      * return 0
    143      */
    144     umtx_lock(&usprepMutex);
    145     if (SHARED_DATA_HASHTABLE == NULL) {
    146         umtx_unlock(&usprepMutex);
    147         return 0;
    148     }
    149 
    150     /*creates an enumeration to iterate through every element in the table */
    151     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    152     {
    153         profile = (UStringPrepProfile *) e->value.pointer;
    154         key  = (UStringPrepKey *) e->key.pointer;
    155 
    156         if ((noRefCount== FALSE && profile->refCount == 0) ||
    157              noRefCount== TRUE) {
    158             deletedNum++;
    159             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    160 
    161             /* unload the data */
    162             usprep_unload(profile);
    163 
    164             if(key->name != NULL) {
    165                 uprv_free(key->name);
    166                 key->name=NULL;
    167             }
    168             if(key->path != NULL) {
    169                 uprv_free(key->path);
    170                 key->path=NULL;
    171             }
    172             uprv_free(profile);
    173             uprv_free(key);
    174         }
    175 
    176     }
    177     umtx_unlock(&usprepMutex);
    178 
    179     return deletedNum;
    180 }
    181 
    182 /* Works just like ucnv_flushCache()
    183 static int32_t
    184 usprep_flushCache(){
    185     return usprep_internal_flushCache(FALSE);
    186 }
    187 */
    188 
    189 static UBool U_CALLCONV usprep_cleanup(void){
    190     if (SHARED_DATA_HASHTABLE != NULL) {
    191         usprep_internal_flushCache(TRUE);
    192         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    193             uhash_close(SHARED_DATA_HASHTABLE);
    194             SHARED_DATA_HASHTABLE = NULL;
    195         }
    196     }
    197 
    198     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
    199                                             /*  if the hash table still exists.  The mutex  */
    200                                             /*  will lazily re-init  itself if needed.      */
    201     return (SHARED_DATA_HASHTABLE == NULL);
    202 }
    203 U_CDECL_END
    204 
    205 static void
    206 usprep_init() {
    207     umtx_init(&usprepMutex);
    208 }
    209 
    210 /** Initializes the cache for resources */
    211 static void
    212 initCache(UErrorCode *status) {
    213     UBool makeCache;
    214     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
    215     if(makeCache) {
    216         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
    217         if (U_SUCCESS(*status)) {
    218             umtx_lock(&usprepMutex);
    219             if(SHARED_DATA_HASHTABLE == NULL) {
    220                 SHARED_DATA_HASHTABLE = newCache;
    221                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    222                 newCache = NULL;
    223             }
    224             umtx_unlock(&usprepMutex);
    225         }
    226         if(newCache != NULL) {
    227             uhash_close(newCache);
    228         }
    229     }
    230 }
    231 
    232 static UBool U_CALLCONV
    233 loadData(UStringPrepProfile* profile,
    234          const char* path,
    235          const char* name,
    236          const char* type,
    237          UErrorCode* errorCode) {
    238     /* load Unicode SPREP data from file */
    239     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    240     UDataMemory *dataMemory;
    241     const int32_t *p=NULL;
    242     const uint8_t *pb;
    243     UVersionInfo normUnicodeVersion;
    244     int32_t normUniVer, sprepUniVer, normCorrVer;
    245 
    246     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    247         return 0;
    248     }
    249 
    250     /* open the data outside the mutex block */
    251     //TODO: change the path
    252     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    253     if(U_FAILURE(*errorCode)) {
    254         return FALSE;
    255     }
    256 
    257     p=(const int32_t *)udata_getMemory(dataMemory);
    258     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    259     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    260     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    261 
    262 
    263     if(U_FAILURE(*errorCode)) {
    264         udata_close(dataMemory);
    265         return FALSE;
    266     }
    267 
    268     /* in the mutex block, set the data for this process */
    269     umtx_lock(&usprepMutex);
    270     if(profile->sprepData==NULL) {
    271         profile->sprepData=dataMemory;
    272         dataMemory=NULL;
    273         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    274         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    275     } else {
    276         p=(const int32_t *)udata_getMemory(profile->sprepData);
    277     }
    278     umtx_unlock(&usprepMutex);
    279     /* initialize some variables */
    280     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    281 
    282     unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
    283     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    284                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    285     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    286                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    287     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    288 
    289     if(U_FAILURE(*errorCode)){
    290         udata_close(dataMemory);
    291         return FALSE;
    292     }
    293     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    294         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    295         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    296       ){
    297         *errorCode = U_INVALID_FORMAT_ERROR;
    298         udata_close(dataMemory);
    299         return FALSE;
    300     }
    301     profile->isDataLoaded = TRUE;
    302 
    303     /* if a different thread set it first, then close the extra data */
    304     if(dataMemory!=NULL) {
    305         udata_close(dataMemory); /* NULL if it was set correctly */
    306     }
    307 
    308 
    309     return profile->isDataLoaded;
    310 }
    311 
    312 static UStringPrepProfile*
    313 usprep_getProfile(const char* path,
    314                   const char* name,
    315                   UErrorCode *status){
    316 
    317     UStringPrepProfile* profile = NULL;
    318 
    319     initCache(status);
    320 
    321     if(U_FAILURE(*status)){
    322         return NULL;
    323     }
    324 
    325     UStringPrepKey stackKey;
    326     /*
    327      * const is cast way to save malloc, strcpy and free calls
    328      * we use the passed in pointers for fetching the data from the
    329      * hash table which is safe
    330      */
    331     stackKey.name = (char*) name;
    332     stackKey.path = (char*) path;
    333 
    334     /* fetch the data from the cache */
    335     umtx_lock(&usprepMutex);
    336     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    337     umtx_unlock(&usprepMutex);
    338 
    339     if(profile == NULL){
    340         UStringPrepKey* key   = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
    341         if(key == NULL){
    342             *status = U_MEMORY_ALLOCATION_ERROR;
    343             return NULL;
    344         }
    345         /* else load the data and put the data in the cache */
    346         profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
    347         if(profile == NULL){
    348             *status = U_MEMORY_ALLOCATION_ERROR;
    349             uprv_free(key);
    350             return NULL;
    351         }
    352 
    353         /* initialize the data struct members */
    354         uprv_memset(profile->indexes,0,sizeof(profile->indexes));
    355         profile->mappingData = NULL;
    356         profile->sprepData   = NULL;
    357         profile->refCount    = 0;
    358 
    359         /* initialize the  key memebers */
    360         key->name  = (char*) uprv_malloc(uprv_strlen(name)+1);
    361         if(key->name == NULL){
    362             *status = U_MEMORY_ALLOCATION_ERROR;
    363             uprv_free(key);
    364             uprv_free(profile);
    365             return NULL;
    366         }
    367 
    368         uprv_strcpy(key->name, name);
    369 
    370         key->path=NULL;
    371 
    372         if(path != NULL){
    373             key->path      = (char*) uprv_malloc(uprv_strlen(path)+1);
    374             if(key->path == NULL){
    375                 *status = U_MEMORY_ALLOCATION_ERROR;
    376                 uprv_free(key->name);
    377                 uprv_free(key);
    378                 uprv_free(profile);
    379                 return NULL;
    380             }
    381             uprv_strcpy(key->path, path);
    382         }
    383 
    384         /* load the data */
    385         if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    386             uprv_free(key->path);
    387             uprv_free(key->name);
    388             uprv_free(key);
    389             uprv_free(profile);
    390             return NULL;
    391         }
    392 
    393         /* get the options */
    394         profile->doNFKC            = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    395         profile->checkBiDi         = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    396 
    397         if(profile->checkBiDi) {
    398             profile->bdp = ubidi_getSingleton(status);
    399             if(U_FAILURE(*status)) {
    400                 usprep_unload(profile);
    401                 uprv_free(key->path);
    402                 uprv_free(key->name);
    403                 uprv_free(key);
    404                 uprv_free(profile);
    405                 return NULL;
    406             }
    407         } else {
    408             profile->bdp = NULL;
    409         }
    410 
    411         umtx_lock(&usprepMutex);
    412         /* add the data object to the cache */
    413         uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
    414         umtx_unlock(&usprepMutex);
    415     }
    416     umtx_lock(&usprepMutex);
    417     /* increment the refcount */
    418     profile->refCount++;
    419     umtx_unlock(&usprepMutex);
    420 
    421     return profile;
    422 }
    423 
    424 U_CAPI UStringPrepProfile* U_EXPORT2
    425 usprep_open(const char* path,
    426             const char* name,
    427             UErrorCode* status){
    428 
    429     if(status == NULL || U_FAILURE(*status)){
    430         return NULL;
    431     }
    432     /* initialize the mutex */
    433     usprep_init();
    434 
    435     /* initialize the profile struct members */
    436     return usprep_getProfile(path,name,status);
    437 }
    438 
    439 U_CAPI UStringPrepProfile* U_EXPORT2
    440 usprep_openByType(UStringPrepProfileType type,
    441 				  UErrorCode* status) {
    442     if(status == NULL || U_FAILURE(*status)){
    443         return NULL;
    444     }
    445     int32_t index = (int32_t)type;
    446     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
    447         *status = U_ILLEGAL_ARGUMENT_ERROR;
    448         return NULL;
    449     }
    450     return usprep_open(NULL, PROFILE_NAMES[index], status);
    451 }
    452 
    453 U_CAPI void U_EXPORT2
    454 usprep_close(UStringPrepProfile* profile){
    455     if(profile==NULL){
    456         return;
    457     }
    458 
    459     umtx_lock(&usprepMutex);
    460     /* decrement the ref count*/
    461     if(profile->refCount > 0){
    462         profile->refCount--;
    463     }
    464     umtx_unlock(&usprepMutex);
    465 
    466 }
    467 
    468 U_CFUNC void
    469 uprv_syntaxError(const UChar* rules,
    470                  int32_t pos,
    471                  int32_t rulesLen,
    472                  UParseError* parseError){
    473     if(parseError == NULL){
    474         return;
    475     }
    476     parseError->offset = pos;
    477     parseError->line = 0 ; // we are not using line numbers
    478 
    479     // for pre-context
    480     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    481     int32_t limit = pos;
    482 
    483     u_memcpy(parseError->preContext,rules+start,limit-start);
    484     //null terminate the buffer
    485     parseError->preContext[limit-start] = 0;
    486 
    487     // for post-context; include error rules[pos]
    488     start = pos;
    489     limit = start + (U_PARSE_CONTEXT_LEN-1);
    490     if (limit > rulesLen) {
    491         limit = rulesLen;
    492     }
    493     if (start < rulesLen) {
    494         u_memcpy(parseError->postContext,rules+start,limit-start);
    495     }
    496     //null terminate the buffer
    497     parseError->postContext[limit-start]= 0;
    498 }
    499 
    500 
    501 static inline UStringPrepType
    502 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    503 
    504     UStringPrepType type;
    505     if(trieWord == 0){
    506         /*
    507          * Initial value stored in the mapping table
    508          * just return USPREP_TYPE_LIMIT .. so that
    509          * the source codepoint is copied to the destination
    510          */
    511         type = USPREP_TYPE_LIMIT;
    512         isIndex =FALSE;
    513         value = 0;
    514     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    515         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    516         isIndex =FALSE;
    517         value = 0;
    518     }else{
    519         /* get the type */
    520         type = USPREP_MAP;
    521         /* ascertain if the value is index or delta */
    522         if(trieWord & 0x02){
    523             isIndex = TRUE;
    524             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    525         }else{
    526             isIndex = FALSE;
    527             value = (int16_t)trieWord;
    528             value =  (value >> 2);
    529         }
    530 
    531         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    532             type = USPREP_DELETE;
    533             isIndex =FALSE;
    534             value = 0;
    535         }
    536     }
    537     return type;
    538 }
    539 
    540 
    541 
    542 static int32_t
    543 usprep_map(  const UStringPrepProfile* profile,
    544              const UChar* src, int32_t srcLength,
    545              UChar* dest, int32_t destCapacity,
    546              int32_t options,
    547              UParseError* parseError,
    548              UErrorCode* status ){
    549 
    550     uint16_t result;
    551     int32_t destIndex=0;
    552     int32_t srcIndex;
    553     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    554     UStringPrepType type;
    555     int16_t value;
    556     UBool isIndex;
    557     const int32_t* indexes = profile->indexes;
    558 
    559     // no error checking the caller check for error and arguments
    560     // no string length check the caller finds out the string length
    561 
    562     for(srcIndex=0;srcIndex<srcLength;){
    563         UChar32 ch;
    564 
    565         U16_NEXT(src,srcIndex,srcLength,ch);
    566 
    567         result=0;
    568 
    569         UTRIE_GET16(&profile->sprepTrie,ch,result);
    570 
    571         type = getValues(result, value, isIndex);
    572 
    573         // check if the source codepoint is unassigned
    574         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    575 
    576             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    577             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    578             return 0;
    579 
    580         }else if(type == USPREP_MAP){
    581 
    582             int32_t index, length;
    583 
    584             if(isIndex){
    585                 index = value;
    586                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    587                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    588                     length = 1;
    589                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    590                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    591                     length = 2;
    592                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    593                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    594                     length = 3;
    595                 }else{
    596                     length = profile->mappingData[index++];
    597 
    598                 }
    599 
    600                 /* copy mapping to destination */
    601                 for(int32_t i=0; i< length; i++){
    602                     if(destIndex < destCapacity  ){
    603                         dest[destIndex] = profile->mappingData[index+i];
    604                     }
    605                     destIndex++; /* for pre-flighting */
    606                 }
    607                 continue;
    608             }else{
    609                 // subtract the delta to arrive at the code point
    610                 ch -= value;
    611             }
    612 
    613         }else if(type==USPREP_DELETE){
    614              // just consume the codepoint and contine
    615             continue;
    616         }
    617         //copy the code point into destination
    618         if(ch <= 0xFFFF){
    619             if(destIndex < destCapacity ){
    620                 dest[destIndex] = (UChar)ch;
    621             }
    622             destIndex++;
    623         }else{
    624             if(destIndex+1 < destCapacity ){
    625                 dest[destIndex]   = U16_LEAD(ch);
    626                 dest[destIndex+1] = U16_TRAIL(ch);
    627             }
    628             destIndex +=2;
    629         }
    630 
    631     }
    632 
    633     return u_terminateUChars(dest, destCapacity, destIndex, status);
    634 }
    635 
    636 
    637 static int32_t
    638 usprep_normalize(   const UChar* src, int32_t srcLength,
    639                     UChar* dest, int32_t destCapacity,
    640                     UErrorCode* status ){
    641     /*
    642      * Option UNORM_BEFORE_PRI_29:
    643      *
    644      * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
    645      * requires strict adherence to Unicode 3.2 normalization,
    646      * including buggy composition from before fixing Public Review Issue #29.
    647      * Note that this results in some valid but nonsensical text to be
    648      * either corrupted or rejected, depending on the text.
    649      * See http://www.unicode.org/review/resolved-pri.html#pri29
    650      * See unorm.cpp and cnormtst.c
    651      */
    652     return unorm_normalize(
    653         src, srcLength,
    654         UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
    655         dest, destCapacity,
    656         status);
    657 }
    658 
    659 
    660  /*
    661    1) Map -- For each character in the input, check if it has a mapping
    662       and, if so, replace it with its mapping.
    663 
    664    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    665       normalization.
    666 
    667    3) Prohibit -- Check for any characters that are not allowed in the
    668       output.  If any are found, return an error.
    669 
    670    4) Check bidi -- Possibly check for right-to-left characters, and if
    671       any are found, make sure that the whole string satisfies the
    672       requirements for bidirectional strings.  If the string does not
    673       satisfy the requirements for bidirectional strings, return an
    674       error.
    675       [Unicode3.2] defines several bidirectional categories; each character
    676        has one bidirectional category assigned to it.  For the purposes of
    677        the requirements below, an "RandALCat character" is a character that
    678        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
    683        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    684        this because they have bidirectional category "EN".
    685 
    686        In any profile that specifies bidirectional character handling, all
    687        three of the following requirements MUST be met:
    688 
    689        1) The characters in section 5.8 MUST be prohibited.
    690 
    691        2) If a string contains any RandALCat character, the string MUST NOT
    692           contain any LCat character.
    693 
    694        3) If a string contains any RandALCat character, a RandALCat
    695           character MUST be the first character of the string, and a
    696           RandALCat character MUST be the last character of the string.
    697 */
    698 
    699 #define MAX_STACK_BUFFER_SIZE 300
    700 
    701 
    702 U_CAPI int32_t U_EXPORT2
    703 usprep_prepare(   const UStringPrepProfile* profile,
    704                   const UChar* src, int32_t srcLength,
    705                   UChar* dest, int32_t destCapacity,
    706                   int32_t options,
    707                   UParseError* parseError,
    708                   UErrorCode* status ){
    709 
    710     // check error status
    711     if(status == NULL || U_FAILURE(*status)){
    712         return 0;
    713     }
    714 
    715     //check arguments
    716     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    717         *status=U_ILLEGAL_ARGUMENT_ERROR;
    718         return 0;
    719     }
    720 
    721     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    722     UChar *b1 = b1Stack, *b2 = b2Stack;
    723     int32_t b1Len, b2Len=0,
    724             b1Capacity = MAX_STACK_BUFFER_SIZE ,
    725             b2Capacity = MAX_STACK_BUFFER_SIZE;
    726     uint16_t result;
    727     int32_t b2Index = 0;
    728     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    729     UBool leftToRight=FALSE, rightToLeft=FALSE;
    730     int32_t rtlPos =-1, ltrPos =-1;
    731 
    732     //get the string length
    733     if(srcLength == -1){
    734         srcLength = u_strlen(src);
    735     }
    736     // map
    737     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
    738 
    739     if(*status == U_BUFFER_OVERFLOW_ERROR){
    740         // redo processing of string
    741         /* we do not have enough room so grow the buffer*/
    742         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
    743         if(b1==NULL){
    744             *status = U_MEMORY_ALLOCATION_ERROR;
    745             goto CLEANUP;
    746         }
    747 
    748         *status = U_ZERO_ERROR; // reset error
    749 
    750         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    751 
    752     }
    753 
    754     // normalize
    755     if(profile->doNFKC == TRUE){
    756         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    757 
    758         if(*status == U_BUFFER_OVERFLOW_ERROR){
    759             // redo processing of string
    760             /* we do not have enough room so grow the buffer*/
    761             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
    762             if(b2==NULL){
    763                 *status = U_MEMORY_ALLOCATION_ERROR;
    764                 goto CLEANUP;
    765             }
    766 
    767             *status = U_ZERO_ERROR; // reset error
    768 
    769             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    770 
    771         }
    772 
    773     }else{
    774         b2 = b1;
    775         b2Len = b1Len;
    776     }
    777 
    778 
    779     if(U_FAILURE(*status)){
    780         goto CLEANUP;
    781     }
    782 
    783     UChar32 ch;
    784     UStringPrepType type;
    785     int16_t value;
    786     UBool isIndex;
    787 
    788     // Prohibit and checkBiDi in one pass
    789     for(b2Index=0; b2Index<b2Len;){
    790 
    791         ch = 0;
    792 
    793         U16_NEXT(b2, b2Index, b2Len, ch);
    794 
    795         UTRIE_GET16(&profile->sprepTrie,ch,result);
    796 
    797         type = getValues(result, value, isIndex);
    798 
    799         if( type == USPREP_PROHIBITED ||
    800             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    801            ){
    802             *status = U_STRINGPREP_PROHIBITED_ERROR;
    803             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    804             goto CLEANUP;
    805         }
    806 
    807         if(profile->checkBiDi) {
    808             direction = ubidi_getClass(profile->bdp, ch);
    809             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    810                 firstCharDir = direction;
    811             }
    812             if(direction == U_LEFT_TO_RIGHT){
    813                 leftToRight = TRUE;
    814                 ltrPos = b2Index-1;
    815             }
    816             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    817                 rightToLeft = TRUE;
    818                 rtlPos = b2Index-1;
    819             }
    820         }
    821     }
    822     if(profile->checkBiDi == TRUE){
    823         // satisfy 2
    824         if( leftToRight == TRUE && rightToLeft == TRUE){
    825             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    826             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    827             goto CLEANUP;
    828         }
    829 
    830         //satisfy 3
    831         if( rightToLeft == TRUE &&
    832             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    833               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    834            ){
    835             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    836             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    837             return FALSE;
    838         }
    839     }
    840     if(b2Len>0 && b2Len <= destCapacity){
    841         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    842     }
    843 
    844 CLEANUP:
    845     if(b1!=b1Stack){
    846         uprv_free(b1);
    847         b1=NULL;
    848     }
    849 
    850     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
    851         uprv_free(b2);
    852         b2=NULL;
    853     }
    854     return u_terminateUChars(dest, destCapacity, b2Len, status);
    855 }
    856 
    857 
    858 /* data swapping ------------------------------------------------------------ */
    859 
    860 U_CAPI int32_t U_EXPORT2
    861 usprep_swap(const UDataSwapper *ds,
    862             const void *inData, int32_t length, void *outData,
    863             UErrorCode *pErrorCode) {
    864     const UDataInfo *pInfo;
    865     int32_t headerSize;
    866 
    867     const uint8_t *inBytes;
    868     uint8_t *outBytes;
    869 
    870     const int32_t *inIndexes;
    871     int32_t indexes[16];
    872 
    873     int32_t i, offset, count, size;
    874 
    875     /* udata_swapDataHeader checks the arguments */
    876     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    877     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    878         return 0;
    879     }
    880 
    881     /* check data format and format version */
    882     pInfo=(const UDataInfo *)((const char *)inData+4);
    883     if(!(
    884         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    885         pInfo->dataFormat[1]==0x50 &&
    886         pInfo->dataFormat[2]==0x52 &&
    887         pInfo->dataFormat[3]==0x50 &&
    888         pInfo->formatVersion[0]==3
    889     )) {
    890         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    891                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    892                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    893                          pInfo->formatVersion[0]);
    894         *pErrorCode=U_UNSUPPORTED_ERROR;
    895         return 0;
    896     }
    897 
    898     inBytes=(const uint8_t *)inData+headerSize;
    899     outBytes=(uint8_t *)outData+headerSize;
    900 
    901     inIndexes=(const int32_t *)inBytes;
    902 
    903     if(length>=0) {
    904         length-=headerSize;
    905         if(length<16*4) {
    906             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    907                              length);
    908             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    909             return 0;
    910         }
    911     }
    912 
    913     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    914     for(i=0; i<16; ++i) {
    915         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    916     }
    917 
    918     /* calculate the total length of the data */
    919     size=
    920         16*4+ /* size of indexes[] */
    921         indexes[_SPREP_INDEX_TRIE_SIZE]+
    922         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    923 
    924     if(length>=0) {
    925         if(length<size) {
    926             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    927                              length);
    928             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    929             return 0;
    930         }
    931 
    932         /* copy the data for inaccessible bytes */
    933         if(inBytes!=outBytes) {
    934             uprv_memcpy(outBytes, inBytes, size);
    935         }
    936 
    937         offset=0;
    938 
    939         /* swap the int32_t indexes[] */
    940         count=16*4;
    941         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    942         offset+=count;
    943 
    944         /* swap the UTrie */
    945         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    946         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    947         offset+=count;
    948 
    949         /* swap the uint16_t mappingTable[] */
    950         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    951         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    952         offset+=count;
    953     }
    954 
    955     return headerSize+size;
    956 }
    957 
    958 #endif /* #if !UCONFIG_NO_IDNA */
    959