Home | History | Annotate | Download | only in common
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2010, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  usprep.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003jul2
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_IDNA
     20 
     21 #include "unicode/usprep.h"
     22 
     23 #include "unicode/unorm.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uversion.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "sprpimpl.h"
     30 #include "ustr_imp.h"
     31 #include "uhash.h"
     32 #include "cstring.h"
     33 #include "udataswp.h"
     34 #include "ucln_cmn.h"
     35 #include "ubidi_props.h"
     36 
     37 U_NAMESPACE_USE
     38 
     39 U_CDECL_BEGIN
     40 
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey* (profile name + data path) and values are
 * UStringPrepProfile*; see hashEntry() and compareEntries().
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;

/* Mutex taken around accesses to the cache and to profile reference counts. */
static UMTX usprepMutex = NULL;

/* format version of spp file; overwritten by isSPrepAcceptable() on each accepted data item */
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; overwritten by isSPrepAcceptable(), read by loadData() */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     53 
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 * Several profile types intentionally share the "rfc3491" data name.
 */
static const char *PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
     71 
     72 static UBool U_CALLCONV
     73 isSPrepAcceptable(void * /* context */,
     74              const char * /* type */,
     75              const char * /* name */,
     76              const UDataInfo *pInfo) {
     77     if(
     78         pInfo->size>=20 &&
     79         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     80         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     81         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     82         pInfo->dataFormat[1]==0x50 &&
     83         pInfo->dataFormat[2]==0x52 &&
     84         pInfo->dataFormat[3]==0x50 &&
     85         pInfo->formatVersion[0]==3 &&
     86         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     87         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     88     ) {
     89         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     90         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     91         return TRUE;
     92     } else {
     93         return FALSE;
     94     }
     95 }
     96 
     97 static int32_t U_CALLCONV
     98 getSPrepFoldingOffset(uint32_t data) {
     99 
    100     return (int32_t)data;
    101 
    102 }
    103 
    104 /* hashes an entry  */
    105 static int32_t U_CALLCONV
    106 hashEntry(const UHashTok parm) {
    107     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    108     UHashTok namekey, pathkey;
    109     namekey.pointer = b->name;
    110     pathkey.pointer = b->path;
    111     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    112 }
    113 
    114 /* compares two entries */
    115 static UBool U_CALLCONV
    116 compareEntries(const UHashTok p1, const UHashTok p2) {
    117     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    118     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    119     UHashTok name1, name2, path1, path2;
    120     name1.pointer = b1->name;
    121     name2.pointer = b2->name;
    122     path1.pointer = b1->path;
    123     path2.pointer = b2->path;
    124     return ((UBool)(uhash_compareChars(name1, name2) &
    125         uhash_compareChars(path1, path2)));
    126 }
    127 
    128 static void
    129 usprep_unload(UStringPrepProfile* data){
    130     udata_close(data->sprepData);
    131 }
    132 
    133 static int32_t
    134 usprep_internal_flushCache(UBool noRefCount){
    135     UStringPrepProfile *profile = NULL;
    136     UStringPrepKey  *key  = NULL;
    137     int32_t pos = -1;
    138     int32_t deletedNum = 0;
    139     const UHashElement *e;
    140 
    141     /*
    142      * if shared data hasn't even been lazy evaluated yet
    143      * return 0
    144      */
    145     umtx_lock(&usprepMutex);
    146     if (SHARED_DATA_HASHTABLE == NULL) {
    147         umtx_unlock(&usprepMutex);
    148         return 0;
    149     }
    150 
    151     /*creates an enumeration to iterate through every element in the table */
    152     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    153     {
    154         profile = (UStringPrepProfile *) e->value.pointer;
    155         key  = (UStringPrepKey *) e->key.pointer;
    156 
    157         if ((noRefCount== FALSE && profile->refCount == 0) ||
    158              noRefCount== TRUE) {
    159             deletedNum++;
    160             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    161 
    162             /* unload the data */
    163             usprep_unload(profile);
    164 
    165             if(key->name != NULL) {
    166                 uprv_free(key->name);
    167                 key->name=NULL;
    168             }
    169             if(key->path != NULL) {
    170                 uprv_free(key->path);
    171                 key->path=NULL;
    172             }
    173             uprv_free(profile);
    174             uprv_free(key);
    175         }
    176 
    177     }
    178     umtx_unlock(&usprepMutex);
    179 
    180     return deletedNum;
    181 }
    182 
    183 /* Works just like ucnv_flushCache()
    184 static int32_t
    185 usprep_flushCache(){
    186     return usprep_internal_flushCache(FALSE);
    187 }
    188 */
    189 
    190 static UBool U_CALLCONV usprep_cleanup(void){
    191     if (SHARED_DATA_HASHTABLE != NULL) {
    192         usprep_internal_flushCache(TRUE);
    193         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    194             uhash_close(SHARED_DATA_HASHTABLE);
    195             SHARED_DATA_HASHTABLE = NULL;
    196         }
    197     }
    198 
    199     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
    200                                             /*  if the hash table still exists.  The mutex  */
    201                                             /*  will lazily re-init  itself if needed.      */
    202     return (SHARED_DATA_HASHTABLE == NULL);
    203 }
    204 U_CDECL_END
    205 
    206 
    207 /** Initializes the cache for resources */
    208 static void
    209 initCache(UErrorCode *status) {
    210     UBool makeCache;
    211     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
    212     if(makeCache) {
    213         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
    214         if (U_SUCCESS(*status)) {
    215             umtx_lock(&usprepMutex);
    216             if(SHARED_DATA_HASHTABLE == NULL) {
    217                 SHARED_DATA_HASHTABLE = newCache;
    218                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    219                 newCache = NULL;
    220             }
    221             umtx_unlock(&usprepMutex);
    222         }
    223         if(newCache != NULL) {
    224             uhash_close(newCache);
    225         }
    226     }
    227 }
    228 
    229 static UBool U_CALLCONV
    230 loadData(UStringPrepProfile* profile,
    231          const char* path,
    232          const char* name,
    233          const char* type,
    234          UErrorCode* errorCode) {
    235     /* load Unicode SPREP data from file */
    236     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    237     UDataMemory *dataMemory;
    238     const int32_t *p=NULL;
    239     const uint8_t *pb;
    240     UVersionInfo normUnicodeVersion;
    241     int32_t normUniVer, sprepUniVer, normCorrVer;
    242 
    243     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    244         return 0;
    245     }
    246 
    247     /* open the data outside the mutex block */
    248     //TODO: change the path
    249     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    250     if(U_FAILURE(*errorCode)) {
    251         return FALSE;
    252     }
    253 
    254     p=(const int32_t *)udata_getMemory(dataMemory);
    255     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    256     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    257     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    258 
    259 
    260     if(U_FAILURE(*errorCode)) {
    261         udata_close(dataMemory);
    262         return FALSE;
    263     }
    264 
    265     /* in the mutex block, set the data for this process */
    266     umtx_lock(&usprepMutex);
    267     if(profile->sprepData==NULL) {
    268         profile->sprepData=dataMemory;
    269         dataMemory=NULL;
    270         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    271         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    272     } else {
    273         p=(const int32_t *)udata_getMemory(profile->sprepData);
    274     }
    275     umtx_unlock(&usprepMutex);
    276     /* initialize some variables */
    277     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    278 
    279     u_getUnicodeVersion(normUnicodeVersion);
    280     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    281                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    282     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    283                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    284     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    285 
    286     if(U_FAILURE(*errorCode)){
    287         udata_close(dataMemory);
    288         return FALSE;
    289     }
    290     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    291         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    292         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    293       ){
    294         *errorCode = U_INVALID_FORMAT_ERROR;
    295         udata_close(dataMemory);
    296         return FALSE;
    297     }
    298     profile->isDataLoaded = TRUE;
    299 
    300     /* if a different thread set it first, then close the extra data */
    301     if(dataMemory!=NULL) {
    302         udata_close(dataMemory); /* NULL if it was set correctly */
    303     }
    304 
    305 
    306     return profile->isDataLoaded;
    307 }
    308 
    309 static UStringPrepProfile*
    310 usprep_getProfile(const char* path,
    311                   const char* name,
    312                   UErrorCode *status){
    313 
    314     UStringPrepProfile* profile = NULL;
    315 
    316     initCache(status);
    317 
    318     if(U_FAILURE(*status)){
    319         return NULL;
    320     }
    321 
    322     UStringPrepKey stackKey;
    323     /*
    324      * const is cast way to save malloc, strcpy and free calls
    325      * we use the passed in pointers for fetching the data from the
    326      * hash table which is safe
    327      */
    328     stackKey.name = (char*) name;
    329     stackKey.path = (char*) path;
    330 
    331     /* fetch the data from the cache */
    332     umtx_lock(&usprepMutex);
    333     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    334     if(profile != NULL) {
    335         profile->refCount++;
    336     }
    337     umtx_unlock(&usprepMutex);
    338 
    339     if(profile == NULL) {
    340         /* else load the data and put the data in the cache */
    341         LocalMemory<UStringPrepProfile> newProfile;
    342         if(newProfile.allocateInsteadAndReset() == NULL) {
    343             *status = U_MEMORY_ALLOCATION_ERROR;
    344             return NULL;
    345         }
    346 
    347         /* load the data */
    348         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    349             return NULL;
    350         }
    351 
    352         /* get the options */
    353         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    354         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    355 
    356         if(newProfile->checkBiDi) {
    357             newProfile->bdp = ubidi_getSingleton(status);
    358             if(U_FAILURE(*status)) {
    359                 usprep_unload(newProfile.getAlias());
    360                 return NULL;
    361             }
    362         }
    363 
    364         LocalMemory<UStringPrepKey> key;
    365         LocalMemory<char> keyName;
    366         LocalMemory<char> keyPath;
    367         if( key.allocateInsteadAndReset() == NULL ||
    368             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
    369             (path != NULL &&
    370              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
    371          ) {
    372             *status = U_MEMORY_ALLOCATION_ERROR;
    373             usprep_unload(newProfile.getAlias());
    374             return NULL;
    375         }
    376 
    377         /* initialize the key members */
    378         key->name = keyName.orphan();
    379         uprv_strcpy(key->name, name);
    380         if(path != NULL){
    381             key->path = keyPath.orphan();
    382             uprv_strcpy(key->path, path);
    383         }
    384 
    385         profile = newProfile.orphan();
    386         umtx_lock(&usprepMutex);
    387         /* add the data object to the cache */
    388         profile->refCount = 1;
    389         uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    390         umtx_unlock(&usprepMutex);
    391     }
    392 
    393     return profile;
    394 }
    395 
    396 U_CAPI UStringPrepProfile* U_EXPORT2
    397 usprep_open(const char* path,
    398             const char* name,
    399             UErrorCode* status){
    400 
    401     if(status == NULL || U_FAILURE(*status)){
    402         return NULL;
    403     }
    404 
    405     /* initialize the profile struct members */
    406     return usprep_getProfile(path,name,status);
    407 }
    408 
    409 U_CAPI UStringPrepProfile* U_EXPORT2
    410 usprep_openByType(UStringPrepProfileType type,
    411 				  UErrorCode* status) {
    412     if(status == NULL || U_FAILURE(*status)){
    413         return NULL;
    414     }
    415     int32_t index = (int32_t)type;
    416     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
    417         *status = U_ILLEGAL_ARGUMENT_ERROR;
    418         return NULL;
    419     }
    420     return usprep_open(NULL, PROFILE_NAMES[index], status);
    421 }
    422 
    423 U_CAPI void U_EXPORT2
    424 usprep_close(UStringPrepProfile* profile){
    425     if(profile==NULL){
    426         return;
    427     }
    428 
    429     umtx_lock(&usprepMutex);
    430     /* decrement the ref count*/
    431     if(profile->refCount > 0){
    432         profile->refCount--;
    433     }
    434     umtx_unlock(&usprepMutex);
    435 
    436 }
    437 
    438 U_CFUNC void
    439 uprv_syntaxError(const UChar* rules,
    440                  int32_t pos,
    441                  int32_t rulesLen,
    442                  UParseError* parseError){
    443     if(parseError == NULL){
    444         return;
    445     }
    446     parseError->offset = pos;
    447     parseError->line = 0 ; // we are not using line numbers
    448 
    449     // for pre-context
    450     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    451     int32_t limit = pos;
    452 
    453     u_memcpy(parseError->preContext,rules+start,limit-start);
    454     //null terminate the buffer
    455     parseError->preContext[limit-start] = 0;
    456 
    457     // for post-context; include error rules[pos]
    458     start = pos;
    459     limit = start + (U_PARSE_CONTEXT_LEN-1);
    460     if (limit > rulesLen) {
    461         limit = rulesLen;
    462     }
    463     if (start < rulesLen) {
    464         u_memcpy(parseError->postContext,rules+start,limit-start);
    465     }
    466     //null terminate the buffer
    467     parseError->postContext[limit-start]= 0;
    468 }
    469 
    470 
    471 static inline UStringPrepType
    472 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    473 
    474     UStringPrepType type;
    475     if(trieWord == 0){
    476         /*
    477          * Initial value stored in the mapping table
    478          * just return USPREP_TYPE_LIMIT .. so that
    479          * the source codepoint is copied to the destination
    480          */
    481         type = USPREP_TYPE_LIMIT;
    482         isIndex =FALSE;
    483         value = 0;
    484     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    485         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    486         isIndex =FALSE;
    487         value = 0;
    488     }else{
    489         /* get the type */
    490         type = USPREP_MAP;
    491         /* ascertain if the value is index or delta */
    492         if(trieWord & 0x02){
    493             isIndex = TRUE;
    494             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    495         }else{
    496             isIndex = FALSE;
    497             value = (int16_t)trieWord;
    498             value =  (value >> 2);
    499         }
    500 
    501         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    502             type = USPREP_DELETE;
    503             isIndex =FALSE;
    504             value = 0;
    505         }
    506     }
    507     return type;
    508 }
    509 
    510 
    511 
    512 static int32_t
    513 usprep_map(  const UStringPrepProfile* profile,
    514              const UChar* src, int32_t srcLength,
    515              UChar* dest, int32_t destCapacity,
    516              int32_t options,
    517              UParseError* parseError,
    518              UErrorCode* status ){
    519 
    520     uint16_t result;
    521     int32_t destIndex=0;
    522     int32_t srcIndex;
    523     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    524     UStringPrepType type;
    525     int16_t value;
    526     UBool isIndex;
    527     const int32_t* indexes = profile->indexes;
    528 
    529     // no error checking the caller check for error and arguments
    530     // no string length check the caller finds out the string length
    531 
    532     for(srcIndex=0;srcIndex<srcLength;){
    533         UChar32 ch;
    534 
    535         U16_NEXT(src,srcIndex,srcLength,ch);
    536 
    537         result=0;
    538 
    539         UTRIE_GET16(&profile->sprepTrie,ch,result);
    540 
    541         type = getValues(result, value, isIndex);
    542 
    543         // check if the source codepoint is unassigned
    544         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    545 
    546             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    547             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    548             return 0;
    549 
    550         }else if(type == USPREP_MAP){
    551 
    552             int32_t index, length;
    553 
    554             if(isIndex){
    555                 index = value;
    556                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    557                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    558                     length = 1;
    559                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    560                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    561                     length = 2;
    562                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    563                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    564                     length = 3;
    565                 }else{
    566                     length = profile->mappingData[index++];
    567 
    568                 }
    569 
    570                 /* copy mapping to destination */
    571                 for(int32_t i=0; i< length; i++){
    572                     if(destIndex < destCapacity  ){
    573                         dest[destIndex] = profile->mappingData[index+i];
    574                     }
    575                     destIndex++; /* for pre-flighting */
    576                 }
    577                 continue;
    578             }else{
    579                 // subtract the delta to arrive at the code point
    580                 ch -= value;
    581             }
    582 
    583         }else if(type==USPREP_DELETE){
    584              // just consume the codepoint and contine
    585             continue;
    586         }
    587         //copy the code point into destination
    588         if(ch <= 0xFFFF){
    589             if(destIndex < destCapacity ){
    590                 dest[destIndex] = (UChar)ch;
    591             }
    592             destIndex++;
    593         }else{
    594             if(destIndex+1 < destCapacity ){
    595                 dest[destIndex]   = U16_LEAD(ch);
    596                 dest[destIndex+1] = U16_TRAIL(ch);
    597             }
    598             destIndex +=2;
    599         }
    600 
    601     }
    602 
    603     return u_terminateUChars(dest, destCapacity, destIndex, status);
    604 }
    605 
    606 
    607 static int32_t
    608 usprep_normalize(   const UChar* src, int32_t srcLength,
    609                     UChar* dest, int32_t destCapacity,
    610                     UErrorCode* status ){
    611     return unorm_normalize(
    612         src, srcLength,
    613         UNORM_NFKC, UNORM_UNICODE_3_2,
    614         dest, destCapacity,
    615         status);
    616 }
    617 
    618 
    619  /*
    620    1) Map -- For each character in the input, check if it has a mapping
    621       and, if so, replace it with its mapping.
    622 
    623    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    624       normalization.
    625 
    626    3) Prohibit -- Check for any characters that are not allowed in the
    627       output.  If any are found, return an error.
    628 
    629    4) Check bidi -- Possibly check for right-to-left characters, and if
    630       any are found, make sure that the whole string satisfies the
    631       requirements for bidirectional strings.  If the string does not
    632       satisfy the requirements for bidirectional strings, return an
    633       error.
    634       [Unicode3.2] defines several bidirectional categories; each character
    635        has one bidirectional category assigned to it.  For the purposes of
    636        the requirements below, an "RandALCat character" is a character that
    637        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
    642        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    643        this because they have bidirectional category "EN".
    644 
    645        In any profile that specifies bidirectional character handling, all
    646        three of the following requirements MUST be met:
    647 
    648        1) The characters in section 5.8 MUST be prohibited.
    649 
    650        2) If a string contains any RandALCat character, the string MUST NOT
    651           contain any LCat character.
    652 
    653        3) If a string contains any RandALCat character, a RandALCat
    654           character MUST be the first character of the string, and a
    655           RandALCat character MUST be the last character of the string.
    656 */
    657 
    658 #define MAX_STACK_BUFFER_SIZE 300
    659 
    660 
    661 U_CAPI int32_t U_EXPORT2
    662 usprep_prepare(   const UStringPrepProfile* profile,
    663                   const UChar* src, int32_t srcLength,
    664                   UChar* dest, int32_t destCapacity,
    665                   int32_t options,
    666                   UParseError* parseError,
    667                   UErrorCode* status ){
    668 
    669     // check error status
    670     if(status == NULL || U_FAILURE(*status)){
    671         return 0;
    672     }
    673 
    674     //check arguments
    675     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    676         *status=U_ILLEGAL_ARGUMENT_ERROR;
    677         return 0;
    678     }
    679 
    680     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    681     UChar *b1 = b1Stack, *b2 = b2Stack;
    682     int32_t b1Len, b2Len=0,
    683             b1Capacity = MAX_STACK_BUFFER_SIZE ,
    684             b2Capacity = MAX_STACK_BUFFER_SIZE;
    685     uint16_t result;
    686     int32_t b2Index = 0;
    687     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    688     UBool leftToRight=FALSE, rightToLeft=FALSE;
    689     int32_t rtlPos =-1, ltrPos =-1;
    690 
    691     //get the string length
    692     if(srcLength == -1){
    693         srcLength = u_strlen(src);
    694     }
    695     // map
    696     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
    697 
    698     if(*status == U_BUFFER_OVERFLOW_ERROR){
    699         // redo processing of string
    700         /* we do not have enough room so grow the buffer*/
    701         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
    702         if(b1==NULL){
    703             *status = U_MEMORY_ALLOCATION_ERROR;
    704             goto CLEANUP;
    705         }
    706 
    707         *status = U_ZERO_ERROR; // reset error
    708 
    709         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    710 
    711     }
    712 
    713     // normalize
    714     if(profile->doNFKC == TRUE){
    715         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    716 
    717         if(*status == U_BUFFER_OVERFLOW_ERROR){
    718             // redo processing of string
    719             /* we do not have enough room so grow the buffer*/
    720             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
    721             if(b2==NULL){
    722                 *status = U_MEMORY_ALLOCATION_ERROR;
    723                 goto CLEANUP;
    724             }
    725 
    726             *status = U_ZERO_ERROR; // reset error
    727 
    728             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    729 
    730         }
    731 
    732     }else{
    733         b2 = b1;
    734         b2Len = b1Len;
    735     }
    736 
    737 
    738     if(U_FAILURE(*status)){
    739         goto CLEANUP;
    740     }
    741 
    742     UChar32 ch;
    743     UStringPrepType type;
    744     int16_t value;
    745     UBool isIndex;
    746 
    747     // Prohibit and checkBiDi in one pass
    748     for(b2Index=0; b2Index<b2Len;){
    749 
    750         ch = 0;
    751 
    752         U16_NEXT(b2, b2Index, b2Len, ch);
    753 
    754         UTRIE_GET16(&profile->sprepTrie,ch,result);
    755 
    756         type = getValues(result, value, isIndex);
    757 
    758         if( type == USPREP_PROHIBITED ||
    759             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    760            ){
    761             *status = U_STRINGPREP_PROHIBITED_ERROR;
    762             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    763             goto CLEANUP;
    764         }
    765 
    766         if(profile->checkBiDi) {
    767             direction = ubidi_getClass(profile->bdp, ch);
    768             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    769                 firstCharDir = direction;
    770             }
    771             if(direction == U_LEFT_TO_RIGHT){
    772                 leftToRight = TRUE;
    773                 ltrPos = b2Index-1;
    774             }
    775             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    776                 rightToLeft = TRUE;
    777                 rtlPos = b2Index-1;
    778             }
    779         }
    780     }
    781     if(profile->checkBiDi == TRUE){
    782         // satisfy 2
    783         if( leftToRight == TRUE && rightToLeft == TRUE){
    784             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    785             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    786             goto CLEANUP;
    787         }
    788 
    789         //satisfy 3
    790         if( rightToLeft == TRUE &&
    791             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    792               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    793            ){
    794             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    795             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    796             return FALSE;
    797         }
    798     }
    799     if(b2Len>0 && b2Len <= destCapacity){
    800         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    801     }
    802 
    803 CLEANUP:
    804     if(b1!=b1Stack){
    805         uprv_free(b1);
    806         b1=NULL;
    807     }
    808 
    809     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
    810         uprv_free(b2);
    811         b2=NULL;
    812     }
    813     return u_terminateUChars(dest, destCapacity, b2Len, status);
    814 }
    815 
    816 
    817 /* data swapping ------------------------------------------------------------ */
    818 
    819 U_CAPI int32_t U_EXPORT2
    820 usprep_swap(const UDataSwapper *ds,
    821             const void *inData, int32_t length, void *outData,
    822             UErrorCode *pErrorCode) {
    823     const UDataInfo *pInfo;
    824     int32_t headerSize;
    825 
    826     const uint8_t *inBytes;
    827     uint8_t *outBytes;
    828 
    829     const int32_t *inIndexes;
    830     int32_t indexes[16];
    831 
    832     int32_t i, offset, count, size;
    833 
    834     /* udata_swapDataHeader checks the arguments */
    835     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    836     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    837         return 0;
    838     }
    839 
    840     /* check data format and format version */
    841     pInfo=(const UDataInfo *)((const char *)inData+4);
    842     if(!(
    843         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    844         pInfo->dataFormat[1]==0x50 &&
    845         pInfo->dataFormat[2]==0x52 &&
    846         pInfo->dataFormat[3]==0x50 &&
    847         pInfo->formatVersion[0]==3
    848     )) {
    849         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    850                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    851                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    852                          pInfo->formatVersion[0]);
    853         *pErrorCode=U_UNSUPPORTED_ERROR;
    854         return 0;
    855     }
    856 
    857     inBytes=(const uint8_t *)inData+headerSize;
    858     outBytes=(uint8_t *)outData+headerSize;
    859 
    860     inIndexes=(const int32_t *)inBytes;
    861 
    862     if(length>=0) {
    863         length-=headerSize;
    864         if(length<16*4) {
    865             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    866                              length);
    867             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    868             return 0;
    869         }
    870     }
    871 
    872     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    873     for(i=0; i<16; ++i) {
    874         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    875     }
    876 
    877     /* calculate the total length of the data */
    878     size=
    879         16*4+ /* size of indexes[] */
    880         indexes[_SPREP_INDEX_TRIE_SIZE]+
    881         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    882 
    883     if(length>=0) {
    884         if(length<size) {
    885             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    886                              length);
    887             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    888             return 0;
    889         }
    890 
    891         /* copy the data for inaccessible bytes */
    892         if(inBytes!=outBytes) {
    893             uprv_memcpy(outBytes, inBytes, size);
    894         }
    895 
    896         offset=0;
    897 
    898         /* swap the int32_t indexes[] */
    899         count=16*4;
    900         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    901         offset+=count;
    902 
    903         /* swap the UTrie */
    904         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    905         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    906         offset+=count;
    907 
    908         /* swap the uint16_t mappingTable[] */
    909         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    910         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    911         offset+=count;
    912     }
    913 
    914     return headerSize+size;
    915 }
    916 
    917 #endif /* #if !UCONFIG_NO_IDNA */
    918