Home | History | Annotate | Download | only in common
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2010, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  usprep.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003jul2
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_IDNA
     20 
     21 #include "unicode/usprep.h"
     22 
     23 #include "unicode/unorm.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uversion.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "sprpimpl.h"
     30 #include "ustr_imp.h"
     31 #include "uhash.h"
     32 #include "cstring.h"
     33 #include "udataswp.h"
     34 #include "ucln_cmn.h"
     35 #include "ubidi_props.h"
     36 
     37 U_NAMESPACE_USE
     38 
     39 U_CDECL_BEGIN
     40 
     41 /*
     42 Static cache for already opened StringPrep profiles
     43 */
     44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
     45 
     46 static UMTX usprepMutex = NULL;
     47 
     48 /* format version of spp file */
     49 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     50 
     51 /* the Unicode version of the sprep data */
     52 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     53 
     54 /* Profile names must be aligned to UStringPrepProfileType */
     55 static const char *PROFILE_NAMES[] = {
     56     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
     57     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
     58     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
     59     "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
     60     "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
     61     "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
     62     "rfc3722",      /* USPREP_RFC3722_ISCSI */
     63     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
     64     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
     65     "rfc4011",      /* USPREP_RFC4011_MIB */
     66     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
     67     "rfc4505",      /* USPREP_RFC4505_TRACE */
     68     "rfc4518",      /* USPREP_RFC4518_LDAP */
     69     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
     70 };
     71 
     72 static UBool U_CALLCONV
     73 isSPrepAcceptable(void * /* context */,
     74              const char * /* type */,
     75              const char * /* name */,
     76              const UDataInfo *pInfo) {
     77     if(
     78         pInfo->size>=20 &&
     79         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     80         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     81         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     82         pInfo->dataFormat[1]==0x50 &&
     83         pInfo->dataFormat[2]==0x52 &&
     84         pInfo->dataFormat[3]==0x50 &&
     85         pInfo->formatVersion[0]==3 &&
     86         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     87         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     88     ) {
     89         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     90         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     91         return TRUE;
     92     } else {
     93         return FALSE;
     94     }
     95 }
     96 
     97 static int32_t U_CALLCONV
     98 getSPrepFoldingOffset(uint32_t data) {
     99 
    100     return (int32_t)data;
    101 
    102 }
    103 
    104 /* hashes an entry  */
    105 static int32_t U_CALLCONV
    106 hashEntry(const UHashTok parm) {
    107     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    108     UHashTok namekey, pathkey;
    109     namekey.pointer = b->name;
    110     pathkey.pointer = b->path;
    111     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    112 }
    113 
    114 /* compares two entries */
    115 static UBool U_CALLCONV
    116 compareEntries(const UHashTok p1, const UHashTok p2) {
    117     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    118     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    119     UHashTok name1, name2, path1, path2;
    120     name1.pointer = b1->name;
    121     name2.pointer = b2->name;
    122     path1.pointer = b1->path;
    123     path2.pointer = b2->path;
    124     return ((UBool)(uhash_compareChars(name1, name2) &
    125         uhash_compareChars(path1, path2)));
    126 }
    127 
    128 static void
    129 usprep_unload(UStringPrepProfile* data){
    130     udata_close(data->sprepData);
    131 }
    132 
    133 static int32_t
    134 usprep_internal_flushCache(UBool noRefCount){
    135     UStringPrepProfile *profile = NULL;
    136     UStringPrepKey  *key  = NULL;
    137     int32_t pos = -1;
    138     int32_t deletedNum = 0;
    139     const UHashElement *e;
    140 
    141     /*
    142      * if shared data hasn't even been lazy evaluated yet
    143      * return 0
    144      */
    145     umtx_lock(&usprepMutex);
    146     if (SHARED_DATA_HASHTABLE == NULL) {
    147         umtx_unlock(&usprepMutex);
    148         return 0;
    149     }
    150 
    151     /*creates an enumeration to iterate through every element in the table */
    152     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    153     {
    154         profile = (UStringPrepProfile *) e->value.pointer;
    155         key  = (UStringPrepKey *) e->key.pointer;
    156 
    157         if ((noRefCount== FALSE && profile->refCount == 0) ||
    158              noRefCount== TRUE) {
    159             deletedNum++;
    160             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    161 
    162             /* unload the data */
    163             usprep_unload(profile);
    164 
    165             if(key->name != NULL) {
    166                 uprv_free(key->name);
    167                 key->name=NULL;
    168             }
    169             if(key->path != NULL) {
    170                 uprv_free(key->path);
    171                 key->path=NULL;
    172             }
    173             uprv_free(profile);
    174             uprv_free(key);
    175         }
    176 
    177     }
    178     umtx_unlock(&usprepMutex);
    179 
    180     return deletedNum;
    181 }
    182 
    183 /* Works just like ucnv_flushCache()
    184 static int32_t
    185 usprep_flushCache(){
    186     return usprep_internal_flushCache(FALSE);
    187 }
    188 */
    189 
    190 static UBool U_CALLCONV usprep_cleanup(void){
    191     if (SHARED_DATA_HASHTABLE != NULL) {
    192         usprep_internal_flushCache(TRUE);
    193         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    194             uhash_close(SHARED_DATA_HASHTABLE);
    195             SHARED_DATA_HASHTABLE = NULL;
    196         }
    197     }
    198 
    199     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
    200                                             /*  if the hash table still exists.  The mutex  */
    201                                             /*  will lazily re-init  itself if needed.      */
    202     return (SHARED_DATA_HASHTABLE == NULL);
    203 }
    204 U_CDECL_END
    205 
    206 
    207 /** Initializes the cache for resources */
    208 static void
    209 initCache(UErrorCode *status) {
    210     UBool makeCache;
    211     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
    212     if(makeCache) {
    213         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
    214         if (U_SUCCESS(*status)) {
    215             umtx_lock(&usprepMutex);
    216             if(SHARED_DATA_HASHTABLE == NULL) {
    217                 SHARED_DATA_HASHTABLE = newCache;
    218                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    219                 newCache = NULL;
    220             }
    221             umtx_unlock(&usprepMutex);
    222         }
    223         if(newCache != NULL) {
    224             uhash_close(newCache);
    225         }
    226     }
    227 }
    228 
    229 static UBool U_CALLCONV
    230 loadData(UStringPrepProfile* profile,
    231          const char* path,
    232          const char* name,
    233          const char* type,
    234          UErrorCode* errorCode) {
    235     /* load Unicode SPREP data from file */
    236     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    237     UDataMemory *dataMemory;
    238     const int32_t *p=NULL;
    239     const uint8_t *pb;
    240     UVersionInfo normUnicodeVersion;
    241     int32_t normUniVer, sprepUniVer, normCorrVer;
    242 
    243     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    244         return 0;
    245     }
    246 
    247     /* open the data outside the mutex block */
    248     //TODO: change the path
    249     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    250     if(U_FAILURE(*errorCode)) {
    251         return FALSE;
    252     }
    253 
    254     p=(const int32_t *)udata_getMemory(dataMemory);
    255     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    256     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    257     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    258 
    259 
    260     if(U_FAILURE(*errorCode)) {
    261         udata_close(dataMemory);
    262         return FALSE;
    263     }
    264 
    265     /* in the mutex block, set the data for this process */
    266     umtx_lock(&usprepMutex);
    267     if(profile->sprepData==NULL) {
    268         profile->sprepData=dataMemory;
    269         dataMemory=NULL;
    270         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    271         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    272     } else {
    273         p=(const int32_t *)udata_getMemory(profile->sprepData);
    274     }
    275     umtx_unlock(&usprepMutex);
    276     /* initialize some variables */
    277     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    278 
    279     u_getUnicodeVersion(normUnicodeVersion);
    280     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    281                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    282     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    283                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    284     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    285 
    286     if(U_FAILURE(*errorCode)){
    287         udata_close(dataMemory);
    288         return FALSE;
    289     }
    290     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    291         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    292         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    293       ){
    294         *errorCode = U_INVALID_FORMAT_ERROR;
    295         udata_close(dataMemory);
    296         return FALSE;
    297     }
    298     profile->isDataLoaded = TRUE;
    299 
    300     /* if a different thread set it first, then close the extra data */
    301     if(dataMemory!=NULL) {
    302         udata_close(dataMemory); /* NULL if it was set correctly */
    303     }
    304 
    305 
    306     return profile->isDataLoaded;
    307 }
    308 
    309 static UStringPrepProfile*
    310 usprep_getProfile(const char* path,
    311                   const char* name,
    312                   UErrorCode *status){
    313 
    314     UStringPrepProfile* profile = NULL;
    315 
    316     initCache(status);
    317 
    318     if(U_FAILURE(*status)){
    319         return NULL;
    320     }
    321 
    322     UStringPrepKey stackKey;
    323     /*
    324      * const is cast way to save malloc, strcpy and free calls
    325      * we use the passed in pointers for fetching the data from the
    326      * hash table which is safe
    327      */
    328     stackKey.name = (char*) name;
    329     stackKey.path = (char*) path;
    330 
    331     /* fetch the data from the cache */
    332     umtx_lock(&usprepMutex);
    333     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    334     if(profile != NULL) {
    335         profile->refCount++;
    336     }
    337     umtx_unlock(&usprepMutex);
    338 
    339     if(profile == NULL) {
    340         /* else load the data and put the data in the cache */
    341         LocalMemory<UStringPrepProfile> newProfile;
    342         if(newProfile.allocateInsteadAndReset() == NULL) {
    343             *status = U_MEMORY_ALLOCATION_ERROR;
    344             return NULL;
    345         }
    346 
    347         /* load the data */
    348         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    349             return NULL;
    350         }
    351 
    352         /* get the options */
    353         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    354         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    355 
    356         if(newProfile->checkBiDi) {
    357             newProfile->bdp = ubidi_getSingleton();
    358         }
    359 
    360         LocalMemory<UStringPrepKey> key;
    361         LocalMemory<char> keyName;
    362         LocalMemory<char> keyPath;
    363         if( key.allocateInsteadAndReset() == NULL ||
    364             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
    365             (path != NULL &&
    366              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
    367          ) {
    368             *status = U_MEMORY_ALLOCATION_ERROR;
    369             usprep_unload(newProfile.getAlias());
    370             return NULL;
    371         }
    372 
    373         umtx_lock(&usprepMutex);
    374         // If another thread already inserted the same key/value, refcount and cleanup our thread data
    375         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    376         if(profile != NULL) {
    377             profile->refCount++;
    378             usprep_unload(newProfile.getAlias());
    379         }
    380         else {
    381             /* initialize the key members */
    382             key->name = keyName.orphan();
    383             uprv_strcpy(key->name, name);
    384             if(path != NULL){
    385                 key->path = keyPath.orphan();
    386                 uprv_strcpy(key->path, path);
    387             }
    388             profile = newProfile.orphan();
    389 
    390             /* add the data object to the cache */
    391             profile->refCount = 1;
    392             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    393         }
    394         umtx_unlock(&usprepMutex);
    395     }
    396 
    397     return profile;
    398 }
    399 
    400 U_CAPI UStringPrepProfile* U_EXPORT2
    401 usprep_open(const char* path,
    402             const char* name,
    403             UErrorCode* status){
    404 
    405     if(status == NULL || U_FAILURE(*status)){
    406         return NULL;
    407     }
    408 
    409     /* initialize the profile struct members */
    410     return usprep_getProfile(path,name,status);
    411 }
    412 
    413 U_CAPI UStringPrepProfile* U_EXPORT2
    414 usprep_openByType(UStringPrepProfileType type,
    415 				  UErrorCode* status) {
    416     if(status == NULL || U_FAILURE(*status)){
    417         return NULL;
    418     }
    419     int32_t index = (int32_t)type;
    420     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
    421         *status = U_ILLEGAL_ARGUMENT_ERROR;
    422         return NULL;
    423     }
    424     return usprep_open(NULL, PROFILE_NAMES[index], status);
    425 }
    426 
    427 U_CAPI void U_EXPORT2
    428 usprep_close(UStringPrepProfile* profile){
    429     if(profile==NULL){
    430         return;
    431     }
    432 
    433     umtx_lock(&usprepMutex);
    434     /* decrement the ref count*/
    435     if(profile->refCount > 0){
    436         profile->refCount--;
    437     }
    438     umtx_unlock(&usprepMutex);
    439 
    440 }
    441 
    442 U_CFUNC void
    443 uprv_syntaxError(const UChar* rules,
    444                  int32_t pos,
    445                  int32_t rulesLen,
    446                  UParseError* parseError){
    447     if(parseError == NULL){
    448         return;
    449     }
    450     parseError->offset = pos;
    451     parseError->line = 0 ; // we are not using line numbers
    452 
    453     // for pre-context
    454     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    455     int32_t limit = pos;
    456 
    457     u_memcpy(parseError->preContext,rules+start,limit-start);
    458     //null terminate the buffer
    459     parseError->preContext[limit-start] = 0;
    460 
    461     // for post-context; include error rules[pos]
    462     start = pos;
    463     limit = start + (U_PARSE_CONTEXT_LEN-1);
    464     if (limit > rulesLen) {
    465         limit = rulesLen;
    466     }
    467     if (start < rulesLen) {
    468         u_memcpy(parseError->postContext,rules+start,limit-start);
    469     }
    470     //null terminate the buffer
    471     parseError->postContext[limit-start]= 0;
    472 }
    473 
    474 
    475 static inline UStringPrepType
    476 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    477 
    478     UStringPrepType type;
    479     if(trieWord == 0){
    480         /*
    481          * Initial value stored in the mapping table
    482          * just return USPREP_TYPE_LIMIT .. so that
    483          * the source codepoint is copied to the destination
    484          */
    485         type = USPREP_TYPE_LIMIT;
    486         isIndex =FALSE;
    487         value = 0;
    488     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    489         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    490         isIndex =FALSE;
    491         value = 0;
    492     }else{
    493         /* get the type */
    494         type = USPREP_MAP;
    495         /* ascertain if the value is index or delta */
    496         if(trieWord & 0x02){
    497             isIndex = TRUE;
    498             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    499         }else{
    500             isIndex = FALSE;
    501             value = (int16_t)trieWord;
    502             value =  (value >> 2);
    503         }
    504 
    505         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    506             type = USPREP_DELETE;
    507             isIndex =FALSE;
    508             value = 0;
    509         }
    510     }
    511     return type;
    512 }
    513 
    514 
    515 
    516 static int32_t
    517 usprep_map(  const UStringPrepProfile* profile,
    518              const UChar* src, int32_t srcLength,
    519              UChar* dest, int32_t destCapacity,
    520              int32_t options,
    521              UParseError* parseError,
    522              UErrorCode* status ){
    523 
    524     uint16_t result;
    525     int32_t destIndex=0;
    526     int32_t srcIndex;
    527     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    528     UStringPrepType type;
    529     int16_t value;
    530     UBool isIndex;
    531     const int32_t* indexes = profile->indexes;
    532 
    533     // no error checking the caller check for error and arguments
    534     // no string length check the caller finds out the string length
    535 
    536     for(srcIndex=0;srcIndex<srcLength;){
    537         UChar32 ch;
    538 
    539         U16_NEXT(src,srcIndex,srcLength,ch);
    540 
    541         result=0;
    542 
    543         UTRIE_GET16(&profile->sprepTrie,ch,result);
    544 
    545         type = getValues(result, value, isIndex);
    546 
    547         // check if the source codepoint is unassigned
    548         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    549 
    550             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    551             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    552             return 0;
    553 
    554         }else if(type == USPREP_MAP){
    555 
    556             int32_t index, length;
    557 
    558             if(isIndex){
    559                 index = value;
    560                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    561                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    562                     length = 1;
    563                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    564                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    565                     length = 2;
    566                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    567                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    568                     length = 3;
    569                 }else{
    570                     length = profile->mappingData[index++];
    571 
    572                 }
    573 
    574                 /* copy mapping to destination */
    575                 for(int32_t i=0; i< length; i++){
    576                     if(destIndex < destCapacity  ){
    577                         dest[destIndex] = profile->mappingData[index+i];
    578                     }
    579                     destIndex++; /* for pre-flighting */
    580                 }
    581                 continue;
    582             }else{
    583                 // subtract the delta to arrive at the code point
    584                 ch -= value;
    585             }
    586 
    587         }else if(type==USPREP_DELETE){
    588              // just consume the codepoint and contine
    589             continue;
    590         }
    591         //copy the code point into destination
    592         if(ch <= 0xFFFF){
    593             if(destIndex < destCapacity ){
    594                 dest[destIndex] = (UChar)ch;
    595             }
    596             destIndex++;
    597         }else{
    598             if(destIndex+1 < destCapacity ){
    599                 dest[destIndex]   = U16_LEAD(ch);
    600                 dest[destIndex+1] = U16_TRAIL(ch);
    601             }
    602             destIndex +=2;
    603         }
    604 
    605     }
    606 
    607     return u_terminateUChars(dest, destCapacity, destIndex, status);
    608 }
    609 
    610 
    611 static int32_t
    612 usprep_normalize(   const UChar* src, int32_t srcLength,
    613                     UChar* dest, int32_t destCapacity,
    614                     UErrorCode* status ){
    615     return unorm_normalize(
    616         src, srcLength,
    617         UNORM_NFKC, UNORM_UNICODE_3_2,
    618         dest, destCapacity,
    619         status);
    620 }
    621 
    622 
    623  /*
    624    1) Map -- For each character in the input, check if it has a mapping
    625       and, if so, replace it with its mapping.
    626 
    627    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    628       normalization.
    629 
    630    3) Prohibit -- Check for any characters that are not allowed in the
    631       output.  If any are found, return an error.
    632 
    633    4) Check bidi -- Possibly check for right-to-left characters, and if
    634       any are found, make sure that the whole string satisfies the
    635       requirements for bidirectional strings.  If the string does not
    636       satisfy the requirements for bidirectional strings, return an
    637       error.
    638       [Unicode3.2] defines several bidirectional categories; each character
    639        has one bidirectional category assigned to it.  For the purposes of
    640        the requirements below, an "RandALCat character" is a character that
    641        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
    642        is a character that has Unicode bidirectional category "L".  Note
    643 
    644 
    645        that there are many characters which fall in neither of the above
    646        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    647        this because they have bidirectional category "EN".
    648 
    649        In any profile that specifies bidirectional character handling, all
    650        three of the following requirements MUST be met:
    651 
    652        1) The characters in section 5.8 MUST be prohibited.
    653 
    654        2) If a string contains any RandALCat character, the string MUST NOT
    655           contain any LCat character.
    656 
    657        3) If a string contains any RandALCat character, a RandALCat
    658           character MUST be the first character of the string, and a
    659           RandALCat character MUST be the last character of the string.
    660 */
    661 
    662 #define MAX_STACK_BUFFER_SIZE 300
    663 
    664 
    665 U_CAPI int32_t U_EXPORT2
    666 usprep_prepare(   const UStringPrepProfile* profile,
    667                   const UChar* src, int32_t srcLength,
    668                   UChar* dest, int32_t destCapacity,
    669                   int32_t options,
    670                   UParseError* parseError,
    671                   UErrorCode* status ){
    672 
    673     // check error status
    674     if(status == NULL || U_FAILURE(*status)){
    675         return 0;
    676     }
    677 
    678     //check arguments
    679     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    680         *status=U_ILLEGAL_ARGUMENT_ERROR;
    681         return 0;
    682     }
    683 
    684     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    685     UChar *b1 = b1Stack, *b2 = b2Stack;
    686     int32_t b1Len, b2Len=0,
    687             b1Capacity = MAX_STACK_BUFFER_SIZE ,
    688             b2Capacity = MAX_STACK_BUFFER_SIZE;
    689     uint16_t result;
    690     int32_t b2Index = 0;
    691     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    692     UBool leftToRight=FALSE, rightToLeft=FALSE;
    693     int32_t rtlPos =-1, ltrPos =-1;
    694 
    695     //get the string length
    696     if(srcLength == -1){
    697         srcLength = u_strlen(src);
    698     }
    699     // map
    700     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
    701 
    702     if(*status == U_BUFFER_OVERFLOW_ERROR){
    703         // redo processing of string
    704         /* we do not have enough room so grow the buffer*/
    705         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
    706         if(b1==NULL){
    707             *status = U_MEMORY_ALLOCATION_ERROR;
    708             goto CLEANUP;
    709         }
    710 
    711         *status = U_ZERO_ERROR; // reset error
    712 
    713         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    714 
    715     }
    716 
    717     // normalize
    718     if(profile->doNFKC == TRUE){
    719         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    720 
    721         if(*status == U_BUFFER_OVERFLOW_ERROR){
    722             // redo processing of string
    723             /* we do not have enough room so grow the buffer*/
    724             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
    725             if(b2==NULL){
    726                 *status = U_MEMORY_ALLOCATION_ERROR;
    727                 goto CLEANUP;
    728             }
    729 
    730             *status = U_ZERO_ERROR; // reset error
    731 
    732             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    733 
    734         }
    735 
    736     }else{
    737         b2 = b1;
    738         b2Len = b1Len;
    739     }
    740 
    741 
    742     if(U_FAILURE(*status)){
    743         goto CLEANUP;
    744     }
    745 
    746     UChar32 ch;
    747     UStringPrepType type;
    748     int16_t value;
    749     UBool isIndex;
    750 
    751     // Prohibit and checkBiDi in one pass
    752     for(b2Index=0; b2Index<b2Len;){
    753 
    754         ch = 0;
    755 
    756         U16_NEXT(b2, b2Index, b2Len, ch);
    757 
    758         UTRIE_GET16(&profile->sprepTrie,ch,result);
    759 
    760         type = getValues(result, value, isIndex);
    761 
    762         if( type == USPREP_PROHIBITED ||
    763             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    764            ){
    765             *status = U_STRINGPREP_PROHIBITED_ERROR;
    766             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    767             goto CLEANUP;
    768         }
    769 
    770         if(profile->checkBiDi) {
    771             direction = ubidi_getClass(profile->bdp, ch);
    772             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    773                 firstCharDir = direction;
    774             }
    775             if(direction == U_LEFT_TO_RIGHT){
    776                 leftToRight = TRUE;
    777                 ltrPos = b2Index-1;
    778             }
    779             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    780                 rightToLeft = TRUE;
    781                 rtlPos = b2Index-1;
    782             }
    783         }
    784     }
    785     if(profile->checkBiDi == TRUE){
    786         // satisfy 2
    787         if( leftToRight == TRUE && rightToLeft == TRUE){
    788             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    789             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    790             goto CLEANUP;
    791         }
    792 
    793         //satisfy 3
    794         if( rightToLeft == TRUE &&
    795             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    796               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    797            ){
    798             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    799             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    800             return FALSE;
    801         }
    802     }
    803     if(b2Len>0 && b2Len <= destCapacity){
    804         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    805     }
    806 
    807 CLEANUP:
    808     if(b1!=b1Stack){
    809         uprv_free(b1);
    810         b1=NULL;
    811     }
    812 
    813     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
    814         uprv_free(b2);
    815         b2=NULL;
    816     }
    817     return u_terminateUChars(dest, destCapacity, b2Len, status);
    818 }
    819 
    820 
    821 /* data swapping ------------------------------------------------------------ */
    822 
    823 U_CAPI int32_t U_EXPORT2
    824 usprep_swap(const UDataSwapper *ds,
    825             const void *inData, int32_t length, void *outData,
    826             UErrorCode *pErrorCode) {
    827     const UDataInfo *pInfo;
    828     int32_t headerSize;
    829 
    830     const uint8_t *inBytes;
    831     uint8_t *outBytes;
    832 
    833     const int32_t *inIndexes;
    834     int32_t indexes[16];
    835 
    836     int32_t i, offset, count, size;
    837 
    838     /* udata_swapDataHeader checks the arguments */
    839     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    840     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    841         return 0;
    842     }
    843 
    844     /* check data format and format version */
    845     pInfo=(const UDataInfo *)((const char *)inData+4);
    846     if(!(
    847         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    848         pInfo->dataFormat[1]==0x50 &&
    849         pInfo->dataFormat[2]==0x52 &&
    850         pInfo->dataFormat[3]==0x50 &&
    851         pInfo->formatVersion[0]==3
    852     )) {
    853         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    854                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    855                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    856                          pInfo->formatVersion[0]);
    857         *pErrorCode=U_UNSUPPORTED_ERROR;
    858         return 0;
    859     }
    860 
    861     inBytes=(const uint8_t *)inData+headerSize;
    862     outBytes=(uint8_t *)outData+headerSize;
    863 
    864     inIndexes=(const int32_t *)inBytes;
    865 
    866     if(length>=0) {
    867         length-=headerSize;
    868         if(length<16*4) {
    869             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    870                              length);
    871             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    872             return 0;
    873         }
    874     }
    875 
    876     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    877     for(i=0; i<16; ++i) {
    878         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    879     }
    880 
    881     /* calculate the total length of the data */
    882     size=
    883         16*4+ /* size of indexes[] */
    884         indexes[_SPREP_INDEX_TRIE_SIZE]+
    885         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    886 
    887     if(length>=0) {
    888         if(length<size) {
    889             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    890                              length);
    891             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    892             return 0;
    893         }
    894 
    895         /* copy the data for inaccessible bytes */
    896         if(inBytes!=outBytes) {
    897             uprv_memcpy(outBytes, inBytes, size);
    898         }
    899 
    900         offset=0;
    901 
    902         /* swap the int32_t indexes[] */
    903         count=16*4;
    904         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    905         offset+=count;
    906 
    907         /* swap the UTrie */
    908         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    909         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    910         offset+=count;
    911 
    912         /* swap the uint16_t mappingTable[] */
    913         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    914         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    915         offset+=count;
    916     }
    917 
    918     return headerSize+size;
    919 }
    920 
    921 #endif /* #if !UCONFIG_NO_IDNA */
    922