Home | History | Annotate | Download | only in common
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2012, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  usprep.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003jul2
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_IDNA
     20 
     21 #include "unicode/usprep.h"
     22 
     23 #include "unicode/unorm.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uversion.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "sprpimpl.h"
     30 #include "ustr_imp.h"
     31 #include "uhash.h"
     32 #include "cstring.h"
     33 #include "udataswp.h"
     34 #include "ucln_cmn.h"
     35 #include "ubidi_props.h"
     36 
     37 U_NAMESPACE_USE
     38 
     39 U_CDECL_BEGIN
     40 
     41 /*
     42 Static cache for already opened StringPrep profiles
     43 */
     44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
     45 
     46 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
     47 
     48 /* format version of spp file */
     49 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     50 
     51 /* the Unicode version of the sprep data */
     52 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     53 
     54 /* Profile names must be aligned to UStringPrepProfileType */
     55 static const char * const PROFILE_NAMES[] = {
     56     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
     57     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
     58     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
     59     "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
     60     "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
     61     "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
     62     "rfc3722",      /* USPREP_RFC3722_ISCSI */
     63     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
     64     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
     65     "rfc4011",      /* USPREP_RFC4011_MIB */
     66     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
     67     "rfc4505",      /* USPREP_RFC4505_TRACE */
     68     "rfc4518",      /* USPREP_RFC4518_LDAP */
     69     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
     70 };
     71 
     72 static UBool U_CALLCONV
     73 isSPrepAcceptable(void * /* context */,
     74              const char * /* type */,
     75              const char * /* name */,
     76              const UDataInfo *pInfo) {
     77     if(
     78         pInfo->size>=20 &&
     79         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     80         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     81         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     82         pInfo->dataFormat[1]==0x50 &&
     83         pInfo->dataFormat[2]==0x52 &&
     84         pInfo->dataFormat[3]==0x50 &&
     85         pInfo->formatVersion[0]==3 &&
     86         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     87         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     88     ) {
     89         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     90         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     91         return TRUE;
     92     } else {
     93         return FALSE;
     94     }
     95 }
     96 
     97 static int32_t U_CALLCONV
     98 getSPrepFoldingOffset(uint32_t data) {
     99 
    100     return (int32_t)data;
    101 
    102 }
    103 
    104 /* hashes an entry  */
    105 static int32_t U_CALLCONV
    106 hashEntry(const UHashTok parm) {
    107     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    108     UHashTok namekey, pathkey;
    109     namekey.pointer = b->name;
    110     pathkey.pointer = b->path;
    111     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    112 }
    113 
    114 /* compares two entries */
    115 static UBool U_CALLCONV
    116 compareEntries(const UHashTok p1, const UHashTok p2) {
    117     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    118     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    119     UHashTok name1, name2, path1, path2;
    120     name1.pointer = b1->name;
    121     name2.pointer = b2->name;
    122     path1.pointer = b1->path;
    123     path2.pointer = b2->path;
    124     return ((UBool)(uhash_compareChars(name1, name2) &
    125         uhash_compareChars(path1, path2)));
    126 }
    127 
    128 static void
    129 usprep_unload(UStringPrepProfile* data){
    130     udata_close(data->sprepData);
    131 }
    132 
    133 static int32_t
    134 usprep_internal_flushCache(UBool noRefCount){
    135     UStringPrepProfile *profile = NULL;
    136     UStringPrepKey  *key  = NULL;
    137     int32_t pos = -1;
    138     int32_t deletedNum = 0;
    139     const UHashElement *e;
    140 
    141     /*
    142      * if shared data hasn't even been lazy evaluated yet
    143      * return 0
    144      */
    145     umtx_lock(&usprepMutex);
    146     if (SHARED_DATA_HASHTABLE == NULL) {
    147         umtx_unlock(&usprepMutex);
    148         return 0;
    149     }
    150 
    151     /*creates an enumeration to iterate through every element in the table */
    152     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    153     {
    154         profile = (UStringPrepProfile *) e->value.pointer;
    155         key  = (UStringPrepKey *) e->key.pointer;
    156 
    157         if ((noRefCount== FALSE && profile->refCount == 0) ||
    158              noRefCount== TRUE) {
    159             deletedNum++;
    160             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    161 
    162             /* unload the data */
    163             usprep_unload(profile);
    164 
    165             if(key->name != NULL) {
    166                 uprv_free(key->name);
    167                 key->name=NULL;
    168             }
    169             if(key->path != NULL) {
    170                 uprv_free(key->path);
    171                 key->path=NULL;
    172             }
    173             uprv_free(profile);
    174             uprv_free(key);
    175         }
    176 
    177     }
    178     umtx_unlock(&usprepMutex);
    179 
    180     return deletedNum;
    181 }
    182 
    183 /* Works just like ucnv_flushCache()
    184 static int32_t
    185 usprep_flushCache(){
    186     return usprep_internal_flushCache(FALSE);
    187 }
    188 */
    189 
    190 static UBool U_CALLCONV usprep_cleanup(void){
    191     if (SHARED_DATA_HASHTABLE != NULL) {
    192         usprep_internal_flushCache(TRUE);
    193         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    194             uhash_close(SHARED_DATA_HASHTABLE);
    195             SHARED_DATA_HASHTABLE = NULL;
    196         }
    197     }
    198 
    199     return (SHARED_DATA_HASHTABLE == NULL);
    200 }
    201 U_CDECL_END
    202 
    203 
    204 /** Initializes the cache for resources */
    205 static void
    206 initCache(UErrorCode *status) {
    207     UBool makeCache;
    208     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
    209     if(makeCache) {
    210         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
    211         if (U_SUCCESS(*status)) {
    212             umtx_lock(&usprepMutex);
    213             if(SHARED_DATA_HASHTABLE == NULL) {
    214                 SHARED_DATA_HASHTABLE = newCache;
    215                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    216                 newCache = NULL;
    217             }
    218             umtx_unlock(&usprepMutex);
    219         }
    220         if(newCache != NULL) {
    221             uhash_close(newCache);
    222         }
    223     }
    224 }
    225 
    226 static UBool U_CALLCONV
    227 loadData(UStringPrepProfile* profile,
    228          const char* path,
    229          const char* name,
    230          const char* type,
    231          UErrorCode* errorCode) {
    232     /* load Unicode SPREP data from file */
    233     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    234     UDataMemory *dataMemory;
    235     const int32_t *p=NULL;
    236     const uint8_t *pb;
    237     UVersionInfo normUnicodeVersion;
    238     int32_t normUniVer, sprepUniVer, normCorrVer;
    239 
    240     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    241         return 0;
    242     }
    243 
    244     /* open the data outside the mutex block */
    245     //TODO: change the path
    246     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    247     if(U_FAILURE(*errorCode)) {
    248         return FALSE;
    249     }
    250 
    251     p=(const int32_t *)udata_getMemory(dataMemory);
    252     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    253     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    254     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    255 
    256 
    257     if(U_FAILURE(*errorCode)) {
    258         udata_close(dataMemory);
    259         return FALSE;
    260     }
    261 
    262     /* in the mutex block, set the data for this process */
    263     umtx_lock(&usprepMutex);
    264     if(profile->sprepData==NULL) {
    265         profile->sprepData=dataMemory;
    266         dataMemory=NULL;
    267         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    268         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    269     } else {
    270         p=(const int32_t *)udata_getMemory(profile->sprepData);
    271     }
    272     umtx_unlock(&usprepMutex);
    273     /* initialize some variables */
    274     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    275 
    276     u_getUnicodeVersion(normUnicodeVersion);
    277     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    278                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    279     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    280                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    281     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    282 
    283     if(U_FAILURE(*errorCode)){
    284         udata_close(dataMemory);
    285         return FALSE;
    286     }
    287     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    288         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    289         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    290       ){
    291         *errorCode = U_INVALID_FORMAT_ERROR;
    292         udata_close(dataMemory);
    293         return FALSE;
    294     }
    295     profile->isDataLoaded = TRUE;
    296 
    297     /* if a different thread set it first, then close the extra data */
    298     if(dataMemory!=NULL) {
    299         udata_close(dataMemory); /* NULL if it was set correctly */
    300     }
    301 
    302 
    303     return profile->isDataLoaded;
    304 }
    305 
    306 static UStringPrepProfile*
    307 usprep_getProfile(const char* path,
    308                   const char* name,
    309                   UErrorCode *status){
    310 
    311     UStringPrepProfile* profile = NULL;
    312 
    313     initCache(status);
    314 
    315     if(U_FAILURE(*status)){
    316         return NULL;
    317     }
    318 
    319     UStringPrepKey stackKey;
    320     /*
    321      * const is cast way to save malloc, strcpy and free calls
    322      * we use the passed in pointers for fetching the data from the
    323      * hash table which is safe
    324      */
    325     stackKey.name = (char*) name;
    326     stackKey.path = (char*) path;
    327 
    328     /* fetch the data from the cache */
    329     umtx_lock(&usprepMutex);
    330     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    331     if(profile != NULL) {
    332         profile->refCount++;
    333     }
    334     umtx_unlock(&usprepMutex);
    335 
    336     if(profile == NULL) {
    337         /* else load the data and put the data in the cache */
    338         LocalMemory<UStringPrepProfile> newProfile;
    339         if(newProfile.allocateInsteadAndReset() == NULL) {
    340             *status = U_MEMORY_ALLOCATION_ERROR;
    341             return NULL;
    342         }
    343 
    344         /* load the data */
    345         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    346             return NULL;
    347         }
    348 
    349         /* get the options */
    350         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    351         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    352 
    353         if(newProfile->checkBiDi) {
    354             newProfile->bdp = ubidi_getSingleton();
    355         }
    356 
    357         LocalMemory<UStringPrepKey> key;
    358         LocalMemory<char> keyName;
    359         LocalMemory<char> keyPath;
    360         if( key.allocateInsteadAndReset() == NULL ||
    361             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
    362             (path != NULL &&
    363              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
    364          ) {
    365             *status = U_MEMORY_ALLOCATION_ERROR;
    366             usprep_unload(newProfile.getAlias());
    367             return NULL;
    368         }
    369 
    370         umtx_lock(&usprepMutex);
    371         // If another thread already inserted the same key/value, refcount and cleanup our thread data
    372         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    373         if(profile != NULL) {
    374             profile->refCount++;
    375             usprep_unload(newProfile.getAlias());
    376         }
    377         else {
    378             /* initialize the key members */
    379             key->name = keyName.orphan();
    380             uprv_strcpy(key->name, name);
    381             if(path != NULL){
    382                 key->path = keyPath.orphan();
    383                 uprv_strcpy(key->path, path);
    384             }
    385             profile = newProfile.orphan();
    386 
    387             /* add the data object to the cache */
    388             profile->refCount = 1;
    389             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    390         }
    391         umtx_unlock(&usprepMutex);
    392     }
    393 
    394     return profile;
    395 }
    396 
    397 U_CAPI UStringPrepProfile* U_EXPORT2
    398 usprep_open(const char* path,
    399             const char* name,
    400             UErrorCode* status){
    401 
    402     if(status == NULL || U_FAILURE(*status)){
    403         return NULL;
    404     }
    405 
    406     /* initialize the profile struct members */
    407     return usprep_getProfile(path,name,status);
    408 }
    409 
    410 U_CAPI UStringPrepProfile* U_EXPORT2
    411 usprep_openByType(UStringPrepProfileType type,
    412 				  UErrorCode* status) {
    413     if(status == NULL || U_FAILURE(*status)){
    414         return NULL;
    415     }
    416     int32_t index = (int32_t)type;
    417     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
    418         *status = U_ILLEGAL_ARGUMENT_ERROR;
    419         return NULL;
    420     }
    421     return usprep_open(NULL, PROFILE_NAMES[index], status);
    422 }
    423 
    424 U_CAPI void U_EXPORT2
    425 usprep_close(UStringPrepProfile* profile){
    426     if(profile==NULL){
    427         return;
    428     }
    429 
    430     umtx_lock(&usprepMutex);
    431     /* decrement the ref count*/
    432     if(profile->refCount > 0){
    433         profile->refCount--;
    434     }
    435     umtx_unlock(&usprepMutex);
    436 
    437 }
    438 
    439 U_CFUNC void
    440 uprv_syntaxError(const UChar* rules,
    441                  int32_t pos,
    442                  int32_t rulesLen,
    443                  UParseError* parseError){
    444     if(parseError == NULL){
    445         return;
    446     }
    447     parseError->offset = pos;
    448     parseError->line = 0 ; // we are not using line numbers
    449 
    450     // for pre-context
    451     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    452     int32_t limit = pos;
    453 
    454     u_memcpy(parseError->preContext,rules+start,limit-start);
    455     //null terminate the buffer
    456     parseError->preContext[limit-start] = 0;
    457 
    458     // for post-context; include error rules[pos]
    459     start = pos;
    460     limit = start + (U_PARSE_CONTEXT_LEN-1);
    461     if (limit > rulesLen) {
    462         limit = rulesLen;
    463     }
    464     if (start < rulesLen) {
    465         u_memcpy(parseError->postContext,rules+start,limit-start);
    466     }
    467     //null terminate the buffer
    468     parseError->postContext[limit-start]= 0;
    469 }
    470 
    471 
    472 static inline UStringPrepType
    473 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    474 
    475     UStringPrepType type;
    476     if(trieWord == 0){
    477         /*
    478          * Initial value stored in the mapping table
    479          * just return USPREP_TYPE_LIMIT .. so that
    480          * the source codepoint is copied to the destination
    481          */
    482         type = USPREP_TYPE_LIMIT;
    483         isIndex =FALSE;
    484         value = 0;
    485     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    486         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    487         isIndex =FALSE;
    488         value = 0;
    489     }else{
    490         /* get the type */
    491         type = USPREP_MAP;
    492         /* ascertain if the value is index or delta */
    493         if(trieWord & 0x02){
    494             isIndex = TRUE;
    495             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    496         }else{
    497             isIndex = FALSE;
    498             value = (int16_t)trieWord;
    499             value =  (value >> 2);
    500         }
    501 
    502         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    503             type = USPREP_DELETE;
    504             isIndex =FALSE;
    505             value = 0;
    506         }
    507     }
    508     return type;
    509 }
    510 
    511 
    512 
    513 static int32_t
    514 usprep_map(  const UStringPrepProfile* profile,
    515              const UChar* src, int32_t srcLength,
    516              UChar* dest, int32_t destCapacity,
    517              int32_t options,
    518              UParseError* parseError,
    519              UErrorCode* status ){
    520 
    521     uint16_t result;
    522     int32_t destIndex=0;
    523     int32_t srcIndex;
    524     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    525     UStringPrepType type;
    526     int16_t value;
    527     UBool isIndex;
    528     const int32_t* indexes = profile->indexes;
    529 
    530     // no error checking the caller check for error and arguments
    531     // no string length check the caller finds out the string length
    532 
    533     for(srcIndex=0;srcIndex<srcLength;){
    534         UChar32 ch;
    535 
    536         U16_NEXT(src,srcIndex,srcLength,ch);
    537 
    538         result=0;
    539 
    540         UTRIE_GET16(&profile->sprepTrie,ch,result);
    541 
    542         type = getValues(result, value, isIndex);
    543 
    544         // check if the source codepoint is unassigned
    545         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    546 
    547             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    548             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    549             return 0;
    550 
    551         }else if(type == USPREP_MAP){
    552 
    553             int32_t index, length;
    554 
    555             if(isIndex){
    556                 index = value;
    557                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    558                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    559                     length = 1;
    560                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    561                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    562                     length = 2;
    563                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    564                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    565                     length = 3;
    566                 }else{
    567                     length = profile->mappingData[index++];
    568 
    569                 }
    570 
    571                 /* copy mapping to destination */
    572                 for(int32_t i=0; i< length; i++){
    573                     if(destIndex < destCapacity  ){
    574                         dest[destIndex] = profile->mappingData[index+i];
    575                     }
    576                     destIndex++; /* for pre-flighting */
    577                 }
    578                 continue;
    579             }else{
    580                 // subtract the delta to arrive at the code point
    581                 ch -= value;
    582             }
    583 
    584         }else if(type==USPREP_DELETE){
    585              // just consume the codepoint and contine
    586             continue;
    587         }
    588         //copy the code point into destination
    589         if(ch <= 0xFFFF){
    590             if(destIndex < destCapacity ){
    591                 dest[destIndex] = (UChar)ch;
    592             }
    593             destIndex++;
    594         }else{
    595             if(destIndex+1 < destCapacity ){
    596                 dest[destIndex]   = U16_LEAD(ch);
    597                 dest[destIndex+1] = U16_TRAIL(ch);
    598             }
    599             destIndex +=2;
    600         }
    601 
    602     }
    603 
    604     return u_terminateUChars(dest, destCapacity, destIndex, status);
    605 }
    606 
    607 
    608 static int32_t
    609 usprep_normalize(   const UChar* src, int32_t srcLength,
    610                     UChar* dest, int32_t destCapacity,
    611                     UErrorCode* status ){
    612     return unorm_normalize(
    613         src, srcLength,
    614         UNORM_NFKC, UNORM_UNICODE_3_2,
    615         dest, destCapacity,
    616         status);
    617 }
    618 
    619 
    620  /*
    621    1) Map -- For each character in the input, check if it has a mapping
    622       and, if so, replace it with its mapping.
    623 
    624    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    625       normalization.
    626 
    627    3) Prohibit -- Check for any characters that are not allowed in the
    628       output.  If any are found, return an error.
    629 
    630    4) Check bidi -- Possibly check for right-to-left characters, and if
    631       any are found, make sure that the whole string satisfies the
    632       requirements for bidirectional strings.  If the string does not
    633       satisfy the requirements for bidirectional strings, return an
    634       error.
    635       [Unicode3.2] defines several bidirectional categories; each character
    636        has one bidirectional category assigned to it.  For the purposes of
    637        the requirements below, an "RandALCat character" is a character that
    638        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
    639        is a character that has Unicode bidirectional category "L".  Note
    640 
    641 
    642        that there are many characters which fall in neither of the above
    643        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    644        this because they have bidirectional category "EN".
    645 
    646        In any profile that specifies bidirectional character handling, all
    647        three of the following requirements MUST be met:
    648 
    649        1) The characters in section 5.8 MUST be prohibited.
    650 
    651        2) If a string contains any RandALCat character, the string MUST NOT
    652           contain any LCat character.
    653 
    654        3) If a string contains any RandALCat character, a RandALCat
    655           character MUST be the first character of the string, and a
    656           RandALCat character MUST be the last character of the string.
    657 */
    658 
    659 #define MAX_STACK_BUFFER_SIZE 300
    660 
    661 
    662 U_CAPI int32_t U_EXPORT2
    663 usprep_prepare(   const UStringPrepProfile* profile,
    664                   const UChar* src, int32_t srcLength,
    665                   UChar* dest, int32_t destCapacity,
    666                   int32_t options,
    667                   UParseError* parseError,
    668                   UErrorCode* status ){
    669 
    670     // check error status
    671     if(status == NULL || U_FAILURE(*status)){
    672         return 0;
    673     }
    674 
    675     //check arguments
    676     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    677         *status=U_ILLEGAL_ARGUMENT_ERROR;
    678         return 0;
    679     }
    680 
    681     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    682     UChar *b1 = b1Stack, *b2 = b2Stack;
    683     int32_t b1Len, b2Len=0,
    684             b1Capacity = MAX_STACK_BUFFER_SIZE ,
    685             b2Capacity = MAX_STACK_BUFFER_SIZE;
    686     uint16_t result;
    687     int32_t b2Index = 0;
    688     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    689     UBool leftToRight=FALSE, rightToLeft=FALSE;
    690     int32_t rtlPos =-1, ltrPos =-1;
    691 
    692     //get the string length
    693     if(srcLength == -1){
    694         srcLength = u_strlen(src);
    695     }
    696     // map
    697     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
    698 
    699     if(*status == U_BUFFER_OVERFLOW_ERROR){
    700         // redo processing of string
    701         /* we do not have enough room so grow the buffer*/
    702         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
    703         if(b1==NULL){
    704             *status = U_MEMORY_ALLOCATION_ERROR;
    705             goto CLEANUP;
    706         }
    707 
    708         *status = U_ZERO_ERROR; // reset error
    709 
    710         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    711 
    712     }
    713 
    714     // normalize
    715     if(profile->doNFKC == TRUE){
    716         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    717 
    718         if(*status == U_BUFFER_OVERFLOW_ERROR){
    719             // redo processing of string
    720             /* we do not have enough room so grow the buffer*/
    721             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
    722             if(b2==NULL){
    723                 *status = U_MEMORY_ALLOCATION_ERROR;
    724                 goto CLEANUP;
    725             }
    726 
    727             *status = U_ZERO_ERROR; // reset error
    728 
    729             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    730 
    731         }
    732 
    733     }else{
    734         b2 = b1;
    735         b2Len = b1Len;
    736     }
    737 
    738 
    739     if(U_FAILURE(*status)){
    740         goto CLEANUP;
    741     }
    742 
    743     UChar32 ch;
    744     UStringPrepType type;
    745     int16_t value;
    746     UBool isIndex;
    747 
    748     // Prohibit and checkBiDi in one pass
    749     for(b2Index=0; b2Index<b2Len;){
    750 
    751         ch = 0;
    752 
    753         U16_NEXT(b2, b2Index, b2Len, ch);
    754 
    755         UTRIE_GET16(&profile->sprepTrie,ch,result);
    756 
    757         type = getValues(result, value, isIndex);
    758 
    759         if( type == USPREP_PROHIBITED ||
    760             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    761            ){
    762             *status = U_STRINGPREP_PROHIBITED_ERROR;
    763             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    764             goto CLEANUP;
    765         }
    766 
    767         if(profile->checkBiDi) {
    768             direction = ubidi_getClass(profile->bdp, ch);
    769             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    770                 firstCharDir = direction;
    771             }
    772             if(direction == U_LEFT_TO_RIGHT){
    773                 leftToRight = TRUE;
    774                 ltrPos = b2Index-1;
    775             }
    776             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    777                 rightToLeft = TRUE;
    778                 rtlPos = b2Index-1;
    779             }
    780         }
    781     }
    782     if(profile->checkBiDi == TRUE){
    783         // satisfy 2
    784         if( leftToRight == TRUE && rightToLeft == TRUE){
    785             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    786             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    787             goto CLEANUP;
    788         }
    789 
    790         //satisfy 3
    791         if( rightToLeft == TRUE &&
    792             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    793               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    794            ){
    795             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    796             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    797             return FALSE;
    798         }
    799     }
    800     if(b2Len>0 && b2Len <= destCapacity){
    801         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    802     }
    803 
    804 CLEANUP:
    805     if(b1!=b1Stack){
    806         uprv_free(b1);
    807         b1=NULL;
    808     }
    809 
    810     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
    811         uprv_free(b2);
    812         b2=NULL;
    813     }
    814     return u_terminateUChars(dest, destCapacity, b2Len, status);
    815 }
    816 
    817 
    818 /* data swapping ------------------------------------------------------------ */
    819 
    820 U_CAPI int32_t U_EXPORT2
    821 usprep_swap(const UDataSwapper *ds,
    822             const void *inData, int32_t length, void *outData,
    823             UErrorCode *pErrorCode) {
    824     const UDataInfo *pInfo;
    825     int32_t headerSize;
    826 
    827     const uint8_t *inBytes;
    828     uint8_t *outBytes;
    829 
    830     const int32_t *inIndexes;
    831     int32_t indexes[16];
    832 
    833     int32_t i, offset, count, size;
    834 
    835     /* udata_swapDataHeader checks the arguments */
    836     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    837     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    838         return 0;
    839     }
    840 
    841     /* check data format and format version */
    842     pInfo=(const UDataInfo *)((const char *)inData+4);
    843     if(!(
    844         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    845         pInfo->dataFormat[1]==0x50 &&
    846         pInfo->dataFormat[2]==0x52 &&
    847         pInfo->dataFormat[3]==0x50 &&
    848         pInfo->formatVersion[0]==3
    849     )) {
    850         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    851                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    852                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    853                          pInfo->formatVersion[0]);
    854         *pErrorCode=U_UNSUPPORTED_ERROR;
    855         return 0;
    856     }
    857 
    858     inBytes=(const uint8_t *)inData+headerSize;
    859     outBytes=(uint8_t *)outData+headerSize;
    860 
    861     inIndexes=(const int32_t *)inBytes;
    862 
    863     if(length>=0) {
    864         length-=headerSize;
    865         if(length<16*4) {
    866             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    867                              length);
    868             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    869             return 0;
    870         }
    871     }
    872 
    873     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    874     for(i=0; i<16; ++i) {
    875         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    876     }
    877 
    878     /* calculate the total length of the data */
    879     size=
    880         16*4+ /* size of indexes[] */
    881         indexes[_SPREP_INDEX_TRIE_SIZE]+
    882         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    883 
    884     if(length>=0) {
    885         if(length<size) {
    886             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    887                              length);
    888             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    889             return 0;
    890         }
    891 
    892         /* copy the data for inaccessible bytes */
    893         if(inBytes!=outBytes) {
    894             uprv_memcpy(outBytes, inBytes, size);
    895         }
    896 
    897         offset=0;
    898 
    899         /* swap the int32_t indexes[] */
    900         count=16*4;
    901         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    902         offset+=count;
    903 
    904         /* swap the UTrie */
    905         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    906         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    907         offset+=count;
    908 
    909         /* swap the uint16_t mappingTable[] */
    910         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    911         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    912         offset+=count;
    913     }
    914 
    915     return headerSize+size;
    916 }
    917 
    918 #endif /* #if !UCONFIG_NO_IDNA */
    919