Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  *******************************************************************************
      5  *
      6  *   Copyright (C) 2003-2016, International Business Machines
      7  *   Corporation and others.  All Rights Reserved.
      8  *
      9  *******************************************************************************
     10  *   file name:  usprep.cpp
     11  *   encoding:   UTF-8
     12  *   tab size:   8 (not used)
     13  *   indentation:4
     14  *
     15  *   created on: 2003jul2
     16  *   created by: Ram Viswanadha
     17  */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_IDNA
     22 
     23 #include "unicode/usprep.h"
     24 
     25 #include "unicode/normalizer2.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/uversion.h"
     29 #include "umutex.h"
     30 #include "cmemory.h"
     31 #include "sprpimpl.h"
     32 #include "ustr_imp.h"
     33 #include "uhash.h"
     34 #include "cstring.h"
     35 #include "udataswp.h"
     36 #include "ucln_cmn.h"
     37 #include "ubidi_props.h"
     38 #include "uprops.h"
     39 
     40 U_NAMESPACE_USE
     41 
     42 U_CDECL_BEGIN
     43 
/*
 * Static cache for already opened StringPrep profiles.
 * The table is created by createCache() (run once through gSharedDataInitOnce);
 * lookups, insertions and flushes in this file are done while holding
 * usprepMutex.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static icu::UInitOnce gSharedDataInitOnce;

/* Guards the profile cache and the reference counts of cached profiles. */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file (currently not recorded) */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * The Unicode version of the sprep data.
 * Copied from the data header by isSPrepAcceptable() and compared with the
 * runtime Unicode version in loadData().
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     57 
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
     75 
     76 static UBool U_CALLCONV
     77 isSPrepAcceptable(void * /* context */,
     78              const char * /* type */,
     79              const char * /* name */,
     80              const UDataInfo *pInfo) {
     81     if(
     82         pInfo->size>=20 &&
     83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     86         pInfo->dataFormat[1]==0x50 &&
     87         pInfo->dataFormat[2]==0x52 &&
     88         pInfo->dataFormat[3]==0x50 &&
     89         pInfo->formatVersion[0]==3 &&
     90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     92     ) {
     93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     95         return TRUE;
     96     } else {
     97         return FALSE;
     98     }
     99 }
    100 
    101 static int32_t U_CALLCONV
    102 getSPrepFoldingOffset(uint32_t data) {
    103 
    104     return (int32_t)data;
    105 
    106 }
    107 
    108 /* hashes an entry  */
    109 static int32_t U_CALLCONV
    110 hashEntry(const UHashTok parm) {
    111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    112     UHashTok namekey, pathkey;
    113     namekey.pointer = b->name;
    114     pathkey.pointer = b->path;
    115     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    116 }
    117 
    118 /* compares two entries */
    119 static UBool U_CALLCONV
    120 compareEntries(const UHashTok p1, const UHashTok p2) {
    121     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    122     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    123     UHashTok name1, name2, path1, path2;
    124     name1.pointer = b1->name;
    125     name2.pointer = b2->name;
    126     path1.pointer = b1->path;
    127     path2.pointer = b2->path;
    128     return ((UBool)(uhash_compareChars(name1, name2) &
    129         uhash_compareChars(path1, path2)));
    130 }
    131 
    132 static void
    133 usprep_unload(UStringPrepProfile* data){
    134     udata_close(data->sprepData);
    135 }
    136 
    137 static int32_t
    138 usprep_internal_flushCache(UBool noRefCount){
    139     UStringPrepProfile *profile = NULL;
    140     UStringPrepKey  *key  = NULL;
    141     int32_t pos = UHASH_FIRST;
    142     int32_t deletedNum = 0;
    143     const UHashElement *e;
    144 
    145     /*
    146      * if shared data hasn't even been lazy evaluated yet
    147      * return 0
    148      */
    149     umtx_lock(&usprepMutex);
    150     if (SHARED_DATA_HASHTABLE == NULL) {
    151         umtx_unlock(&usprepMutex);
    152         return 0;
    153     }
    154 
    155     /*creates an enumeration to iterate through every element in the table */
    156     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    157     {
    158         profile = (UStringPrepProfile *) e->value.pointer;
    159         key  = (UStringPrepKey *) e->key.pointer;
    160 
    161         if ((noRefCount== FALSE && profile->refCount == 0) ||
    162              noRefCount== TRUE) {
    163             deletedNum++;
    164             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    165 
    166             /* unload the data */
    167             usprep_unload(profile);
    168 
    169             if(key->name != NULL) {
    170                 uprv_free(key->name);
    171                 key->name=NULL;
    172             }
    173             if(key->path != NULL) {
    174                 uprv_free(key->path);
    175                 key->path=NULL;
    176             }
    177             uprv_free(profile);
    178             uprv_free(key);
    179         }
    180 
    181     }
    182     umtx_unlock(&usprepMutex);
    183 
    184     return deletedNum;
    185 }
    186 
    187 /* Works just like ucnv_flushCache()
    188 static int32_t
    189 usprep_flushCache(){
    190     return usprep_internal_flushCache(FALSE);
    191 }
    192 */
    193 
    194 static UBool U_CALLCONV usprep_cleanup(void){
    195     if (SHARED_DATA_HASHTABLE != NULL) {
    196         usprep_internal_flushCache(TRUE);
    197         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    198             uhash_close(SHARED_DATA_HASHTABLE);
    199             SHARED_DATA_HASHTABLE = NULL;
    200         }
    201     }
    202     gSharedDataInitOnce.reset();
    203     return (SHARED_DATA_HASHTABLE == NULL);
    204 }
    205 U_CDECL_END
    206 
    207 
    208 /** Initializes the cache for resources */
    209 static void U_CALLCONV
    210 createCache(UErrorCode &status) {
    211     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
    212     if (U_FAILURE(status)) {
    213         SHARED_DATA_HASHTABLE = NULL;
    214     }
    215     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    216 }
    217 
    218 static void
    219 initCache(UErrorCode *status) {
    220     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
    221 }
    222 
    223 static UBool U_CALLCONV
    224 loadData(UStringPrepProfile* profile,
    225          const char* path,
    226          const char* name,
    227          const char* type,
    228          UErrorCode* errorCode) {
    229     /* load Unicode SPREP data from file */
    230     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    231     UDataMemory *dataMemory;
    232     const int32_t *p=NULL;
    233     const uint8_t *pb;
    234     UVersionInfo normUnicodeVersion;
    235     int32_t normUniVer, sprepUniVer, normCorrVer;
    236 
    237     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    238         return 0;
    239     }
    240 
    241     /* open the data outside the mutex block */
    242     //TODO: change the path
    243     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    244     if(U_FAILURE(*errorCode)) {
    245         return FALSE;
    246     }
    247 
    248     p=(const int32_t *)udata_getMemory(dataMemory);
    249     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    250     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    251     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    252 
    253 
    254     if(U_FAILURE(*errorCode)) {
    255         udata_close(dataMemory);
    256         return FALSE;
    257     }
    258 
    259     /* in the mutex block, set the data for this process */
    260     umtx_lock(&usprepMutex);
    261     if(profile->sprepData==NULL) {
    262         profile->sprepData=dataMemory;
    263         dataMemory=NULL;
    264         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    265         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    266     } else {
    267         p=(const int32_t *)udata_getMemory(profile->sprepData);
    268     }
    269     umtx_unlock(&usprepMutex);
    270     /* initialize some variables */
    271     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    272 
    273     u_getUnicodeVersion(normUnicodeVersion);
    274     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    275                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    276     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    277                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    278     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    279 
    280     if(U_FAILURE(*errorCode)){
    281         udata_close(dataMemory);
    282         return FALSE;
    283     }
    284     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    285         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    286         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    287       ){
    288         *errorCode = U_INVALID_FORMAT_ERROR;
    289         udata_close(dataMemory);
    290         return FALSE;
    291     }
    292     profile->isDataLoaded = TRUE;
    293 
    294     /* if a different thread set it first, then close the extra data */
    295     if(dataMemory!=NULL) {
    296         udata_close(dataMemory); /* NULL if it was set correctly */
    297     }
    298 
    299 
    300     return profile->isDataLoaded;
    301 }
    302 
    303 static UStringPrepProfile*
    304 usprep_getProfile(const char* path,
    305                   const char* name,
    306                   UErrorCode *status){
    307 
    308     UStringPrepProfile* profile = NULL;
    309 
    310     initCache(status);
    311 
    312     if(U_FAILURE(*status)){
    313         return NULL;
    314     }
    315 
    316     UStringPrepKey stackKey;
    317     /*
    318      * const is cast way to save malloc, strcpy and free calls
    319      * we use the passed in pointers for fetching the data from the
    320      * hash table which is safe
    321      */
    322     stackKey.name = (char*) name;
    323     stackKey.path = (char*) path;
    324 
    325     /* fetch the data from the cache */
    326     umtx_lock(&usprepMutex);
    327     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    328     if(profile != NULL) {
    329         profile->refCount++;
    330     }
    331     umtx_unlock(&usprepMutex);
    332 
    333     if(profile == NULL) {
    334         /* else load the data and put the data in the cache */
    335         LocalMemory<UStringPrepProfile> newProfile;
    336         if(newProfile.allocateInsteadAndReset() == NULL) {
    337             *status = U_MEMORY_ALLOCATION_ERROR;
    338             return NULL;
    339         }
    340 
    341         /* load the data */
    342         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    343             return NULL;
    344         }
    345 
    346         /* get the options */
    347         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    348         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    349 
    350         if(newProfile->checkBiDi) {
    351             newProfile->bdp = ubidi_getSingleton();
    352         }
    353 
    354         LocalMemory<UStringPrepKey> key;
    355         LocalMemory<char> keyName;
    356         LocalMemory<char> keyPath;
    357         if( key.allocateInsteadAndReset() == NULL ||
    358             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
    359             (path != NULL &&
    360              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
    361          ) {
    362             *status = U_MEMORY_ALLOCATION_ERROR;
    363             usprep_unload(newProfile.getAlias());
    364             return NULL;
    365         }
    366 
    367         umtx_lock(&usprepMutex);
    368         // If another thread already inserted the same key/value, refcount and cleanup our thread data
    369         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    370         if(profile != NULL) {
    371             profile->refCount++;
    372             usprep_unload(newProfile.getAlias());
    373         }
    374         else {
    375             /* initialize the key members */
    376             key->name = keyName.orphan();
    377             uprv_strcpy(key->name, name);
    378             if(path != NULL){
    379                 key->path = keyPath.orphan();
    380                 uprv_strcpy(key->path, path);
    381             }
    382             profile = newProfile.orphan();
    383 
    384             /* add the data object to the cache */
    385             profile->refCount = 1;
    386             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    387         }
    388         umtx_unlock(&usprepMutex);
    389     }
    390 
    391     return profile;
    392 }
    393 
    394 U_CAPI UStringPrepProfile* U_EXPORT2
    395 usprep_open(const char* path,
    396             const char* name,
    397             UErrorCode* status){
    398 
    399     if(status == NULL || U_FAILURE(*status)){
    400         return NULL;
    401     }
    402 
    403     /* initialize the profile struct members */
    404     return usprep_getProfile(path,name,status);
    405 }
    406 
    407 U_CAPI UStringPrepProfile* U_EXPORT2
    408 usprep_openByType(UStringPrepProfileType type,
    409 				  UErrorCode* status) {
    410     if(status == NULL || U_FAILURE(*status)){
    411         return NULL;
    412     }
    413     int32_t index = (int32_t)type;
    414     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
    415         *status = U_ILLEGAL_ARGUMENT_ERROR;
    416         return NULL;
    417     }
    418     return usprep_open(NULL, PROFILE_NAMES[index], status);
    419 }
    420 
    421 U_CAPI void U_EXPORT2
    422 usprep_close(UStringPrepProfile* profile){
    423     if(profile==NULL){
    424         return;
    425     }
    426 
    427     umtx_lock(&usprepMutex);
    428     /* decrement the ref count*/
    429     if(profile->refCount > 0){
    430         profile->refCount--;
    431     }
    432     umtx_unlock(&usprepMutex);
    433 
    434 }
    435 
    436 U_CFUNC void
    437 uprv_syntaxError(const UChar* rules,
    438                  int32_t pos,
    439                  int32_t rulesLen,
    440                  UParseError* parseError){
    441     if(parseError == NULL){
    442         return;
    443     }
    444     parseError->offset = pos;
    445     parseError->line = 0 ; // we are not using line numbers
    446 
    447     // for pre-context
    448     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    449     int32_t limit = pos;
    450 
    451     u_memcpy(parseError->preContext,rules+start,limit-start);
    452     //null terminate the buffer
    453     parseError->preContext[limit-start] = 0;
    454 
    455     // for post-context; include error rules[pos]
    456     start = pos;
    457     limit = start + (U_PARSE_CONTEXT_LEN-1);
    458     if (limit > rulesLen) {
    459         limit = rulesLen;
    460     }
    461     if (start < rulesLen) {
    462         u_memcpy(parseError->postContext,rules+start,limit-start);
    463     }
    464     //null terminate the buffer
    465     parseError->postContext[limit-start]= 0;
    466 }
    467 
    468 
    469 static inline UStringPrepType
    470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    471 
    472     UStringPrepType type;
    473     if(trieWord == 0){
    474         /*
    475          * Initial value stored in the mapping table
    476          * just return USPREP_TYPE_LIMIT .. so that
    477          * the source codepoint is copied to the destination
    478          */
    479         type = USPREP_TYPE_LIMIT;
    480         isIndex =FALSE;
    481         value = 0;
    482     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    483         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    484         isIndex =FALSE;
    485         value = 0;
    486     }else{
    487         /* get the type */
    488         type = USPREP_MAP;
    489         /* ascertain if the value is index or delta */
    490         if(trieWord & 0x02){
    491             isIndex = TRUE;
    492             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    493         }else{
    494             isIndex = FALSE;
    495             value = (int16_t)trieWord;
    496             value =  (value >> 2);
    497         }
    498 
    499         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    500             type = USPREP_DELETE;
    501             isIndex =FALSE;
    502             value = 0;
    503         }
    504     }
    505     return type;
    506 }
    507 
    508 // TODO: change to writing to UnicodeString not UChar *
    509 static int32_t
    510 usprep_map(  const UStringPrepProfile* profile,
    511              const UChar* src, int32_t srcLength,
    512              UChar* dest, int32_t destCapacity,
    513              int32_t options,
    514              UParseError* parseError,
    515              UErrorCode* status ){
    516 
    517     uint16_t result;
    518     int32_t destIndex=0;
    519     int32_t srcIndex;
    520     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    521     UStringPrepType type;
    522     int16_t value;
    523     UBool isIndex;
    524     const int32_t* indexes = profile->indexes;
    525 
    526     // no error checking the caller check for error and arguments
    527     // no string length check the caller finds out the string length
    528 
    529     for(srcIndex=0;srcIndex<srcLength;){
    530         UChar32 ch;
    531 
    532         U16_NEXT(src,srcIndex,srcLength,ch);
    533 
    534         result=0;
    535 
    536         UTRIE_GET16(&profile->sprepTrie,ch,result);
    537 
    538         type = getValues(result, value, isIndex);
    539 
    540         // check if the source codepoint is unassigned
    541         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    542 
    543             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    544             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    545             return 0;
    546 
    547         }else if(type == USPREP_MAP){
    548 
    549             int32_t index, length;
    550 
    551             if(isIndex){
    552                 index = value;
    553                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    554                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    555                     length = 1;
    556                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    557                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    558                     length = 2;
    559                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    560                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    561                     length = 3;
    562                 }else{
    563                     length = profile->mappingData[index++];
    564 
    565                 }
    566 
    567                 /* copy mapping to destination */
    568                 for(int32_t i=0; i< length; i++){
    569                     if(destIndex < destCapacity  ){
    570                         dest[destIndex] = profile->mappingData[index+i];
    571                     }
    572                     destIndex++; /* for pre-flighting */
    573                 }
    574                 continue;
    575             }else{
    576                 // subtract the delta to arrive at the code point
    577                 ch -= value;
    578             }
    579 
    580         }else if(type==USPREP_DELETE){
    581              // just consume the codepoint and contine
    582             continue;
    583         }
    584         //copy the code point into destination
    585         if(ch <= 0xFFFF){
    586             if(destIndex < destCapacity ){
    587                 dest[destIndex] = (UChar)ch;
    588             }
    589             destIndex++;
    590         }else{
    591             if(destIndex+1 < destCapacity ){
    592                 dest[destIndex]   = U16_LEAD(ch);
    593                 dest[destIndex+1] = U16_TRAIL(ch);
    594             }
    595             destIndex +=2;
    596         }
    597 
    598     }
    599 
    600     return u_terminateUChars(dest, destCapacity, destIndex, status);
    601 }
    602 
    603 /*
    604    1) Map -- For each character in the input, check if it has a mapping
    605       and, if so, replace it with its mapping.
    606 
    607    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    608       normalization.
    609 
    610    3) Prohibit -- Check for any characters that are not allowed in the
    611       output.  If any are found, return an error.
    612 
    613    4) Check bidi -- Possibly check for right-to-left characters, and if
    614       any are found, make sure that the whole string satisfies the
    615       requirements for bidirectional strings.  If the string does not
    616       satisfy the requirements for bidirectional strings, return an
    617       error.
    618       [Unicode3.2] defines several bidirectional categories; each character
    619        has one bidirectional category assigned to it.  For the purposes of
    620        the requirements below, an "RandALCat character" is a character that
    621        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
    626        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    627        this because they have bidirectional category "EN".
    628 
    629        In any profile that specifies bidirectional character handling, all
    630        three of the following requirements MUST be met:
    631 
    632        1) The characters in section 5.8 MUST be prohibited.
    633 
    634        2) If a string contains any RandALCat character, the string MUST NOT
    635           contain any LCat character.
    636 
    637        3) If a string contains any RandALCat character, a RandALCat
    638           character MUST be the first character of the string, and a
    639           RandALCat character MUST be the last character of the string.
    640 */
    641 U_CAPI int32_t U_EXPORT2
    642 usprep_prepare(   const UStringPrepProfile* profile,
    643                   const UChar* src, int32_t srcLength,
    644                   UChar* dest, int32_t destCapacity,
    645                   int32_t options,
    646                   UParseError* parseError,
    647                   UErrorCode* status ){
    648 
    649     // check error status
    650     if(U_FAILURE(*status)){
    651         return 0;
    652     }
    653 
    654     //check arguments
    655     if(profile==NULL ||
    656             (src==NULL ? srcLength!=0 : srcLength<-1) ||
    657             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
    658         *status=U_ILLEGAL_ARGUMENT_ERROR;
    659         return 0;
    660     }
    661 
    662     //get the string length
    663     if(srcLength < 0){
    664         srcLength = u_strlen(src);
    665     }
    666     // map
    667     UnicodeString s1;
    668     UChar *b1 = s1.getBuffer(srcLength);
    669     if(b1==NULL){
    670         *status = U_MEMORY_ALLOCATION_ERROR;
    671         return 0;
    672     }
    673     int32_t b1Len = usprep_map(profile, src, srcLength,
    674                                b1, s1.getCapacity(), options, parseError, status);
    675     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    676 
    677     if(*status == U_BUFFER_OVERFLOW_ERROR){
    678         // redo processing of string
    679         /* we do not have enough room so grow the buffer*/
    680         b1 = s1.getBuffer(b1Len);
    681         if(b1==NULL){
    682             *status = U_MEMORY_ALLOCATION_ERROR;
    683             return 0;
    684         }
    685 
    686         *status = U_ZERO_ERROR; // reset error
    687         b1Len = usprep_map(profile, src, srcLength,
    688                            b1, s1.getCapacity(), options, parseError, status);
    689         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    690     }
    691     if(U_FAILURE(*status)){
    692         return 0;
    693     }
    694 
    695     // normalize
    696     UnicodeString s2;
    697     if(profile->doNFKC){
    698         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
    699         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
    700         if(U_FAILURE(*status)){
    701             return 0;
    702         }
    703         fn2.normalize(s1, s2, *status);
    704     }else{
    705         s2.fastCopyFrom(s1);
    706     }
    707     if(U_FAILURE(*status)){
    708         return 0;
    709     }
    710 
    711     // Prohibit and checkBiDi in one pass
    712     const UChar *b2 = s2.getBuffer();
    713     int32_t b2Len = s2.length();
    714     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    715     UBool leftToRight=FALSE, rightToLeft=FALSE;
    716     int32_t rtlPos =-1, ltrPos =-1;
    717 
    718     for(int32_t b2Index=0; b2Index<b2Len;){
    719         UChar32 ch = 0;
    720         U16_NEXT(b2, b2Index, b2Len, ch);
    721 
    722         uint16_t result;
    723         UTRIE_GET16(&profile->sprepTrie,ch,result);
    724 
    725         int16_t value;
    726         UBool isIndex;
    727         UStringPrepType type = getValues(result, value, isIndex);
    728 
    729         if( type == USPREP_PROHIBITED ||
    730             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    731            ){
    732             *status = U_STRINGPREP_PROHIBITED_ERROR;
    733             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    734             return 0;
    735         }
    736 
    737         if(profile->checkBiDi) {
    738             direction = ubidi_getClass(profile->bdp, ch);
    739             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    740                 firstCharDir = direction;
    741             }
    742             if(direction == U_LEFT_TO_RIGHT){
    743                 leftToRight = TRUE;
    744                 ltrPos = b2Index-1;
    745             }
    746             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    747                 rightToLeft = TRUE;
    748                 rtlPos = b2Index-1;
    749             }
    750         }
    751     }
    752     if(profile->checkBiDi == TRUE){
    753         // satisfy 2
    754         if( leftToRight == TRUE && rightToLeft == TRUE){
    755             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    756             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    757             return 0;
    758         }
    759 
    760         //satisfy 3
    761         if( rightToLeft == TRUE &&
    762             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    763               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    764            ){
    765             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    766             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    767             return FALSE;
    768         }
    769     }
    770     return s2.extract(dest, destCapacity, *status);
    771 }
    772 
    773 
    774 /* data swapping ------------------------------------------------------------ */
    775 
    776 U_CAPI int32_t U_EXPORT2
    777 usprep_swap(const UDataSwapper *ds,
    778             const void *inData, int32_t length, void *outData,
    779             UErrorCode *pErrorCode) {
    780     const UDataInfo *pInfo;
    781     int32_t headerSize;
    782 
    783     const uint8_t *inBytes;
    784     uint8_t *outBytes;
    785 
    786     const int32_t *inIndexes;
    787     int32_t indexes[16];
    788 
    789     int32_t i, offset, count, size;
    790 
    791     /* udata_swapDataHeader checks the arguments */
    792     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    793     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    794         return 0;
    795     }
    796 
    797     /* check data format and format version */
    798     pInfo=(const UDataInfo *)((const char *)inData+4);
    799     if(!(
    800         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    801         pInfo->dataFormat[1]==0x50 &&
    802         pInfo->dataFormat[2]==0x52 &&
    803         pInfo->dataFormat[3]==0x50 &&
    804         pInfo->formatVersion[0]==3
    805     )) {
    806         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    807                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    808                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    809                          pInfo->formatVersion[0]);
    810         *pErrorCode=U_UNSUPPORTED_ERROR;
    811         return 0;
    812     }
    813 
    814     inBytes=(const uint8_t *)inData+headerSize;
    815     outBytes=(uint8_t *)outData+headerSize;
    816 
    817     inIndexes=(const int32_t *)inBytes;
    818 
    819     if(length>=0) {
    820         length-=headerSize;
    821         if(length<16*4) {
    822             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    823                              length);
    824             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    825             return 0;
    826         }
    827     }
    828 
    829     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    830     for(i=0; i<16; ++i) {
    831         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    832     }
    833 
    834     /* calculate the total length of the data */
    835     size=
    836         16*4+ /* size of indexes[] */
    837         indexes[_SPREP_INDEX_TRIE_SIZE]+
    838         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    839 
    840     if(length>=0) {
    841         if(length<size) {
    842             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    843                              length);
    844             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    845             return 0;
    846         }
    847 
    848         /* copy the data for inaccessible bytes */
    849         if(inBytes!=outBytes) {
    850             uprv_memcpy(outBytes, inBytes, size);
    851         }
    852 
    853         offset=0;
    854 
    855         /* swap the int32_t indexes[] */
    856         count=16*4;
    857         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    858         offset+=count;
    859 
    860         /* swap the UTrie */
    861         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    862         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    863         offset+=count;
    864 
    865         /* swap the uint16_t mappingTable[] */
    866         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    867         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    868         //offset+=count;
    869     }
    870 
    871     return headerSize+size;
    872 }
    873 
    874 #endif /* #if !UCONFIG_NO_IDNA */
    875