Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  *******************************************************************************
      5  *
      6  *   Copyright (C) 2003-2016, International Business Machines
      7  *   Corporation and others.  All Rights Reserved.
      8  *
      9  *******************************************************************************
     10  *   file name:  usprep.cpp
     11  *   encoding:   UTF-8
     12  *   tab size:   8 (not used)
     13  *   indentation:4
     14  *
     15  *   created on: 2003jul2
     16  *   created by: Ram Viswanadha
     17  */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_IDNA
     22 
     23 #include "unicode/usprep.h"
     24 
     25 #include "unicode/normalizer2.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/uversion.h"
     29 #include "umutex.h"
     30 #include "cmemory.h"
     31 #include "sprpimpl.h"
     32 #include "ustr_imp.h"
     33 #include "uhash.h"
     34 #include "cstring.h"
     35 #include "udataswp.h"
     36 #include "ucln_cmn.h"
     37 #include "ubidi_props.h"
     38 #include "uprops.h"
     39 
     40 U_NAMESPACE_USE
     41 
     42 U_CDECL_BEGIN
     43 
/*
 * Static cache for already opened StringPrep profiles.
 * The table is created by createCache() with hashEntry()/compareEntries(),
 * so keys are UStringPrepKey pointers; values are UStringPrepProfile pointers.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization state used by initCache() to run createCache() */
static icu::UInitOnce gSharedDataInitOnce;

/* held around accesses to SHARED_DATA_HASHTABLE and to profile->refCount / profile->sprepData */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file (currently unused, kept commented out) */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; overwritten by isSPrepAcceptable() for each accepted data item */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     57 
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 * Several types intentionally share the same data name ("rfc3491").
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
     75 
     76 static UBool U_CALLCONV
     77 isSPrepAcceptable(void * /* context */,
     78              const char * /* type */,
     79              const char * /* name */,
     80              const UDataInfo *pInfo) {
     81     if(
     82         pInfo->size>=20 &&
     83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     86         pInfo->dataFormat[1]==0x50 &&
     87         pInfo->dataFormat[2]==0x52 &&
     88         pInfo->dataFormat[3]==0x50 &&
     89         pInfo->formatVersion[0]==3 &&
     90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     92     ) {
     93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     95         return TRUE;
     96     } else {
     97         return FALSE;
     98     }
     99 }
    100 
    101 static int32_t U_CALLCONV
    102 getSPrepFoldingOffset(uint32_t data) {
    103 
    104     return (int32_t)data;
    105 
    106 }
    107 
    108 /* hashes an entry  */
    109 static int32_t U_CALLCONV
    110 hashEntry(const UHashTok parm) {
    111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    112     UHashTok namekey, pathkey;
    113     namekey.pointer = b->name;
    114     pathkey.pointer = b->path;
    115     uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
    116             37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
    117     return static_cast<int32_t>(unsignedHash);
    118 }
    119 
    120 /* compares two entries */
    121 static UBool U_CALLCONV
    122 compareEntries(const UHashTok p1, const UHashTok p2) {
    123     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    124     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    125     UHashTok name1, name2, path1, path2;
    126     name1.pointer = b1->name;
    127     name2.pointer = b2->name;
    128     path1.pointer = b1->path;
    129     path2.pointer = b2->path;
    130     return ((UBool)(uhash_compareChars(name1, name2) &
    131         uhash_compareChars(path1, path2)));
    132 }
    133 
    134 static void
    135 usprep_unload(UStringPrepProfile* data){
    136     udata_close(data->sprepData);
    137 }
    138 
    139 static int32_t
    140 usprep_internal_flushCache(UBool noRefCount){
    141     UStringPrepProfile *profile = NULL;
    142     UStringPrepKey  *key  = NULL;
    143     int32_t pos = UHASH_FIRST;
    144     int32_t deletedNum = 0;
    145     const UHashElement *e;
    146 
    147     /*
    148      * if shared data hasn't even been lazy evaluated yet
    149      * return 0
    150      */
    151     umtx_lock(&usprepMutex);
    152     if (SHARED_DATA_HASHTABLE == NULL) {
    153         umtx_unlock(&usprepMutex);
    154         return 0;
    155     }
    156 
    157     /*creates an enumeration to iterate through every element in the table */
    158     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    159     {
    160         profile = (UStringPrepProfile *) e->value.pointer;
    161         key  = (UStringPrepKey *) e->key.pointer;
    162 
    163         if ((noRefCount== FALSE && profile->refCount == 0) ||
    164              noRefCount== TRUE) {
    165             deletedNum++;
    166             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    167 
    168             /* unload the data */
    169             usprep_unload(profile);
    170 
    171             if(key->name != NULL) {
    172                 uprv_free(key->name);
    173                 key->name=NULL;
    174             }
    175             if(key->path != NULL) {
    176                 uprv_free(key->path);
    177                 key->path=NULL;
    178             }
    179             uprv_free(profile);
    180             uprv_free(key);
    181         }
    182 
    183     }
    184     umtx_unlock(&usprepMutex);
    185 
    186     return deletedNum;
    187 }
    188 
    189 /* Works just like ucnv_flushCache()
    190 static int32_t
    191 usprep_flushCache(){
    192     return usprep_internal_flushCache(FALSE);
    193 }
    194 */
    195 
    196 static UBool U_CALLCONV usprep_cleanup(void){
    197     if (SHARED_DATA_HASHTABLE != NULL) {
    198         usprep_internal_flushCache(TRUE);
    199         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    200             uhash_close(SHARED_DATA_HASHTABLE);
    201             SHARED_DATA_HASHTABLE = NULL;
    202         }
    203     }
    204     gSharedDataInitOnce.reset();
    205     return (SHARED_DATA_HASHTABLE == NULL);
    206 }
    207 U_CDECL_END
    208 
    209 
    210 /** Initializes the cache for resources */
    211 static void U_CALLCONV
    212 createCache(UErrorCode &status) {
    213     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
    214     if (U_FAILURE(status)) {
    215         SHARED_DATA_HASHTABLE = NULL;
    216     }
    217     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    218 }
    219 
    220 static void
    221 initCache(UErrorCode *status) {
    222     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
    223 }
    224 
    225 static UBool U_CALLCONV
    226 loadData(UStringPrepProfile* profile,
    227          const char* path,
    228          const char* name,
    229          const char* type,
    230          UErrorCode* errorCode) {
    231     /* load Unicode SPREP data from file */
    232     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    233     UDataMemory *dataMemory;
    234     const int32_t *p=NULL;
    235     const uint8_t *pb;
    236     UVersionInfo normUnicodeVersion;
    237     int32_t normUniVer, sprepUniVer, normCorrVer;
    238 
    239     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    240         return 0;
    241     }
    242 
    243     /* open the data outside the mutex block */
    244     //TODO: change the path
    245     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    246     if(U_FAILURE(*errorCode)) {
    247         return FALSE;
    248     }
    249 
    250     p=(const int32_t *)udata_getMemory(dataMemory);
    251     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    252     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    253     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    254 
    255 
    256     if(U_FAILURE(*errorCode)) {
    257         udata_close(dataMemory);
    258         return FALSE;
    259     }
    260 
    261     /* in the mutex block, set the data for this process */
    262     umtx_lock(&usprepMutex);
    263     if(profile->sprepData==NULL) {
    264         profile->sprepData=dataMemory;
    265         dataMemory=NULL;
    266         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    267         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    268     } else {
    269         p=(const int32_t *)udata_getMemory(profile->sprepData);
    270     }
    271     umtx_unlock(&usprepMutex);
    272     /* initialize some variables */
    273     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    274 
    275     u_getUnicodeVersion(normUnicodeVersion);
    276     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    277                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    278     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    279                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    280     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    281 
    282     if(U_FAILURE(*errorCode)){
    283         udata_close(dataMemory);
    284         return FALSE;
    285     }
    286     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    287         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    288         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    289       ){
    290         *errorCode = U_INVALID_FORMAT_ERROR;
    291         udata_close(dataMemory);
    292         return FALSE;
    293     }
    294     profile->isDataLoaded = TRUE;
    295 
    296     /* if a different thread set it first, then close the extra data */
    297     if(dataMemory!=NULL) {
    298         udata_close(dataMemory); /* NULL if it was set correctly */
    299     }
    300 
    301 
    302     return profile->isDataLoaded;
    303 }
    304 
    305 static UStringPrepProfile*
    306 usprep_getProfile(const char* path,
    307                   const char* name,
    308                   UErrorCode *status){
    309 
    310     UStringPrepProfile* profile = NULL;
    311 
    312     initCache(status);
    313 
    314     if(U_FAILURE(*status)){
    315         return NULL;
    316     }
    317 
    318     UStringPrepKey stackKey;
    319     /*
    320      * const is cast way to save malloc, strcpy and free calls
    321      * we use the passed in pointers for fetching the data from the
    322      * hash table which is safe
    323      */
    324     stackKey.name = (char*) name;
    325     stackKey.path = (char*) path;
    326 
    327     /* fetch the data from the cache */
    328     umtx_lock(&usprepMutex);
    329     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    330     if(profile != NULL) {
    331         profile->refCount++;
    332     }
    333     umtx_unlock(&usprepMutex);
    334 
    335     if(profile == NULL) {
    336         /* else load the data and put the data in the cache */
    337         LocalMemory<UStringPrepProfile> newProfile;
    338         if(newProfile.allocateInsteadAndReset() == NULL) {
    339             *status = U_MEMORY_ALLOCATION_ERROR;
    340             return NULL;
    341         }
    342 
    343         /* load the data */
    344         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    345             return NULL;
    346         }
    347 
    348         /* get the options */
    349         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    350         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    351 
    352         LocalMemory<UStringPrepKey> key;
    353         LocalMemory<char> keyName;
    354         LocalMemory<char> keyPath;
    355         if( key.allocateInsteadAndReset() == NULL ||
    356             keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
    357             (path != NULL &&
    358              keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
    359          ) {
    360             *status = U_MEMORY_ALLOCATION_ERROR;
    361             usprep_unload(newProfile.getAlias());
    362             return NULL;
    363         }
    364 
    365         umtx_lock(&usprepMutex);
    366         // If another thread already inserted the same key/value, refcount and cleanup our thread data
    367         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    368         if(profile != NULL) {
    369             profile->refCount++;
    370             usprep_unload(newProfile.getAlias());
    371         }
    372         else {
    373             /* initialize the key members */
    374             key->name = keyName.orphan();
    375             uprv_strcpy(key->name, name);
    376             if(path != NULL){
    377                 key->path = keyPath.orphan();
    378                 uprv_strcpy(key->path, path);
    379             }
    380             profile = newProfile.orphan();
    381 
    382             /* add the data object to the cache */
    383             profile->refCount = 1;
    384             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    385         }
    386         umtx_unlock(&usprepMutex);
    387     }
    388 
    389     return profile;
    390 }
    391 
    392 U_CAPI UStringPrepProfile* U_EXPORT2
    393 usprep_open(const char* path,
    394             const char* name,
    395             UErrorCode* status){
    396 
    397     if(status == NULL || U_FAILURE(*status)){
    398         return NULL;
    399     }
    400 
    401     /* initialize the profile struct members */
    402     return usprep_getProfile(path,name,status);
    403 }
    404 
    405 U_CAPI UStringPrepProfile* U_EXPORT2
    406 usprep_openByType(UStringPrepProfileType type,
    407 				  UErrorCode* status) {
    408     if(status == NULL || U_FAILURE(*status)){
    409         return NULL;
    410     }
    411     int32_t index = (int32_t)type;
    412     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
    413         *status = U_ILLEGAL_ARGUMENT_ERROR;
    414         return NULL;
    415     }
    416     return usprep_open(NULL, PROFILE_NAMES[index], status);
    417 }
    418 
    419 U_CAPI void U_EXPORT2
    420 usprep_close(UStringPrepProfile* profile){
    421     if(profile==NULL){
    422         return;
    423     }
    424 
    425     umtx_lock(&usprepMutex);
    426     /* decrement the ref count*/
    427     if(profile->refCount > 0){
    428         profile->refCount--;
    429     }
    430     umtx_unlock(&usprepMutex);
    431 
    432 }
    433 
    434 U_CFUNC void
    435 uprv_syntaxError(const UChar* rules,
    436                  int32_t pos,
    437                  int32_t rulesLen,
    438                  UParseError* parseError){
    439     if(parseError == NULL){
    440         return;
    441     }
    442     parseError->offset = pos;
    443     parseError->line = 0 ; // we are not using line numbers
    444 
    445     // for pre-context
    446     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    447     int32_t limit = pos;
    448 
    449     u_memcpy(parseError->preContext,rules+start,limit-start);
    450     //null terminate the buffer
    451     parseError->preContext[limit-start] = 0;
    452 
    453     // for post-context; include error rules[pos]
    454     start = pos;
    455     limit = start + (U_PARSE_CONTEXT_LEN-1);
    456     if (limit > rulesLen) {
    457         limit = rulesLen;
    458     }
    459     if (start < rulesLen) {
    460         u_memcpy(parseError->postContext,rules+start,limit-start);
    461     }
    462     //null terminate the buffer
    463     parseError->postContext[limit-start]= 0;
    464 }
    465 
    466 
    467 static inline UStringPrepType
    468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    469 
    470     UStringPrepType type;
    471     if(trieWord == 0){
    472         /*
    473          * Initial value stored in the mapping table
    474          * just return USPREP_TYPE_LIMIT .. so that
    475          * the source codepoint is copied to the destination
    476          */
    477         type = USPREP_TYPE_LIMIT;
    478         isIndex =FALSE;
    479         value = 0;
    480     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    481         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    482         isIndex =FALSE;
    483         value = 0;
    484     }else{
    485         /* get the type */
    486         type = USPREP_MAP;
    487         /* ascertain if the value is index or delta */
    488         if(trieWord & 0x02){
    489             isIndex = TRUE;
    490             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    491         }else{
    492             isIndex = FALSE;
    493             value = (int16_t)trieWord;
    494             value =  (value >> 2);
    495         }
    496 
    497         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    498             type = USPREP_DELETE;
    499             isIndex =FALSE;
    500             value = 0;
    501         }
    502     }
    503     return type;
    504 }
    505 
    506 // TODO: change to writing to UnicodeString not UChar *
    507 static int32_t
    508 usprep_map(  const UStringPrepProfile* profile,
    509              const UChar* src, int32_t srcLength,
    510              UChar* dest, int32_t destCapacity,
    511              int32_t options,
    512              UParseError* parseError,
    513              UErrorCode* status ){
    514 
    515     uint16_t result;
    516     int32_t destIndex=0;
    517     int32_t srcIndex;
    518     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    519     UStringPrepType type;
    520     int16_t value;
    521     UBool isIndex;
    522     const int32_t* indexes = profile->indexes;
    523 
    524     // no error checking the caller check for error and arguments
    525     // no string length check the caller finds out the string length
    526 
    527     for(srcIndex=0;srcIndex<srcLength;){
    528         UChar32 ch;
    529 
    530         U16_NEXT(src,srcIndex,srcLength,ch);
    531 
    532         result=0;
    533 
    534         UTRIE_GET16(&profile->sprepTrie,ch,result);
    535 
    536         type = getValues(result, value, isIndex);
    537 
    538         // check if the source codepoint is unassigned
    539         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    540 
    541             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    542             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    543             return 0;
    544 
    545         }else if(type == USPREP_MAP){
    546 
    547             int32_t index, length;
    548 
    549             if(isIndex){
    550                 index = value;
    551                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    552                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    553                     length = 1;
    554                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    555                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    556                     length = 2;
    557                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    558                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    559                     length = 3;
    560                 }else{
    561                     length = profile->mappingData[index++];
    562 
    563                 }
    564 
    565                 /* copy mapping to destination */
    566                 for(int32_t i=0; i< length; i++){
    567                     if(destIndex < destCapacity  ){
    568                         dest[destIndex] = profile->mappingData[index+i];
    569                     }
    570                     destIndex++; /* for pre-flighting */
    571                 }
    572                 continue;
    573             }else{
    574                 // subtract the delta to arrive at the code point
    575                 ch -= value;
    576             }
    577 
    578         }else if(type==USPREP_DELETE){
    579              // just consume the codepoint and contine
    580             continue;
    581         }
    582         //copy the code point into destination
    583         if(ch <= 0xFFFF){
    584             if(destIndex < destCapacity ){
    585                 dest[destIndex] = (UChar)ch;
    586             }
    587             destIndex++;
    588         }else{
    589             if(destIndex+1 < destCapacity ){
    590                 dest[destIndex]   = U16_LEAD(ch);
    591                 dest[destIndex+1] = U16_TRAIL(ch);
    592             }
    593             destIndex +=2;
    594         }
    595 
    596     }
    597 
    598     return u_terminateUChars(dest, destCapacity, destIndex, status);
    599 }
    600 
    601 /*
    602    1) Map -- For each character in the input, check if it has a mapping
    603       and, if so, replace it with its mapping.
    604 
    605    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    606       normalization.
    607 
    608    3) Prohibit -- Check for any characters that are not allowed in the
    609       output.  If any are found, return an error.
    610 
    611    4) Check bidi -- Possibly check for right-to-left characters, and if
    612       any are found, make sure that the whole string satisfies the
    613       requirements for bidirectional strings.  If the string does not
    614       satisfy the requirements for bidirectional strings, return an
    615       error.
    616       [Unicode3.2] defines several bidirectional categories; each character
    617        has one bidirectional category assigned to it.  For the purposes of
    618        the requirements below, an "RandALCat character" is a character that
    619        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
    620        is a character that has Unicode bidirectional category "L".  Note
    621 
    622 
    623        that there are many characters which fall in neither of the above
    624        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    625        this because they have bidirectional category "EN".
    626 
    627        In any profile that specifies bidirectional character handling, all
    628        three of the following requirements MUST be met:
    629 
    630        1) The characters in section 5.8 MUST be prohibited.
    631 
    632        2) If a string contains any RandALCat character, the string MUST NOT
    633           contain any LCat character.
    634 
    635        3) If a string contains any RandALCat character, a RandALCat
    636           character MUST be the first character of the string, and a
    637           RandALCat character MUST be the last character of the string.
    638 */
    639 U_CAPI int32_t U_EXPORT2
    640 usprep_prepare(   const UStringPrepProfile* profile,
    641                   const UChar* src, int32_t srcLength,
    642                   UChar* dest, int32_t destCapacity,
    643                   int32_t options,
    644                   UParseError* parseError,
    645                   UErrorCode* status ){
    646 
    647     // check error status
    648     if(U_FAILURE(*status)){
    649         return 0;
    650     }
    651 
    652     //check arguments
    653     if(profile==NULL ||
    654             (src==NULL ? srcLength!=0 : srcLength<-1) ||
    655             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
    656         *status=U_ILLEGAL_ARGUMENT_ERROR;
    657         return 0;
    658     }
    659 
    660     //get the string length
    661     if(srcLength < 0){
    662         srcLength = u_strlen(src);
    663     }
    664     // map
    665     UnicodeString s1;
    666     UChar *b1 = s1.getBuffer(srcLength);
    667     if(b1==NULL){
    668         *status = U_MEMORY_ALLOCATION_ERROR;
    669         return 0;
    670     }
    671     int32_t b1Len = usprep_map(profile, src, srcLength,
    672                                b1, s1.getCapacity(), options, parseError, status);
    673     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    674 
    675     if(*status == U_BUFFER_OVERFLOW_ERROR){
    676         // redo processing of string
    677         /* we do not have enough room so grow the buffer*/
    678         b1 = s1.getBuffer(b1Len);
    679         if(b1==NULL){
    680             *status = U_MEMORY_ALLOCATION_ERROR;
    681             return 0;
    682         }
    683 
    684         *status = U_ZERO_ERROR; // reset error
    685         b1Len = usprep_map(profile, src, srcLength,
    686                            b1, s1.getCapacity(), options, parseError, status);
    687         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    688     }
    689     if(U_FAILURE(*status)){
    690         return 0;
    691     }
    692 
    693     // normalize
    694     UnicodeString s2;
    695     if(profile->doNFKC){
    696         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
    697         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
    698         if(U_FAILURE(*status)){
    699             return 0;
    700         }
    701         fn2.normalize(s1, s2, *status);
    702     }else{
    703         s2.fastCopyFrom(s1);
    704     }
    705     if(U_FAILURE(*status)){
    706         return 0;
    707     }
    708 
    709     // Prohibit and checkBiDi in one pass
    710     const UChar *b2 = s2.getBuffer();
    711     int32_t b2Len = s2.length();
    712     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    713     UBool leftToRight=FALSE, rightToLeft=FALSE;
    714     int32_t rtlPos =-1, ltrPos =-1;
    715 
    716     for(int32_t b2Index=0; b2Index<b2Len;){
    717         UChar32 ch = 0;
    718         U16_NEXT(b2, b2Index, b2Len, ch);
    719 
    720         uint16_t result;
    721         UTRIE_GET16(&profile->sprepTrie,ch,result);
    722 
    723         int16_t value;
    724         UBool isIndex;
    725         UStringPrepType type = getValues(result, value, isIndex);
    726 
    727         if( type == USPREP_PROHIBITED ||
    728             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    729            ){
    730             *status = U_STRINGPREP_PROHIBITED_ERROR;
    731             uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
    732             return 0;
    733         }
    734 
    735         if(profile->checkBiDi) {
    736             direction = ubidi_getClass(ch);
    737             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    738                 firstCharDir = direction;
    739             }
    740             if(direction == U_LEFT_TO_RIGHT){
    741                 leftToRight = TRUE;
    742                 ltrPos = b2Index-1;
    743             }
    744             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    745                 rightToLeft = TRUE;
    746                 rtlPos = b2Index-1;
    747             }
    748         }
    749     }
    750     if(profile->checkBiDi == TRUE){
    751         // satisfy 2
    752         if( leftToRight == TRUE && rightToLeft == TRUE){
    753             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    754             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    755             return 0;
    756         }
    757 
    758         //satisfy 3
    759         if( rightToLeft == TRUE &&
    760             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    761               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    762            ){
    763             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    764             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    765             return FALSE;
    766         }
    767     }
    768     return s2.extract(dest, destCapacity, *status);
    769 }
    770 
    771 
    772 /* data swapping ------------------------------------------------------------ */
    773 
    774 U_CAPI int32_t U_EXPORT2
    775 usprep_swap(const UDataSwapper *ds,
    776             const void *inData, int32_t length, void *outData,
    777             UErrorCode *pErrorCode) {
    778     const UDataInfo *pInfo;
    779     int32_t headerSize;
    780 
    781     const uint8_t *inBytes;
    782     uint8_t *outBytes;
    783 
    784     const int32_t *inIndexes;
    785     int32_t indexes[16];
    786 
    787     int32_t i, offset, count, size;
    788 
    789     /* udata_swapDataHeader checks the arguments */
    790     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    791     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    792         return 0;
    793     }
    794 
    795     /* check data format and format version */
    796     pInfo=(const UDataInfo *)((const char *)inData+4);
    797     if(!(
    798         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    799         pInfo->dataFormat[1]==0x50 &&
    800         pInfo->dataFormat[2]==0x52 &&
    801         pInfo->dataFormat[3]==0x50 &&
    802         pInfo->formatVersion[0]==3
    803     )) {
    804         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    805                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    806                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    807                          pInfo->formatVersion[0]);
    808         *pErrorCode=U_UNSUPPORTED_ERROR;
    809         return 0;
    810     }
    811 
    812     inBytes=(const uint8_t *)inData+headerSize;
    813     outBytes=(uint8_t *)outData+headerSize;
    814 
    815     inIndexes=(const int32_t *)inBytes;
    816 
    817     if(length>=0) {
    818         length-=headerSize;
    819         if(length<16*4) {
    820             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    821                              length);
    822             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    823             return 0;
    824         }
    825     }
    826 
    827     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    828     for(i=0; i<16; ++i) {
    829         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    830     }
    831 
    832     /* calculate the total length of the data */
    833     size=
    834         16*4+ /* size of indexes[] */
    835         indexes[_SPREP_INDEX_TRIE_SIZE]+
    836         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    837 
    838     if(length>=0) {
    839         if(length<size) {
    840             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    841                              length);
    842             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    843             return 0;
    844         }
    845 
    846         /* copy the data for inaccessible bytes */
    847         if(inBytes!=outBytes) {
    848             uprv_memcpy(outBytes, inBytes, size);
    849         }
    850 
    851         offset=0;
    852 
    853         /* swap the int32_t indexes[] */
    854         count=16*4;
    855         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    856         offset+=count;
    857 
    858         /* swap the UTrie */
    859         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    860         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    861         offset+=count;
    862 
    863         /* swap the uint16_t mappingTable[] */
    864         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    865         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    866         //offset+=count;
    867     }
    868 
    869     return headerSize+size;
    870 }
    871 
    872 #endif /* #if !UCONFIG_NO_IDNA */
    873