Home | History | Annotate | Download | only in common
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2013, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  usprep.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003jul2
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_IDNA
     20 
     21 #include "unicode/usprep.h"
     22 
     23 #include "unicode/unorm.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uversion.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "sprpimpl.h"
     30 #include "ustr_imp.h"
     31 #include "uhash.h"
     32 #include "cstring.h"
     33 #include "udataswp.h"
     34 #include "ucln_cmn.h"
     35 #include "ubidi_props.h"
     36 
     37 U_NAMESPACE_USE
     38 
     39 U_CDECL_BEGIN
     40 
     41 /*
     42 Static cache for already opened StringPrep profiles
     43 */
        /* Maps UStringPrepKey* (name + path) to UStringPrepProfile*; created by createCache(). */
     44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
        /* One-time guard through which initCache() runs createCache(); reset by usprep_cleanup(). */
     45 static icu::UInitOnce gSharedDataInitOnce;
     46 
        /* Held around cache lookups and insertions, refCount updates and cache flushing. */
     47 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
     48 
     49 /* format version of spp file */
     50 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     51 
     52 /* the Unicode version of the sprep data */
        /* Overwritten by isSPrepAcceptable() whenever a data file is accepted; read by loadData(). */
     53 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     54 
     55 /* Profile names must be aligned to UStringPrepProfileType */
        /* usprep_openByType() indexes this table directly with the enum value, so the order matters. */
     56 static const char * const PROFILE_NAMES[] = {
     57     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
     58     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
     59     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
     60     "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
     61     "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
     62     "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
     63     "rfc3722",      /* USPREP_RFC3722_ISCSI */
     64     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
     65     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
     66     "rfc4011",      /* USPREP_RFC4011_MIB */
     67     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
     68     "rfc4505",      /* USPREP_RFC4505_TRACE */
     69     "rfc4518",      /* USPREP_RFC4518_LDAP */
     70     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
     71 };
     72 
     73 static UBool U_CALLCONV
     74 isSPrepAcceptable(void * /* context */,
     75              const char * /* type */,
     76              const char * /* name */,
     77              const UDataInfo *pInfo) {
     78     if(
     79         pInfo->size>=20 &&
     80         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     81         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     82         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     83         pInfo->dataFormat[1]==0x50 &&
     84         pInfo->dataFormat[2]==0x52 &&
     85         pInfo->dataFormat[3]==0x50 &&
     86         pInfo->formatVersion[0]==3 &&
     87         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     88         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     89     ) {
     90         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     91         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     92         return TRUE;
     93     } else {
     94         return FALSE;
     95     }
     96 }
     97 
     98 static int32_t U_CALLCONV
     99 getSPrepFoldingOffset(uint32_t data) {
    100 
    101     return (int32_t)data;
    102 
    103 }
    104 
    105 /* hashes an entry  */
    106 static int32_t U_CALLCONV
    107 hashEntry(const UHashTok parm) {
    108     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    109     UHashTok namekey, pathkey;
    110     namekey.pointer = b->name;
    111     pathkey.pointer = b->path;
    112     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
    113 }
    114 
    115 /* compares two entries */
    116 static UBool U_CALLCONV
    117 compareEntries(const UHashTok p1, const UHashTok p2) {
    118     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    119     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    120     UHashTok name1, name2, path1, path2;
    121     name1.pointer = b1->name;
    122     name2.pointer = b2->name;
    123     path1.pointer = b1->path;
    124     path2.pointer = b2->path;
    125     return ((UBool)(uhash_compareChars(name1, name2) &
    126         uhash_compareChars(path1, path2)));
    127 }
    128 
    129 static void
    130 usprep_unload(UStringPrepProfile* data){
    131     udata_close(data->sprepData);
    132 }
    133 
    134 static int32_t
    135 usprep_internal_flushCache(UBool noRefCount){
    136     UStringPrepProfile *profile = NULL;
    137     UStringPrepKey  *key  = NULL;
    138     int32_t pos = -1;
    139     int32_t deletedNum = 0;
    140     const UHashElement *e;
    141 
    142     /*
    143      * if shared data hasn't even been lazy evaluated yet
    144      * return 0
    145      */
    146     umtx_lock(&usprepMutex);
    147     if (SHARED_DATA_HASHTABLE == NULL) {
    148         umtx_unlock(&usprepMutex);
    149         return 0;
    150     }
    151 
    152     /*creates an enumeration to iterate through every element in the table */
    153     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
    154     {
    155         profile = (UStringPrepProfile *) e->value.pointer;
    156         key  = (UStringPrepKey *) e->key.pointer;
    157 
    158         if ((noRefCount== FALSE && profile->refCount == 0) ||
    159              noRefCount== TRUE) {
    160             deletedNum++;
    161             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    162 
    163             /* unload the data */
    164             usprep_unload(profile);
    165 
    166             if(key->name != NULL) {
    167                 uprv_free(key->name);
    168                 key->name=NULL;
    169             }
    170             if(key->path != NULL) {
    171                 uprv_free(key->path);
    172                 key->path=NULL;
    173             }
    174             uprv_free(profile);
    175             uprv_free(key);
    176         }
    177 
    178     }
    179     umtx_unlock(&usprepMutex);
    180 
    181     return deletedNum;
    182 }
    183 
    184 /* Works just like ucnv_flushCache()
    185 static int32_t
    186 usprep_flushCache(){
    187     return usprep_internal_flushCache(FALSE);
    188 }
    189 */
    190 
    191 static UBool U_CALLCONV usprep_cleanup(void){
    192     if (SHARED_DATA_HASHTABLE != NULL) {
    193         usprep_internal_flushCache(TRUE);
    194         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    195             uhash_close(SHARED_DATA_HASHTABLE);
    196             SHARED_DATA_HASHTABLE = NULL;
    197         }
    198     }
    199     gSharedDataInitOnce.reset();
    200     return (SHARED_DATA_HASHTABLE == NULL);
    201 }
    202 U_CDECL_END
    203 
    204 
    205 /** Initializes the cache for resources */
    206 static void U_CALLCONV
    207 createCache(UErrorCode &status) {
    208     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
    209     if (U_FAILURE(status)) {
    210         SHARED_DATA_HASHTABLE = NULL;
    211     }
    212     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    213 }
    214 
    215 static void
    216 initCache(UErrorCode *status) {
    217     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
    218 }
    219 
    220 static UBool U_CALLCONV
    221 loadData(UStringPrepProfile* profile,
    222          const char* path,
    223          const char* name,
    224          const char* type,
    225          UErrorCode* errorCode) {
    226     /* load Unicode SPREP data from file */
    227     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    228     UDataMemory *dataMemory;
    229     const int32_t *p=NULL;
    230     const uint8_t *pb;
    231     UVersionInfo normUnicodeVersion;
    232     int32_t normUniVer, sprepUniVer, normCorrVer;
    233 
    234     if(errorCode==NULL || U_FAILURE(*errorCode)) {
    235         return 0;
    236     }
    237 
    238     /* open the data outside the mutex block */
    239     //TODO: change the path
    240     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    241     if(U_FAILURE(*errorCode)) {
    242         return FALSE;
    243     }
    244 
    245     p=(const int32_t *)udata_getMemory(dataMemory);
    246     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    247     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    248     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    249 
    250 
    251     if(U_FAILURE(*errorCode)) {
    252         udata_close(dataMemory);
    253         return FALSE;
    254     }
    255 
    256     /* in the mutex block, set the data for this process */
    257     umtx_lock(&usprepMutex);
    258     if(profile->sprepData==NULL) {
    259         profile->sprepData=dataMemory;
    260         dataMemory=NULL;
    261         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    262         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    263     } else {
    264         p=(const int32_t *)udata_getMemory(profile->sprepData);
    265     }
    266     umtx_unlock(&usprepMutex);
    267     /* initialize some variables */
    268     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    269 
    270     u_getUnicodeVersion(normUnicodeVersion);
    271     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
    272                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    273     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
    274                   (dataVersion[2] << 8 ) + (dataVersion[3]);
    275     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    276 
    277     if(U_FAILURE(*errorCode)){
    278         udata_close(dataMemory);
    279         return FALSE;
    280     }
    281     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
    282         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
    283         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    284       ){
    285         *errorCode = U_INVALID_FORMAT_ERROR;
    286         udata_close(dataMemory);
    287         return FALSE;
    288     }
    289     profile->isDataLoaded = TRUE;
    290 
    291     /* if a different thread set it first, then close the extra data */
    292     if(dataMemory!=NULL) {
    293         udata_close(dataMemory); /* NULL if it was set correctly */
    294     }
    295 
    296 
    297     return profile->isDataLoaded;
    298 }
    299 
    300 static UStringPrepProfile*
    301 usprep_getProfile(const char* path,
    302                   const char* name,
    303                   UErrorCode *status){
    304 
    305     UStringPrepProfile* profile = NULL;
    306 
    307     initCache(status);
    308 
    309     if(U_FAILURE(*status)){
    310         return NULL;
    311     }
    312 
    313     UStringPrepKey stackKey;
    314     /*
    315      * const is cast way to save malloc, strcpy and free calls
    316      * we use the passed in pointers for fetching the data from the
    317      * hash table which is safe
    318      */
    319     stackKey.name = (char*) name;
    320     stackKey.path = (char*) path;
    321 
    322     /* fetch the data from the cache */
    323     umtx_lock(&usprepMutex);
    324     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    325     if(profile != NULL) {
    326         profile->refCount++;
    327     }
    328     umtx_unlock(&usprepMutex);
    329 
    330     if(profile == NULL) {
    331         /* else load the data and put the data in the cache */
    332         LocalMemory<UStringPrepProfile> newProfile;
    333         if(newProfile.allocateInsteadAndReset() == NULL) {
    334             *status = U_MEMORY_ALLOCATION_ERROR;
    335             return NULL;
    336         }
    337 
    338         /* load the data */
    339         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    340             return NULL;
    341         }
    342 
    343         /* get the options */
    344         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    345         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    346 
    347         if(newProfile->checkBiDi) {
    348             newProfile->bdp = ubidi_getSingleton();
    349         }
    350 
    351         LocalMemory<UStringPrepKey> key;
    352         LocalMemory<char> keyName;
    353         LocalMemory<char> keyPath;
    354         if( key.allocateInsteadAndReset() == NULL ||
    355             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
    356             (path != NULL &&
    357              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
    358          ) {
    359             *status = U_MEMORY_ALLOCATION_ERROR;
    360             usprep_unload(newProfile.getAlias());
    361             return NULL;
    362         }
    363 
    364         umtx_lock(&usprepMutex);
    365         // If another thread already inserted the same key/value, refcount and cleanup our thread data
    366         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
    367         if(profile != NULL) {
    368             profile->refCount++;
    369             usprep_unload(newProfile.getAlias());
    370         }
    371         else {
    372             /* initialize the key members */
    373             key->name = keyName.orphan();
    374             uprv_strcpy(key->name, name);
    375             if(path != NULL){
    376                 key->path = keyPath.orphan();
    377                 uprv_strcpy(key->path, path);
    378             }
    379             profile = newProfile.orphan();
    380 
    381             /* add the data object to the cache */
    382             profile->refCount = 1;
    383             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    384         }
    385         umtx_unlock(&usprepMutex);
    386     }
    387 
    388     return profile;
    389 }
    390 
    391 U_CAPI UStringPrepProfile* U_EXPORT2
    392 usprep_open(const char* path,
    393             const char* name,
    394             UErrorCode* status){
    395 
    396     if(status == NULL || U_FAILURE(*status)){
    397         return NULL;
    398     }
    399 
    400     /* initialize the profile struct members */
    401     return usprep_getProfile(path,name,status);
    402 }
    403 
    404 U_CAPI UStringPrepProfile* U_EXPORT2
    405 usprep_openByType(UStringPrepProfileType type,
    406 				  UErrorCode* status) {
    407     if(status == NULL || U_FAILURE(*status)){
    408         return NULL;
    409     }
    410     int32_t index = (int32_t)type;
    411     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
    412         *status = U_ILLEGAL_ARGUMENT_ERROR;
    413         return NULL;
    414     }
    415     return usprep_open(NULL, PROFILE_NAMES[index], status);
    416 }
    417 
    418 U_CAPI void U_EXPORT2
    419 usprep_close(UStringPrepProfile* profile){
    420     if(profile==NULL){
    421         return;
    422     }
    423 
    424     umtx_lock(&usprepMutex);
    425     /* decrement the ref count*/
    426     if(profile->refCount > 0){
    427         profile->refCount--;
    428     }
    429     umtx_unlock(&usprepMutex);
    430 
    431 }
    432 
    433 U_CFUNC void
    434 uprv_syntaxError(const UChar* rules,
    435                  int32_t pos,
    436                  int32_t rulesLen,
    437                  UParseError* parseError){
    438     if(parseError == NULL){
    439         return;
    440     }
    441     parseError->offset = pos;
    442     parseError->line = 0 ; // we are not using line numbers
    443 
    444     // for pre-context
    445     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    446     int32_t limit = pos;
    447 
    448     u_memcpy(parseError->preContext,rules+start,limit-start);
    449     //null terminate the buffer
    450     parseError->preContext[limit-start] = 0;
    451 
    452     // for post-context; include error rules[pos]
    453     start = pos;
    454     limit = start + (U_PARSE_CONTEXT_LEN-1);
    455     if (limit > rulesLen) {
    456         limit = rulesLen;
    457     }
    458     if (start < rulesLen) {
    459         u_memcpy(parseError->postContext,rules+start,limit-start);
    460     }
    461     //null terminate the buffer
    462     parseError->postContext[limit-start]= 0;
    463 }
    464 
    465 
    466 static inline UStringPrepType
    467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    468 
    469     UStringPrepType type;
    470     if(trieWord == 0){
    471         /*
    472          * Initial value stored in the mapping table
    473          * just return USPREP_TYPE_LIMIT .. so that
    474          * the source codepoint is copied to the destination
    475          */
    476         type = USPREP_TYPE_LIMIT;
    477         isIndex =FALSE;
    478         value = 0;
    479     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    480         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    481         isIndex =FALSE;
    482         value = 0;
    483     }else{
    484         /* get the type */
    485         type = USPREP_MAP;
    486         /* ascertain if the value is index or delta */
    487         if(trieWord & 0x02){
    488             isIndex = TRUE;
    489             value = trieWord  >> 2; //mask off the lower 2 bits and shift
    490         }else{
    491             isIndex = FALSE;
    492             value = (int16_t)trieWord;
    493             value =  (value >> 2);
    494         }
    495 
    496         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    497             type = USPREP_DELETE;
    498             isIndex =FALSE;
    499             value = 0;
    500         }
    501     }
    502     return type;
    503 }
    504 
    505 
    506 
    507 static int32_t
    508 usprep_map(  const UStringPrepProfile* profile,
    509              const UChar* src, int32_t srcLength,
    510              UChar* dest, int32_t destCapacity,
    511              int32_t options,
    512              UParseError* parseError,
    513              UErrorCode* status ){
    514 
    515     uint16_t result;
    516     int32_t destIndex=0;
    517     int32_t srcIndex;
    518     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    519     UStringPrepType type;
    520     int16_t value;
    521     UBool isIndex;
    522     const int32_t* indexes = profile->indexes;
    523 
    524     // no error checking the caller check for error and arguments
    525     // no string length check the caller finds out the string length
    526 
    527     for(srcIndex=0;srcIndex<srcLength;){
    528         UChar32 ch;
    529 
    530         U16_NEXT(src,srcIndex,srcLength,ch);
    531 
    532         result=0;
    533 
    534         UTRIE_GET16(&profile->sprepTrie,ch,result);
    535 
    536         type = getValues(result, value, isIndex);
    537 
    538         // check if the source codepoint is unassigned
    539         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
    540 
    541             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    542             *status = U_STRINGPREP_UNASSIGNED_ERROR;
    543             return 0;
    544 
    545         }else if(type == USPREP_MAP){
    546 
    547             int32_t index, length;
    548 
    549             if(isIndex){
    550                 index = value;
    551                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    552                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    553                     length = 1;
    554                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    555                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    556                     length = 2;
    557                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    558                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    559                     length = 3;
    560                 }else{
    561                     length = profile->mappingData[index++];
    562 
    563                 }
    564 
    565                 /* copy mapping to destination */
    566                 for(int32_t i=0; i< length; i++){
    567                     if(destIndex < destCapacity  ){
    568                         dest[destIndex] = profile->mappingData[index+i];
    569                     }
    570                     destIndex++; /* for pre-flighting */
    571                 }
    572                 continue;
    573             }else{
    574                 // subtract the delta to arrive at the code point
    575                 ch -= value;
    576             }
    577 
    578         }else if(type==USPREP_DELETE){
    579              // just consume the codepoint and contine
    580             continue;
    581         }
    582         //copy the code point into destination
    583         if(ch <= 0xFFFF){
    584             if(destIndex < destCapacity ){
    585                 dest[destIndex] = (UChar)ch;
    586             }
    587             destIndex++;
    588         }else{
    589             if(destIndex+1 < destCapacity ){
    590                 dest[destIndex]   = U16_LEAD(ch);
    591                 dest[destIndex+1] = U16_TRAIL(ch);
    592             }
    593             destIndex +=2;
    594         }
    595 
    596     }
    597 
    598     return u_terminateUChars(dest, destCapacity, destIndex, status);
    599 }
    600 
    601 
    602 static int32_t
    603 usprep_normalize(   const UChar* src, int32_t srcLength,
    604                     UChar* dest, int32_t destCapacity,
    605                     UErrorCode* status ){
    606     return unorm_normalize(
    607         src, srcLength,
    608         UNORM_NFKC, UNORM_UNICODE_3_2,
    609         dest, destCapacity,
    610         status);
    611 }
    612 
    613 
    614  /*
    615    1) Map -- For each character in the input, check if it has a mapping
    616       and, if so, replace it with its mapping.
    617 
    618    2) Normalize -- Possibly normalize the result of step 1 using Unicode
    619       normalization.
    620 
    621    3) Prohibit -- Check for any characters that are not allowed in the
    622       output.  If any are found, return an error.
    623 
    624    4) Check bidi -- Possibly check for right-to-left characters, and if
    625       any are found, make sure that the whole string satisfies the
    626       requirements for bidirectional strings.  If the string does not
    627       satisfy the requirements for bidirectional strings, return an
    628       error.
    629       [Unicode3.2] defines several bidirectional categories; each character
    630        has one bidirectional category assigned to it.  For the purposes of
    631        the requirements below, an "RandALCat character" is a character that
    632        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
    633        is a character that has Unicode bidirectional category "L".  Note
    634 
    635 
    636        that there are many characters which fall in neither of the above
    637        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    638        this because they have bidirectional category "EN".
    639 
    640        In any profile that specifies bidirectional character handling, all
    641        three of the following requirements MUST be met:
    642 
    643        1) The characters in section 5.8 MUST be prohibited.
    644 
    645        2) If a string contains any RandALCat character, the string MUST NOT
    646           contain any LCat character.
    647 
    648        3) If a string contains any RandALCat character, a RandALCat
    649           character MUST be the first character of the string, and a
    650           RandALCat character MUST be the last character of the string.
    651 */
    652 
    653 #define MAX_STACK_BUFFER_SIZE 300
    654 
    655 
    656 U_CAPI int32_t U_EXPORT2
    657 usprep_prepare(   const UStringPrepProfile* profile,
    658                   const UChar* src, int32_t srcLength,
    659                   UChar* dest, int32_t destCapacity,
    660                   int32_t options,
    661                   UParseError* parseError,
    662                   UErrorCode* status ){
    663 
    664     // check error status
    665     if(status == NULL || U_FAILURE(*status)){
    666         return 0;
    667     }
    668 
    669     //check arguments
    670     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    671         *status=U_ILLEGAL_ARGUMENT_ERROR;
    672         return 0;
    673     }
    674 
    675     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    676     UChar *b1 = b1Stack, *b2 = b2Stack;
    677     int32_t b1Len, b2Len=0,
    678             b1Capacity = MAX_STACK_BUFFER_SIZE ,
    679             b2Capacity = MAX_STACK_BUFFER_SIZE;
    680     uint16_t result;
    681     int32_t b2Index = 0;
    682     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    683     UBool leftToRight=FALSE, rightToLeft=FALSE;
    684     int32_t rtlPos =-1, ltrPos =-1;
    685 
    686     //get the string length
    687     if(srcLength == -1){
    688         srcLength = u_strlen(src);
    689     }
    690     // map
    691     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
    692 
    693     if(*status == U_BUFFER_OVERFLOW_ERROR){
    694         // redo processing of string
    695         /* we do not have enough room so grow the buffer*/
    696         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
    697         if(b1==NULL){
    698             *status = U_MEMORY_ALLOCATION_ERROR;
    699             goto CLEANUP;
    700         }
    701 
    702         *status = U_ZERO_ERROR; // reset error
    703 
    704         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    705 
    706     }
    707 
    708     // normalize
    709     if(profile->doNFKC == TRUE){
    710         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    711 
    712         if(*status == U_BUFFER_OVERFLOW_ERROR){
    713             // redo processing of string
    714             /* we do not have enough room so grow the buffer*/
    715             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
    716             if(b2==NULL){
    717                 *status = U_MEMORY_ALLOCATION_ERROR;
    718                 goto CLEANUP;
    719             }
    720 
    721             *status = U_ZERO_ERROR; // reset error
    722 
    723             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    724 
    725         }
    726 
    727     }else{
    728         b2 = b1;
    729         b2Len = b1Len;
    730     }
    731 
    732 
    733     if(U_FAILURE(*status)){
    734         goto CLEANUP;
    735     }
    736 
    737     UChar32 ch;
    738     UStringPrepType type;
    739     int16_t value;
    740     UBool isIndex;
    741 
    742     // Prohibit and checkBiDi in one pass
    743     for(b2Index=0; b2Index<b2Len;){
    744 
    745         ch = 0;
    746 
    747         U16_NEXT(b2, b2Index, b2Len, ch);
    748 
    749         UTRIE_GET16(&profile->sprepTrie,ch,result);
    750 
    751         type = getValues(result, value, isIndex);
    752 
    753         if( type == USPREP_PROHIBITED ||
    754             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    755            ){
    756             *status = U_STRINGPREP_PROHIBITED_ERROR;
    757             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
    758             goto CLEANUP;
    759         }
    760 
    761         if(profile->checkBiDi) {
    762             direction = ubidi_getClass(profile->bdp, ch);
    763             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    764                 firstCharDir = direction;
    765             }
    766             if(direction == U_LEFT_TO_RIGHT){
    767                 leftToRight = TRUE;
    768                 ltrPos = b2Index-1;
    769             }
    770             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    771                 rightToLeft = TRUE;
    772                 rtlPos = b2Index-1;
    773             }
    774         }
    775     }
    776     if(profile->checkBiDi == TRUE){
    777         // satisfy 2
    778         if( leftToRight == TRUE && rightToLeft == TRUE){
    779             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    780             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    781             goto CLEANUP;
    782         }
    783 
    784         //satisfy 3
    785         if( rightToLeft == TRUE &&
    786             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    787               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    788            ){
    789             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    790             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    791             return FALSE;
    792         }
    793     }
    794     if(b2Len>0 && b2Len <= destCapacity){
    795         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    796     }
    797 
    798 CLEANUP:
    799     if(b1!=b1Stack){
    800         uprv_free(b1);
    801         b1=NULL;
    802     }
    803 
    804     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
    805         uprv_free(b2);
    806         b2=NULL;
    807     }
    808     return u_terminateUChars(dest, destCapacity, b2Len, status);
    809 }
    810 
    811 
    812 /* data swapping ------------------------------------------------------------ */
    813 
    814 U_CAPI int32_t U_EXPORT2
    815 usprep_swap(const UDataSwapper *ds,
    816             const void *inData, int32_t length, void *outData,
    817             UErrorCode *pErrorCode) {
    818     const UDataInfo *pInfo;
    819     int32_t headerSize;
    820 
    821     const uint8_t *inBytes;
    822     uint8_t *outBytes;
    823 
    824     const int32_t *inIndexes;
    825     int32_t indexes[16];
    826 
    827     int32_t i, offset, count, size;
    828 
    829     /* udata_swapDataHeader checks the arguments */
    830     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    831     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    832         return 0;
    833     }
    834 
    835     /* check data format and format version */
    836     pInfo=(const UDataInfo *)((const char *)inData+4);
    837     if(!(
    838         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    839         pInfo->dataFormat[1]==0x50 &&
    840         pInfo->dataFormat[2]==0x52 &&
    841         pInfo->dataFormat[3]==0x50 &&
    842         pInfo->formatVersion[0]==3
    843     )) {
    844         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    845                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    846                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    847                          pInfo->formatVersion[0]);
    848         *pErrorCode=U_UNSUPPORTED_ERROR;
    849         return 0;
    850     }
    851 
    852     inBytes=(const uint8_t *)inData+headerSize;
    853     outBytes=(uint8_t *)outData+headerSize;
    854 
    855     inIndexes=(const int32_t *)inBytes;
    856 
    857     if(length>=0) {
    858         length-=headerSize;
    859         if(length<16*4) {
    860             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    861                              length);
    862             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    863             return 0;
    864         }
    865     }
    866 
    867     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    868     for(i=0; i<16; ++i) {
    869         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    870     }
    871 
    872     /* calculate the total length of the data */
    873     size=
    874         16*4+ /* size of indexes[] */
    875         indexes[_SPREP_INDEX_TRIE_SIZE]+
    876         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    877 
    878     if(length>=0) {
    879         if(length<size) {
    880             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    881                              length);
    882             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    883             return 0;
    884         }
    885 
    886         /* copy the data for inaccessible bytes */
    887         if(inBytes!=outBytes) {
    888             uprv_memcpy(outBytes, inBytes, size);
    889         }
    890 
    891         offset=0;
    892 
    893         /* swap the int32_t indexes[] */
    894         count=16*4;
    895         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    896         offset+=count;
    897 
    898         /* swap the UTrie */
    899         count=indexes[_SPREP_INDEX_TRIE_SIZE];
    900         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    901         offset+=count;
    902 
    903         /* swap the uint16_t mappingTable[] */
    904         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    905         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    906         offset+=count;
    907     }
    908 
    909     return headerSize+size;
    910 }
    911 
    912 #endif /* #if !UCONFIG_NO_IDNA */
    913