Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2005, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
 *   file name:  nptrans.cpp
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_TRANSLITERATION
     20 #if !UCONFIG_NO_IDNA
     21 
     22 #include "nptrans.h"
     23 #include "unicode/resbund.h"
     24 #include "unicode/uniset.h"
     25 #include "sprpimpl.h"
     26 #include "cmemory.h"
     27 #include "ustr_imp.h"
     28 #include "intltest.h"
     29 
     30 #ifdef DEBUG
     31 #include <stdio.h>
     32 #endif
     33 
// Definition of the static class member fgClassID, initialized to 0.
// NOTE(review): presumably its address serves as the class ID; the declaration
// and its users live outside this file - confirm against nptrans.h.
const char NamePrepTransform::fgClassID=0;
     35 
     36 //Factory method
     37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
     38     NamePrepTransform* transform = new NamePrepTransform(parseError, status);
     39     if(U_FAILURE(status)){
     40         delete transform;
     41         return NULL;
     42     }
     43     return transform;
     44 }
     45 
     46 //constructor
     47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
     48 : unassigned(), prohibited(), labelSeparatorSet(){
     49 
     50     mapping = NULL;
     51     bundle = NULL;
     52 
     53 
     54     const char* testDataName = IntlTest::loadTestData(status);
     55 
     56     if(U_FAILURE(status)){
     57         return;
     58     }
     59 
     60     bundle = ures_openDirect(testDataName,"idna_rules",&status);
     61 
     62     if(bundle != NULL && U_SUCCESS(status)){
     63         // create the mapping transliterator
     64         int32_t ruleLen = 0;
     65         const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
     66         int32_t mapRuleLen = 0;
     67         const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
     68         UnicodeString rule(mapRuleUChar, mapRuleLen);
     69         rule.append(ruleUChar, ruleLen);
     70 
     71         mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
     72                                                    UTRANS_FORWARD, parseError,status);
     73         if(U_FAILURE(status)) {
     74           return;
     75         }
     76 
     77         //create the unassigned set
     78         int32_t patternLen =0;
     79         const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
     80         unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
     81 
     82         //create prohibited set
     83         patternLen=0;
     84         pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
     85         UnicodeString test(pattern,patternLen);
     86         prohibited.applyPattern(test,status);
     87 #ifdef DEBUG
     88         if(U_FAILURE(status)){
     89             printf("Construction of Unicode set failed\n");
     90         }
     91 
     92         if(U_SUCCESS(status)){
     93             if(prohibited.contains((UChar) 0x644)){
     94                 printf("The string contains 0x644 ... damn !!\n");
     95             }
     96             UnicodeString temp;
     97             prohibited.toPattern(temp,TRUE);
     98 
     99             for(int32_t i=0;i<temp.length();i++){
    100                 printf("%c", (char)temp.charAt(i));
    101             }
    102             printf("\n");
    103         }
    104 #endif
    105 
    106         //create label separator set
    107         patternLen=0;
    108         pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
    109         labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
    110     }
    111 
    112     if(U_SUCCESS(status) &&
    113         (mapping == NULL)
    114       ){
    115         status = U_MEMORY_ALLOCATION_ERROR;
    116         delete mapping;
    117         ures_close(bundle);
    118         mapping = NULL;
    119         bundle = NULL;
    120     }
    121 
    122 }
    123 
    124 
    125 UBool NamePrepTransform::isProhibited(UChar32 ch){
    126     return (UBool)(ch != ASCII_SPACE);
    127 }
    128 
    129 NamePrepTransform::~NamePrepTransform(){
    130     delete mapping;
    131     mapping = NULL;
    132 
    133     //close the bundle
    134     ures_close(bundle);
    135     bundle = NULL;
    136 }
    137 
    138 
    139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
    140                         UChar* dest, int32_t destCapacity,
    141                         UBool allowUnassigned,
    142                         UParseError* /*parseError*/,
    143                         UErrorCode& status ){
    144 
    145     if(U_FAILURE(status)){
    146         return 0;
    147     }
    148     //check arguments
    149     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    150         status=U_ILLEGAL_ARGUMENT_ERROR;
    151         return 0;
    152     }
    153 
    154     UnicodeString rsource(src,srcLength);
    155     // map the code points
    156     // transliteration also performs NFKC
    157     mapping->transliterate(rsource);
    158 
    159     const UChar* buffer = rsource.getBuffer();
    160     int32_t bufLen = rsource.length();
    161     // check if unassigned
    162     if(allowUnassigned == FALSE){
    163         int32_t bufIndex=0;
    164         UChar32 ch =0 ;
    165         for(;bufIndex<bufLen;){
    166             U16_NEXT(buffer, bufIndex, bufLen, ch);
    167             if(unassigned.contains(ch)){
    168                 status = U_IDNA_UNASSIGNED_ERROR;
    169                 return 0;
    170             }
    171         }
    172     }
    173     // check if there is enough room in the output
    174     if(bufLen < destCapacity){
    175         uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
    176     }
    177 
    178     return u_terminateUChars(dest, destCapacity, bufLen, &status);
    179 }
    180 
    181 
    182 #define MAX_BUFFER_SIZE 300
    183 
    184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
    185                                     UChar* dest, int32_t destCapacity,
    186                                     UBool allowUnassigned,
    187                                     UParseError* parseError,
    188                                     UErrorCode& status ){
    189     // check error status
    190     if(U_FAILURE(status)){
    191         return 0;
    192     }
    193 
    194     //check arguments
    195     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    196         status=U_ILLEGAL_ARGUMENT_ERROR;
    197         return 0;
    198     }
    199 
    200     UChar b1Stack[MAX_BUFFER_SIZE];
    201     UChar *b1 = b1Stack;
    202     int32_t b1Len,b1Capacity = MAX_BUFFER_SIZE;
    203 
    204     int32_t b1Index = 0;
    205     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    206     UBool leftToRight=FALSE, rightToLeft=FALSE;
    207 
    208     b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status);
    209 
    210     if(status == U_BUFFER_OVERFLOW_ERROR){
    211         // redo processing of string
    212         /* we do not have enough room so grow the buffer*/
    213         if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len,0)){
    214             status = U_MEMORY_ALLOCATION_ERROR;
    215             goto CLEANUP;
    216         }
    217 
    218         status = U_ZERO_ERROR; // reset error
    219 
    220         b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
    221 
    222     }
    223 
    224     if(U_FAILURE(status)){
    225         goto CLEANUP;
    226     }
    227 
    228 
    229     for(; b1Index<b1Len; ){
    230 
    231         UChar32 ch = 0;
    232 
    233         U16_NEXT(b1, b1Index, b1Len, ch);
    234 
    235         if(prohibited.contains(ch) && ch!=0x0020){
    236             status = U_IDNA_PROHIBITED_ERROR;
    237             goto CLEANUP;
    238         }
    239 
    240         direction = u_charDirection(ch);
    241         if(firstCharDir==U_CHAR_DIRECTION_COUNT){
    242             firstCharDir = direction;
    243         }
    244         if(direction == U_LEFT_TO_RIGHT){
    245             leftToRight = TRUE;
    246         }
    247         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    248             rightToLeft = TRUE;
    249         }
    250     }
    251 
    252     // satisfy 2
    253     if( leftToRight == TRUE && rightToLeft == TRUE){
    254         status = U_IDNA_CHECK_BIDI_ERROR;
    255         goto CLEANUP;
    256     }
    257 
    258     //satisfy 3
    259     if( rightToLeft == TRUE &&
    260         !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    261           (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    262        ){
    263         status = U_IDNA_CHECK_BIDI_ERROR;
    264         return FALSE;
    265     }
    266 
    267     if(b1Len <= destCapacity){
    268         uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
    269     }
    270 
    271 CLEANUP:
    272     if(b1!=b1Stack){
    273         uprv_free(b1);
    274     }
    275 
    276     return u_terminateUChars(dest, destCapacity, b1Len, &status);
    277 }
    278 
    279 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
    280     // check error status
    281     if(U_FAILURE(status)){
    282         return FALSE;
    283     }
    284 
    285     return labelSeparatorSet.contains(ch);
    286 }
    287 
    288 #endif /* #if !UCONFIG_NO_IDNA */
    289 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    290