Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2010, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  nptrans.h
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_TRANSLITERATION
     20 #if !UCONFIG_NO_IDNA
     21 
     22 #include "nptrans.h"
     23 #include "unicode/resbund.h"
     24 #include "unicode/uniset.h"
     25 #include "sprpimpl.h"
     26 #include "cmemory.h"
     27 #include "ustr_imp.h"
     28 #include "intltest.h"
     29 
     30 #ifdef DEBUG
     31 #include <stdio.h>
     32 #endif
     33 
// Definition of the class-wide static fgClassID member declared in nptrans.h.
// NOTE(review): presumably only its address is used as a class identifier
// (ICU class-ID idiom) - confirm against nptrans.h.
const char NamePrepTransform::fgClassID=0;
     35 
     36 //Factory method
     37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
     38     NamePrepTransform* transform = new NamePrepTransform(parseError, status);
     39     if(U_FAILURE(status)){
     40         delete transform;
     41         return NULL;
     42     }
     43     return transform;
     44 }
     45 
     46 //constructor
     47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
     48 : unassigned(), prohibited(), labelSeparatorSet(){
     49 
     50     mapping = NULL;
     51     bundle = NULL;
     52 
     53 
     54     const char* testDataName = IntlTest::loadTestData(status);
     55 
     56     if(U_FAILURE(status)){
     57         return;
     58     }
     59 
     60     bundle = ures_openDirect(testDataName,"idna_rules",&status);
     61 
     62     if(bundle != NULL && U_SUCCESS(status)){
     63         // create the mapping transliterator
     64         int32_t ruleLen = 0;
     65         const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
     66         int32_t mapRuleLen = 0;
     67         const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
     68         UnicodeString rule(mapRuleUChar, mapRuleLen);
     69         rule.append(ruleUChar, ruleLen);
     70 
     71         mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
     72                                                    UTRANS_FORWARD, parseError,status);
     73         if(U_FAILURE(status)) {
     74           return;
     75         }
     76 
     77         //create the unassigned set
     78         int32_t patternLen =0;
     79         const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
     80         unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
     81 
     82         //create prohibited set
     83         patternLen=0;
     84         pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
     85         UnicodeString test(pattern,patternLen);
     86         prohibited.applyPattern(test,status);
     87 #ifdef DEBUG
     88         if(U_FAILURE(status)){
     89             printf("Construction of Unicode set failed\n");
     90         }
     91 
     92         if(U_SUCCESS(status)){
     93             if(prohibited.contains((UChar) 0x644)){
     94                 printf("The string contains 0x644 ... damn !!\n");
     95             }
     96             UnicodeString temp;
     97             prohibited.toPattern(temp,TRUE);
     98 
     99             for(int32_t i=0;i<temp.length();i++){
    100                 printf("%c", (char)temp.charAt(i));
    101             }
    102             printf("\n");
    103         }
    104 #endif
    105 
    106         //create label separator set
    107         patternLen=0;
    108         pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
    109         labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
    110     }
    111 
    112     if(U_SUCCESS(status) &&
    113         (mapping == NULL)
    114       ){
    115         status = U_MEMORY_ALLOCATION_ERROR;
    116         delete mapping;
    117         ures_close(bundle);
    118         mapping = NULL;
    119         bundle = NULL;
    120     }
    121 
    122 }
    123 
    124 
    125 UBool NamePrepTransform::isProhibited(UChar32 ch){
    126     return (UBool)(ch != ASCII_SPACE);
    127 }
    128 
    129 NamePrepTransform::~NamePrepTransform(){
    130     delete mapping;
    131     mapping = NULL;
    132 
    133     //close the bundle
    134     ures_close(bundle);
    135     bundle = NULL;
    136 }
    137 
    138 
    139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
    140                         UChar* dest, int32_t destCapacity,
    141                         UBool allowUnassigned,
    142                         UParseError* /*parseError*/,
    143                         UErrorCode& status ){
    144 
    145     if(U_FAILURE(status)){
    146         return 0;
    147     }
    148     //check arguments
    149     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    150         status=U_ILLEGAL_ARGUMENT_ERROR;
    151         return 0;
    152     }
    153 
    154     UnicodeString rsource(src,srcLength);
    155     // map the code points
    156     // transliteration also performs NFKC
    157     mapping->transliterate(rsource);
    158 
    159     const UChar* buffer = rsource.getBuffer();
    160     int32_t bufLen = rsource.length();
    161     // check if unassigned
    162     if(allowUnassigned == FALSE){
    163         int32_t bufIndex=0;
    164         UChar32 ch =0 ;
    165         for(;bufIndex<bufLen;){
    166             U16_NEXT(buffer, bufIndex, bufLen, ch);
    167             if(unassigned.contains(ch)){
    168                 status = U_IDNA_UNASSIGNED_ERROR;
    169                 return 0;
    170             }
    171         }
    172     }
    173     // check if there is enough room in the output
    174     if(bufLen < destCapacity){
    175         uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
    176     }
    177 
    178     return u_terminateUChars(dest, destCapacity, bufLen, &status);
    179 }
    180 
    181 
    182 #define MAX_BUFFER_SIZE 300
    183 
    184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
    185                                     UChar* dest, int32_t destCapacity,
    186                                     UBool allowUnassigned,
    187                                     UParseError* parseError,
    188                                     UErrorCode& status ){
    189     // check error status
    190     if(U_FAILURE(status)){
    191         return 0;
    192     }
    193 
    194     //check arguments
    195     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
    196         status=U_ILLEGAL_ARGUMENT_ERROR;
    197         return 0;
    198     }
    199 
    200     UnicodeString b1String;
    201     UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
    202     int32_t b1Len;
    203 
    204     int32_t b1Index = 0;
    205     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    206     UBool leftToRight=FALSE, rightToLeft=FALSE;
    207 
    208     b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
    209     b1String.releaseBuffer(b1Len);
    210 
    211     if(status == U_BUFFER_OVERFLOW_ERROR){
    212         // redo processing of string
    213         /* we do not have enough room so grow the buffer*/
    214         b1 = b1String.getBuffer(b1Len);
    215         status = U_ZERO_ERROR; // reset error
    216         b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
    217         b1String.releaseBuffer(b1Len);
    218     }
    219 
    220     if(U_FAILURE(status)){
    221         b1Len = 0;
    222         goto CLEANUP;
    223     }
    224 
    225 
    226     for(; b1Index<b1Len; ){
    227 
    228         UChar32 ch = 0;
    229 
    230         U16_NEXT(b1, b1Index, b1Len, ch);
    231 
    232         if(prohibited.contains(ch) && ch!=0x0020){
    233             status = U_IDNA_PROHIBITED_ERROR;
    234             b1Len = 0;
    235             goto CLEANUP;
    236         }
    237 
    238         direction = u_charDirection(ch);
    239         if(firstCharDir==U_CHAR_DIRECTION_COUNT){
    240             firstCharDir = direction;
    241         }
    242         if(direction == U_LEFT_TO_RIGHT){
    243             leftToRight = TRUE;
    244         }
    245         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    246             rightToLeft = TRUE;
    247         }
    248     }
    249 
    250     // satisfy 2
    251     if( leftToRight == TRUE && rightToLeft == TRUE){
    252         status = U_IDNA_CHECK_BIDI_ERROR;
    253         b1Len = 0;
    254         goto CLEANUP;
    255     }
    256 
    257     //satisfy 3
    258     if( rightToLeft == TRUE &&
    259         !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    260           (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    261        ){
    262         status = U_IDNA_CHECK_BIDI_ERROR;
    263         return FALSE;
    264     }
    265 
    266     if(b1Len <= destCapacity){
    267         uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
    268     }
    269 
    270 CLEANUP:
    271     return u_terminateUChars(dest, destCapacity, b1Len, &status);
    272 }
    273 
    274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
    275     // check error status
    276     if(U_FAILURE(status)){
    277         return FALSE;
    278     }
    279 
    280     return labelSeparatorSet.contains(ch);
    281 }
    282 
    283 #endif /* #if !UCONFIG_NO_IDNA */
    284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    285