1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: nptrans.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_TRANSLITERATION 20 #if !UCONFIG_NO_IDNA 21 22 #include "nptrans.h" 23 #include "unicode/resbund.h" 24 #include "unicode/uniset.h" 25 #include "sprpimpl.h" 26 #include "cmemory.h" 27 #include "ustr_imp.h" 28 #include "intltest.h" 29 30 #ifdef DEBUG 31 #include <stdio.h> 32 #endif 33 34 const char NamePrepTransform::fgClassID=0; 35 36 //Factory method 37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){ 38 NamePrepTransform* transform = new NamePrepTransform(parseError, status); 39 if(U_FAILURE(status)){ 40 delete transform; 41 return NULL; 42 } 43 return transform; 44 } 45 46 //constructor 47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status) 48 : unassigned(), prohibited(), labelSeparatorSet(){ 49 50 mapping = NULL; 51 bundle = NULL; 52 53 54 const char* testDataName = IntlTest::loadTestData(status); 55 56 if(U_FAILURE(status)){ 57 return; 58 } 59 60 bundle = ures_openDirect(testDataName,"idna_rules",&status); 61 62 if(bundle != NULL && U_SUCCESS(status)){ 63 // create the mapping transliterator 64 int32_t ruleLen = 0; 65 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status); 66 int32_t mapRuleLen = 0; 67 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status); 68 UnicodeString rule(mapRuleUChar, mapRuleLen); 69 rule.append(ruleUChar, ruleLen); 70 71 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule, 72 UTRANS_FORWARD, parseError,status); 73 if(U_FAILURE(status)) { 74 return; 75 } 76 77 //create the unassigned set 78 int32_t patternLen =0; 79 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status); 80 unassigned.applyPattern(UnicodeString(pattern, patternLen), status); 81 82 //create prohibited set 83 patternLen=0; 84 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status); 85 UnicodeString test(pattern,patternLen); 86 prohibited.applyPattern(test,status); 87 #ifdef DEBUG 88 if(U_FAILURE(status)){ 89 printf("Construction of Unicode set failed\n"); 90 } 91 92 if(U_SUCCESS(status)){ 93 if(prohibited.contains((UChar) 0x644)){ 94 printf("The string contains 0x644 ... damn !!\n"); 95 } 96 UnicodeString temp; 97 prohibited.toPattern(temp,TRUE); 98 99 for(int32_t i=0;i<temp.length();i++){ 100 printf("%c", (char)temp.charAt(i)); 101 } 102 printf("\n"); 103 } 104 #endif 105 106 //create label separator set 107 patternLen=0; 108 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status); 109 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status); 110 } 111 112 if(U_SUCCESS(status) && 113 (mapping == NULL) 114 ){ 115 status = U_MEMORY_ALLOCATION_ERROR; 116 delete mapping; 117 ures_close(bundle); 118 mapping = NULL; 119 bundle = NULL; 120 } 121 122 } 123 124 125 UBool NamePrepTransform::isProhibited(UChar32 ch){ 126 return (UBool)(ch != ASCII_SPACE); 127 } 128 129 NamePrepTransform::~NamePrepTransform(){ 130 delete mapping; 131 mapping = NULL; 132 133 //close the bundle 134 ures_close(bundle); 135 bundle = NULL; 136 } 137 138 139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, 140 UChar* dest, int32_t destCapacity, 141 UBool allowUnassigned, 142 UParseError* /*parseError*/, 143 UErrorCode& status ){ 144 145 if(U_FAILURE(status)){ 146 return 0; 147 } 148 //check arguments 149 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 150 status=U_ILLEGAL_ARGUMENT_ERROR; 151 return 0; 152 } 153 154 UnicodeString rsource(src,srcLength); 155 // map the code points 156 // transliteration also performs NFKC 157 mapping->transliterate(rsource); 158 159 const UChar* buffer = rsource.getBuffer(); 160 int32_t bufLen = rsource.length(); 161 // check if unassigned 162 if(allowUnassigned == FALSE){ 163 int32_t bufIndex=0; 164 UChar32 ch =0 ; 165 for(;bufIndex<bufLen;){ 166 U16_NEXT(buffer, bufIndex, bufLen, ch); 167 if(unassigned.contains(ch)){ 168 status = U_IDNA_UNASSIGNED_ERROR; 169 return 0; 170 } 171 } 172 } 173 // check if there is enough room in the output 174 if(bufLen < destCapacity){ 175 uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR); 176 } 177 178 return u_terminateUChars(dest, destCapacity, bufLen, &status); 179 } 180 181 182 #define MAX_BUFFER_SIZE 300 183 184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, 185 UChar* dest, int32_t destCapacity, 186 UBool allowUnassigned, 187 UParseError* parseError, 188 UErrorCode& status ){ 189 // check error status 190 if(U_FAILURE(status)){ 191 return 0; 192 } 193 194 //check arguments 195 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 196 status=U_ILLEGAL_ARGUMENT_ERROR; 197 return 0; 198 } 199 200 UnicodeString b1String; 201 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE); 202 int32_t b1Len; 203 204 int32_t b1Index = 0; 205 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 206 UBool leftToRight=FALSE, rightToLeft=FALSE; 207 208 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); 209 b1String.releaseBuffer(b1Len); 210 211 if(status == U_BUFFER_OVERFLOW_ERROR){ 212 // redo processing of string 213 /* we do not have enough room so grow the buffer*/ 214 b1 = b1String.getBuffer(b1Len); 215 status = U_ZERO_ERROR; // reset error 216 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); 217 b1String.releaseBuffer(b1Len); 218 } 219 220 if(U_FAILURE(status)){ 221 b1Len = 0; 222 goto CLEANUP; 223 } 224 225 226 for(; b1Index<b1Len; ){ 227 228 UChar32 ch = 0; 229 230 U16_NEXT(b1, b1Index, b1Len, ch); 231 232 if(prohibited.contains(ch) && ch!=0x0020){ 233 status = U_IDNA_PROHIBITED_ERROR; 234 b1Len = 0; 235 goto CLEANUP; 236 } 237 238 direction = u_charDirection(ch); 239 if(firstCharDir==U_CHAR_DIRECTION_COUNT){ 240 firstCharDir = direction; 241 } 242 if(direction == U_LEFT_TO_RIGHT){ 243 leftToRight = TRUE; 244 } 245 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 246 rightToLeft = TRUE; 247 } 248 } 249 250 // satisfy 2 251 if( leftToRight == TRUE && rightToLeft == TRUE){ 252 status = U_IDNA_CHECK_BIDI_ERROR; 253 b1Len = 0; 254 goto CLEANUP; 255 } 256 257 //satisfy 3 258 if( rightToLeft == TRUE && 259 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 260 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 261 ){ 262 status = U_IDNA_CHECK_BIDI_ERROR; 263 return FALSE; 264 } 265 266 if(b1Len <= destCapacity){ 267 uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR); 268 } 269 270 CLEANUP: 271 return u_terminateUChars(dest, destCapacity, b1Len, &status); 272 } 273 274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){ 275 // check error status 276 if(U_FAILURE(status)){ 277 return FALSE; 278 } 279 280 return labelSeparatorSet.contains(ch); 281 } 282 283 #endif /* #if !UCONFIG_NO_IDNA */ 284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 285