1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2009,2014 International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * Date Name Description 7 * 06/21/00 aliu Creation. 8 ******************************************************************************* 9 */ 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_TRANSLITERATION 14 15 #include "unicode/utrans.h" 16 #include "unicode/putil.h" 17 #include "unicode/rep.h" 18 #include "unicode/translit.h" 19 #include "unicode/unifilt.h" 20 #include "unicode/uniset.h" 21 #include "unicode/ustring.h" 22 #include "unicode/uenum.h" 23 #include "unicode/uset.h" 24 #include "uenumimp.h" 25 #include "cpputils.h" 26 #include "rbt.h" 27 28 // Following macro is to be followed by <return value>';' or just ';' 29 #define utrans_ENTRY(s) if ((s)==NULL || U_FAILURE(*(s))) return 30 31 /******************************************************************** 32 * Replaceable-UReplaceableCallbacks glue 33 ********************************************************************/ 34 35 /** 36 * Make a UReplaceable + UReplaceableCallbacks into a Replaceable object. 37 */ 38 U_NAMESPACE_BEGIN 39 class ReplaceableGlue : public Replaceable { 40 41 UReplaceable *rep; 42 UReplaceableCallbacks *func; 43 44 public: 45 46 ReplaceableGlue(UReplaceable *replaceable, 47 UReplaceableCallbacks *funcCallback); 48 49 virtual ~ReplaceableGlue(); 50 51 virtual void handleReplaceBetween(int32_t start, 52 int32_t limit, 53 const UnicodeString& text); 54 55 virtual void extractBetween(int32_t start, 56 int32_t limit, 57 UnicodeString& target) const; 58 59 virtual void copy(int32_t start, int32_t limit, int32_t dest); 60 61 // virtual Replaceable *clone() const { return NULL; } same as default 62 63 /** 64 * ICU "poor man's RTTI", returns a UClassID for the actual class. 65 * 66 * @draft ICU 2.2 67 */ 68 virtual UClassID getDynamicClassID() const; 69 70 /** 71 * ICU "poor man's RTTI", returns a UClassID for this class. 72 * 73 * @draft ICU 2.2 74 */ 75 static UClassID U_EXPORT2 getStaticClassID(); 76 77 protected: 78 79 virtual int32_t getLength() const; 80 81 virtual UChar getCharAt(int32_t offset) const; 82 83 virtual UChar32 getChar32At(int32_t offset) const; 84 }; 85 86 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) 87 88 ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, 89 UReplaceableCallbacks *funcCallback) 90 : Replaceable() 91 { 92 this->rep = replaceable; 93 this->func = funcCallback; 94 } 95 96 ReplaceableGlue::~ReplaceableGlue() {} 97 98 int32_t ReplaceableGlue::getLength() const { 99 return (*func->length)(rep); 100 } 101 102 UChar ReplaceableGlue::getCharAt(int32_t offset) const { 103 return (*func->charAt)(rep, offset); 104 } 105 106 UChar32 ReplaceableGlue::getChar32At(int32_t offset) const { 107 return (*func->char32At)(rep, offset); 108 } 109 110 void ReplaceableGlue::handleReplaceBetween(int32_t start, 111 int32_t limit, 112 const UnicodeString& text) { 113 (*func->replace)(rep, start, limit, text.getBuffer(), text.length()); 114 } 115 116 void ReplaceableGlue::extractBetween(int32_t start, 117 int32_t limit, 118 UnicodeString& target) const { 119 (*func->extract)(rep, start, limit, target.getBuffer(limit-start)); 120 target.releaseBuffer(limit-start); 121 } 122 123 void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) { 124 (*func->copy)(rep, start, limit, dest); 125 } 126 U_NAMESPACE_END 127 /******************************************************************** 128 * General API 129 ********************************************************************/ 130 U_NAMESPACE_USE 131 132 U_CAPI UTransliterator* U_EXPORT2 133 utrans_openU(const UChar *id, 134 int32_t idLength, 135 UTransDirection dir, 136 const UChar *rules, 137 int32_t rulesLength, 138 UParseError *parseError, 139 UErrorCode *status) { 140 if(status==NULL || U_FAILURE(*status)) { 141 return NULL; 142 } 143 if (id == NULL) { 144 *status = U_ILLEGAL_ARGUMENT_ERROR; 145 return NULL; 146 } 147 UParseError temp; 148 149 if(parseError == NULL){ 150 parseError = &temp; 151 } 152 153 UnicodeString ID(idLength<0, id, idLength); // r-o alias 154 155 if(rules==NULL){ 156 157 Transliterator *trans = NULL; 158 159 trans = Transliterator::createInstance(ID, dir, *parseError, *status); 160 161 if(U_FAILURE(*status)){ 162 return NULL; 163 } 164 return (UTransliterator*) trans; 165 }else{ 166 UnicodeString ruleStr(rulesLength < 0, 167 rules, 168 rulesLength); // r-o alias 169 170 Transliterator *trans = NULL; 171 trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status); 172 if(U_FAILURE(*status)) { 173 return NULL; 174 } 175 176 return (UTransliterator*) trans; 177 } 178 } 179 180 U_CAPI UTransliterator* U_EXPORT2 181 utrans_open(const char* id, 182 UTransDirection dir, 183 const UChar* rules, /* may be Null */ 184 int32_t rulesLength, /* -1 if null-terminated */ 185 UParseError* parseError, /* may be Null */ 186 UErrorCode* status) { 187 UnicodeString ID(id, -1, US_INV); // use invariant converter 188 return utrans_openU(ID.getBuffer(), ID.length(), dir, 189 rules, rulesLength, 190 parseError, status); 191 } 192 193 U_CAPI UTransliterator* U_EXPORT2 194 utrans_openInverse(const UTransliterator* trans, 195 UErrorCode* status) { 196 197 utrans_ENTRY(status) NULL; 198 199 UTransliterator* result = 200 (UTransliterator*) ((Transliterator*) trans)->createInverse(*status); 201 202 return result; 203 } 204 205 U_CAPI UTransliterator* U_EXPORT2 206 utrans_clone(const UTransliterator* trans, 207 UErrorCode* status) { 208 209 utrans_ENTRY(status) NULL; 210 211 if (trans == NULL) { 212 *status = U_ILLEGAL_ARGUMENT_ERROR; 213 return NULL; 214 } 215 216 Transliterator *t = ((Transliterator*) trans)->clone(); 217 if (t == NULL) { 218 *status = U_MEMORY_ALLOCATION_ERROR; 219 } 220 return (UTransliterator*) t; 221 } 222 223 U_CAPI void U_EXPORT2 224 utrans_close(UTransliterator* trans) { 225 delete (Transliterator*) trans; 226 } 227 228 U_CAPI const UChar * U_EXPORT2 229 utrans_getUnicodeID(const UTransliterator *trans, 230 int32_t *resultLength) { 231 // Transliterator keeps its ID NUL-terminated 232 const UnicodeString &ID=((Transliterator*) trans)->getID(); 233 if(resultLength!=NULL) { 234 *resultLength=ID.length(); 235 } 236 return ID.getBuffer(); 237 } 238 239 U_CAPI int32_t U_EXPORT2 240 utrans_getID(const UTransliterator* trans, 241 char* buf, 242 int32_t bufCapacity) { 243 return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 244 } 245 246 U_CAPI void U_EXPORT2 247 utrans_register(UTransliterator* adoptedTrans, 248 UErrorCode* status) { 249 utrans_ENTRY(status); 250 // status currently ignored; may remove later 251 Transliterator::registerInstance((Transliterator*) adoptedTrans); 252 } 253 254 U_CAPI void U_EXPORT2 255 utrans_unregisterID(const UChar* id, int32_t idLength) { 256 UnicodeString ID(idLength<0, id, idLength); // r-o alias 257 Transliterator::unregister(ID); 258 } 259 260 U_CAPI void U_EXPORT2 261 utrans_unregister(const char* id) { 262 UnicodeString ID(id, -1, US_INV); // use invariant converter 263 Transliterator::unregister(ID); 264 } 265 266 U_CAPI void U_EXPORT2 267 utrans_setFilter(UTransliterator* trans, 268 const UChar* filterPattern, 269 int32_t filterPatternLen, 270 UErrorCode* status) { 271 272 utrans_ENTRY(status); 273 UnicodeFilter* filter = NULL; 274 if (filterPattern != NULL && *filterPattern != 0) { 275 // Create read only alias of filterPattern: 276 UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen); 277 filter = new UnicodeSet(pat, *status); 278 /* test for NULL */ 279 if (filter == NULL) { 280 *status = U_MEMORY_ALLOCATION_ERROR; 281 return; 282 } 283 if (U_FAILURE(*status)) { 284 delete filter; 285 filter = NULL; 286 } 287 } 288 ((Transliterator*) trans)->adoptFilter(filter); 289 } 290 291 U_CAPI int32_t U_EXPORT2 292 utrans_countAvailableIDs(void) { 293 return Transliterator::countAvailableIDs(); 294 } 295 296 U_CAPI int32_t U_EXPORT2 297 utrans_getAvailableID(int32_t index, 298 char* buf, // may be NULL 299 int32_t bufCapacity) { 300 return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 301 } 302 303 /* Transliterator UEnumeration ---------------------------------------------- */ 304 305 typedef struct UTransEnumeration { 306 UEnumeration uenum; 307 int32_t index, count; 308 } UTransEnumeration; 309 310 U_CDECL_BEGIN 311 static int32_t U_CALLCONV 312 utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) { 313 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 314 return 0; 315 } 316 return ((UTransEnumeration *)uenum)->count; 317 } 318 319 static const UChar* U_CALLCONV 320 utrans_enum_unext(UEnumeration *uenum, 321 int32_t* resultLength, 322 UErrorCode *pErrorCode) { 323 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 324 return 0; 325 } 326 327 UTransEnumeration *ute=(UTransEnumeration *)uenum; 328 int32_t index=ute->index; 329 if(index<ute->count) { 330 const UnicodeString &ID=Transliterator::getAvailableID(index); 331 ute->index=index+1; 332 if(resultLength!=NULL) { 333 *resultLength=ID.length(); 334 } 335 // Transliterator keeps its ID NUL-terminated 336 return ID.getBuffer(); 337 } 338 339 if(resultLength!=NULL) { 340 *resultLength=0; 341 } 342 return NULL; 343 } 344 345 static void U_CALLCONV 346 utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) { 347 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 348 return; 349 } 350 351 UTransEnumeration *ute=(UTransEnumeration *)uenum; 352 ute->index=0; 353 ute->count=Transliterator::countAvailableIDs(); 354 } 355 356 static void U_CALLCONV 357 utrans_enum_close(UEnumeration *uenum) { 358 uprv_free(uenum); 359 } 360 U_CDECL_END 361 362 static const UEnumeration utransEnumeration={ 363 NULL, 364 NULL, 365 utrans_enum_close, 366 utrans_enum_count, 367 utrans_enum_unext, 368 uenum_nextDefault, 369 utrans_enum_reset 370 }; 371 372 U_CAPI UEnumeration * U_EXPORT2 373 utrans_openIDs(UErrorCode *pErrorCode) { 374 UTransEnumeration *ute; 375 376 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 377 return NULL; 378 } 379 380 ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration)); 381 if(ute==NULL) { 382 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 383 return NULL; 384 } 385 386 ute->uenum=utransEnumeration; 387 ute->index=0; 388 ute->count=Transliterator::countAvailableIDs(); 389 return (UEnumeration *)ute; 390 } 391 392 /******************************************************************** 393 * Transliteration API 394 ********************************************************************/ 395 396 U_CAPI void U_EXPORT2 397 utrans_trans(const UTransliterator* trans, 398 UReplaceable* rep, 399 UReplaceableCallbacks* repFunc, 400 int32_t start, 401 int32_t* limit, 402 UErrorCode* status) { 403 404 utrans_ENTRY(status); 405 406 if (trans == 0 || rep == 0 || repFunc == 0 || limit == 0) { 407 *status = U_ILLEGAL_ARGUMENT_ERROR; 408 return; 409 } 410 411 ReplaceableGlue r(rep, repFunc); 412 413 *limit = ((Transliterator*) trans)->transliterate(r, start, *limit); 414 } 415 416 U_CAPI void U_EXPORT2 417 utrans_transIncremental(const UTransliterator* trans, 418 UReplaceable* rep, 419 UReplaceableCallbacks* repFunc, 420 UTransPosition* pos, 421 UErrorCode* status) { 422 423 utrans_ENTRY(status); 424 425 if (trans == 0 || rep == 0 || repFunc == 0 || pos == 0) { 426 *status = U_ILLEGAL_ARGUMENT_ERROR; 427 return; 428 } 429 430 ReplaceableGlue r(rep, repFunc); 431 432 ((Transliterator*) trans)->transliterate(r, *pos, *status); 433 } 434 435 U_CAPI void U_EXPORT2 436 utrans_transUChars(const UTransliterator* trans, 437 UChar* text, 438 int32_t* textLength, 439 int32_t textCapacity, 440 int32_t start, 441 int32_t* limit, 442 UErrorCode* status) { 443 444 utrans_ENTRY(status); 445 446 if (trans == 0 || text == 0 || limit == 0) { 447 *status = U_ILLEGAL_ARGUMENT_ERROR; 448 return; 449 } 450 451 int32_t textLen = (textLength == NULL || *textLength < 0) 452 ? u_strlen(text) : *textLength; 453 // writeable alias: for this ct, len CANNOT be -1 (why?) 454 UnicodeString str(text, textLen, textCapacity); 455 456 *limit = ((Transliterator*) trans)->transliterate(str, start, *limit); 457 458 // Copy the string buffer back to text (only if necessary) 459 // and fill in *neededCapacity (if neededCapacity != NULL). 460 textLen = str.extract(text, textCapacity, *status); 461 if(textLength != NULL) { 462 *textLength = textLen; 463 } 464 } 465 466 U_CAPI void U_EXPORT2 467 utrans_transIncrementalUChars(const UTransliterator* trans, 468 UChar* text, 469 int32_t* textLength, 470 int32_t textCapacity, 471 UTransPosition* pos, 472 UErrorCode* status) { 473 474 utrans_ENTRY(status); 475 476 if (trans == 0 || text == 0 || pos == 0) { 477 *status = U_ILLEGAL_ARGUMENT_ERROR; 478 return; 479 } 480 481 int32_t textLen = (textLength == NULL || *textLength < 0) 482 ? u_strlen(text) : *textLength; 483 // writeable alias: for this ct, len CANNOT be -1 (why?) 484 UnicodeString str(text, textLen, textCapacity); 485 486 ((Transliterator*) trans)->transliterate(str, *pos, *status); 487 488 // Copy the string buffer back to text (only if necessary) 489 // and fill in *neededCapacity (if neededCapacity != NULL). 490 textLen = str.extract(text, textCapacity, *status); 491 if(textLength != NULL) { 492 *textLength = textLen; 493 } 494 } 495 496 U_CAPI int32_t U_EXPORT2 497 utrans_toRules( const UTransliterator* trans, 498 UBool escapeUnprintable, 499 UChar* result, int32_t resultLength, 500 UErrorCode* status) { 501 utrans_ENTRY(status) 0; 502 if ( (result==NULL)? resultLength!=0: resultLength<0 ) { 503 *status = U_ILLEGAL_ARGUMENT_ERROR; 504 return 0; 505 } 506 507 UnicodeString res; 508 res.setTo(result, 0, resultLength); 509 ((Transliterator*) trans)->toRules(res, escapeUnprintable); 510 return res.extract(result, resultLength, *status); 511 } 512 513 U_CAPI USet* U_EXPORT2 514 utrans_getSourceSet(const UTransliterator* trans, 515 UBool ignoreFilter, 516 USet* fillIn, 517 UErrorCode* status) { 518 utrans_ENTRY(status) fillIn; 519 520 if (fillIn == NULL) { 521 fillIn = uset_openEmpty(); 522 } 523 if (ignoreFilter) { 524 ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn)); 525 } else { 526 ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn)); 527 } 528 return fillIn; 529 } 530 531 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 532