1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * 8 ******************************************************************************* 9 */ 10 /* 11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved 12 * A JNI wrapper to ICU native converter Interface 13 * @author: Ram Viswanadha 14 */ 15 16 #define LOG_TAG "NativeConverter" 17 18 #include "IcuUtilities.h" 19 #include "JNIHelp.h" 20 #include "JniConstants.h" 21 #include "JniException.h" 22 #include "ScopedLocalRef.h" 23 #include "ScopedPrimitiveArray.h" 24 #include "ScopedStringChars.h" 25 #include "ScopedUtfChars.h" 26 #include "cutils/log.h" 27 #include "toStringArray.h" 28 #include "unicode/ucnv.h" 29 #include "unicode/ucnv_cb.h" 30 #include "unicode/uniset.h" 31 #include "unicode/ustring.h" 32 #include "unicode/utypes.h" 33 34 #include <memory> 35 #include <vector> 36 37 #include <stdlib.h> 38 #include <string.h> 39 40 #define NativeConverter_REPORT 0 41 #define NativeConverter_IGNORE 1 42 #define NativeConverter_REPLACE 2 43 44 #define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH 45 46 struct DecoderCallbackContext { 47 UChar replacementChars[MAX_REPLACEMENT_LENGTH]; 48 size_t replacementCharCount; 49 UConverterToUCallback onUnmappableInput; 50 UConverterToUCallback onMalformedInput; 51 }; 52 53 struct EncoderCallbackContext { 54 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 55 size_t replacementByteCount; 56 UConverterFromUCallback onUnmappableInput; 57 UConverterFromUCallback onMalformedInput; 58 }; 59 60 static UConverter* toUConverter(jlong address) { 61 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address)); 62 } 63 64 static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard, 65 std::vector<std::string>& result) { 66 UErrorCode status = U_ZERO_ERROR; 67 icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status)); 68 if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) { 69 return false; 70 } 71 72 int32_t count = e.count(status); 73 if (maybeThrowIcuException(env, "StringEnumeration::count", status)) { 74 return false; 75 } 76 77 for (int32_t i = 0; i < count; ++i) { 78 const icu::UnicodeString* string = e.snext(status); 79 if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) { 80 return false; 81 } 82 std::string s; 83 string->toUTF8String(s); 84 if (s.find_first_of("+,") == std::string::npos) { 85 result.push_back(s); 86 } 87 } 88 89 return true; 90 } 91 92 static const char* getICUCanonicalName(const char* name) { 93 UErrorCode error = U_ZERO_ERROR; 94 const char* canonicalName = NULL; 95 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) { 96 return canonicalName; 97 } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) { 98 return canonicalName; 99 } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) { 100 return canonicalName; 101 } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) { 102 // We have some aliases in the form x-blah .. match those first. 103 return canonicalName; 104 } else if (strstr(name, "x-") == name) { 105 // Check if the converter can be opened with the name given. 106 error = U_ZERO_ERROR; 107 icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error)); 108 if (U_SUCCESS(error)) { 109 return name + 2; 110 } 111 } 112 return NULL; 113 } 114 115 // If a charset listed in the IANA Charset Registry is supported by an implementation 116 // of the Java platform then its canonical name must be the name listed in the registry. 117 // Many charsets are given more than one name in the registry, in which case the registry 118 // identifies one of the names as MIME-preferred. If a charset has more than one registry 119 // name then its canonical name must be the MIME-preferred name and the other names in 120 // the registry must be valid aliases. If a supported charset is not listed in the IANA 121 // registry then its canonical name must begin with one of the strings "X-" or "x-". 122 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) { 123 UErrorCode status = U_ZERO_ERROR; 124 125 // Check to see if this is a well-known MIME or IANA name. 126 const char* cName = NULL; 127 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { 128 return env->NewStringUTF(cName); 129 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { 130 return env->NewStringUTF(cName); 131 } 132 133 // Check to see if an alias already exists with "x-" prefix, if yes then 134 // make that the canonical name. 135 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); 136 for (int i = 0; i < aliasCount; ++i) { 137 const char* name = ucnv_getAlias(icuCanonicalName, i, &status); 138 if (name != NULL && name[0] == 'x' && name[1] == '-') { 139 return env->NewStringUTF(name); 140 } 141 } 142 143 // As a last resort, prepend "x-" to any alias and make that the canonical name. 144 status = U_ZERO_ERROR; 145 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); 146 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { 147 name = ucnv_getAlias(icuCanonicalName, 1, &status); 148 } 149 // If there is no UTR22 canonical name then just return the original name. 150 if (name == NULL) { 151 name = icuCanonicalName; 152 } 153 std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]); 154 strcpy(&result[0], "x-"); 155 strcat(&result[0], name); 156 return env->NewStringUTF(&result[0]); 157 } 158 159 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) { 160 ScopedUtfChars converterNameChars(env, converterName); 161 if (converterNameChars.c_str() == NULL) { 162 return 0; 163 } 164 UErrorCode status = U_ZERO_ERROR; 165 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status); 166 maybeThrowIcuException(env, "ucnv_open", status); 167 return reinterpret_cast<uintptr_t>(cnv); 168 } 169 170 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) { 171 ucnv_close(toUConverter(address)); 172 } 173 174 static bool shouldCodecThrow(jboolean flush, UErrorCode error) { 175 if (flush) { 176 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND); 177 } else { 178 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND); 179 } 180 } 181 182 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address, 183 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, 184 jintArray data, jboolean flush) { 185 186 UConverter* cnv = toUConverter(address); 187 if (cnv == NULL) { 188 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 189 return U_ILLEGAL_ARGUMENT_ERROR; 190 } 191 ScopedCharArrayRO uSource(env, source); 192 if (uSource.get() == NULL) { 193 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 194 return U_ILLEGAL_ARGUMENT_ERROR; 195 } 196 ScopedByteArrayRW uTarget(env, target); 197 if (uTarget.get() == NULL) { 198 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 199 return U_ILLEGAL_ARGUMENT_ERROR; 200 } 201 ScopedIntArrayRW myData(env, data); 202 if (myData.get() == NULL) { 203 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 204 return U_ILLEGAL_ARGUMENT_ERROR; 205 } 206 207 // Do the conversion. 208 jint* sourceOffset = &myData[0]; 209 jint* targetOffset = &myData[1]; 210 const jchar* mySource = uSource.get() + *sourceOffset; 211 const UChar* mySourceLimit= uSource.get() + sourceEnd; 212 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 213 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd); 214 UErrorCode errorCode = U_ZERO_ERROR; 215 ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode); 216 *sourceOffset = (mySource - uSource.get()) - *sourceOffset; 217 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()); 218 219 // If there was an error, count the problematic characters. 220 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND || 221 errorCode == U_TRUNCATED_CHAR_FOUND) { 222 int8_t invalidUCharCount = 32; 223 UChar invalidUChars[32]; 224 UErrorCode minorErrorCode = U_ZERO_ERROR; 225 ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode); 226 if (U_SUCCESS(minorErrorCode)) { 227 myData[2] = invalidUCharCount; 228 } 229 } 230 231 // Managed code handles some cases; throw all other errors. 232 if (shouldCodecThrow(flush, errorCode)) { 233 maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode); 234 } 235 return errorCode; 236 } 237 238 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address, 239 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, 240 jintArray data, jboolean flush) { 241 242 UConverter* cnv = toUConverter(address); 243 if (cnv == NULL) { 244 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 245 return U_ILLEGAL_ARGUMENT_ERROR; 246 } 247 ScopedByteArrayRO uSource(env, source); 248 if (uSource.get() == NULL) { 249 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 250 return U_ILLEGAL_ARGUMENT_ERROR; 251 } 252 ScopedCharArrayRW uTarget(env, target); 253 if (uTarget.get() == NULL) { 254 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 255 return U_ILLEGAL_ARGUMENT_ERROR; 256 } 257 ScopedIntArrayRW myData(env, data); 258 if (myData.get() == NULL) { 259 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 260 return U_ILLEGAL_ARGUMENT_ERROR; 261 } 262 263 // Do the conversion. 264 jint* sourceOffset = &myData[0]; 265 jint* targetOffset = &myData[1]; 266 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset); 267 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); 268 UChar* cTarget = uTarget.get() + *targetOffset; 269 const UChar* cTargetLimit = uTarget.get() + targetEnd; 270 UErrorCode errorCode = U_ZERO_ERROR; 271 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode); 272 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset; 273 *targetOffset = cTarget - uTarget.get() - *targetOffset; 274 275 // If there was an error, count the problematic bytes. 276 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND || 277 errorCode == U_TRUNCATED_CHAR_FOUND) { 278 int8_t invalidByteCount = 32; 279 char invalidBytes[32] = {'\0'}; 280 UErrorCode minorErrorCode = U_ZERO_ERROR; 281 ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode); 282 if (U_SUCCESS(minorErrorCode)) { 283 myData[2] = invalidByteCount; 284 } 285 } 286 287 // Managed code handles some cases; throw all other errors. 288 if (shouldCodecThrow(flush, errorCode)) { 289 maybeThrowIcuException(env, "ucnv_toUnicode", errorCode); 290 } 291 return errorCode; 292 } 293 294 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) { 295 UConverter* cnv = toUConverter(address); 296 if (cnv) { 297 ucnv_resetToUnicode(cnv); 298 } 299 } 300 301 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) { 302 UConverter* cnv = toUConverter(address); 303 if (cnv) { 304 ucnv_resetFromUnicode(cnv); 305 } 306 } 307 308 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) { 309 UConverter* cnv = toUConverter(address); 310 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; 311 } 312 313 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) { 314 UConverter* cnv = toUConverter(address); 315 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1; 316 } 317 318 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) { 319 UConverter* cnv = toUConverter(address); 320 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; 321 } 322 323 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) { 324 int32_t num = ucnv_countAvailable(); 325 jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL); 326 if (result == NULL) { 327 return NULL; 328 } 329 for (int i = 0; i < num; ++i) { 330 const char* name = ucnv_getAvailableName(i); 331 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name)); 332 if (javaCanonicalName.get() == NULL) { 333 return NULL; 334 } 335 env->SetObjectArrayElement(result, i, javaCanonicalName.get()); 336 if (env->ExceptionCheck()) { 337 return NULL; 338 } 339 } 340 return result; 341 } 342 343 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args, 344 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, 345 UErrorCode* status) { 346 if (!rawContext) { 347 return; 348 } 349 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 350 switch(reason) { 351 case UCNV_UNASSIGNED: 352 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status); 353 return; 354 case UCNV_ILLEGAL: 355 case UCNV_IRREGULAR: 356 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status); 357 return; 358 case UCNV_CLOSE: 359 delete ctx; 360 return; 361 default: 362 *status = U_ILLEGAL_ARGUMENT_ERROR; 363 return; 364 } 365 } 366 367 static void encoderReplaceCallback(const void* rawContext, 368 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32, 369 UConverterCallbackReason, UErrorCode * err) { 370 if (rawContext == NULL) { 371 return; 372 } 373 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 374 *err = U_ZERO_ERROR; 375 ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err); 376 } 377 378 static UConverterFromUCallback getFromUCallback(int32_t mode) { 379 switch(mode) { 380 case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP; 381 case NativeConverter_REPLACE: return encoderReplaceCallback; 382 case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP; 383 } 384 abort(); 385 } 386 387 static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, 388 jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) { 389 UConverter* cnv = toUConverter(address); 390 if (cnv == NULL) { 391 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 392 return; 393 } 394 395 UConverterFromUCallback oldCallback = NULL; 396 const void* oldCallbackContext = NULL; 397 ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext)); 398 399 EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>( 400 reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext)); 401 if (callbackContext == NULL) { 402 callbackContext = new EncoderCallbackContext; 403 } 404 405 callbackContext->onMalformedInput = getFromUCallback(onMalformedInput); 406 callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput); 407 408 ScopedByteArrayRO replacementBytes(env, javaReplacement); 409 if (replacementBytes.get() == NULL) { 410 maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR); 411 return; 412 } 413 memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size()); 414 callbackContext->replacementByteCount = replacementBytes.size(); 415 416 UErrorCode errorCode = U_ZERO_ERROR; 417 ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 418 maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode); 419 } 420 421 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { 422 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is 423 // never true for us. 424 *err = U_ZERO_ERROR; 425 } 426 427 static void decoderReplaceCallback(const void* rawContext, 428 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, 429 UErrorCode* err) { 430 if (!rawContext) { 431 return; 432 } 433 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 434 *err = U_ZERO_ERROR; 435 ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err); 436 } 437 438 static UConverterToUCallback getToUCallback(int32_t mode) { 439 switch (mode) { 440 case NativeConverter_IGNORE: return decoderIgnoreCallback; 441 case NativeConverter_REPLACE: return decoderReplaceCallback; 442 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; 443 } 444 abort(); 445 } 446 447 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args, 448 const char* codeUnits, int32_t length, 449 UConverterCallbackReason reason, UErrorCode* status) { 450 if (!rawContext) { 451 return; 452 } 453 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 454 switch(reason) { 455 case UCNV_UNASSIGNED: 456 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status); 457 return; 458 case UCNV_ILLEGAL: 459 case UCNV_IRREGULAR: 460 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status); 461 return; 462 case UCNV_CLOSE: 463 delete ctx; 464 return; 465 default: 466 *status = U_ILLEGAL_ARGUMENT_ERROR; 467 return; 468 } 469 } 470 471 static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address, 472 jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) { 473 UConverter* cnv = toUConverter(address); 474 if (cnv == NULL) { 475 maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR); 476 return; 477 } 478 479 UConverterToUCallback oldCallback; 480 const void* oldCallbackContext; 481 ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext); 482 483 DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>( 484 reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext)); 485 if (callbackContext == NULL) { 486 callbackContext = new DecoderCallbackContext; 487 } 488 489 callbackContext->onMalformedInput = getToUCallback(onMalformedInput); 490 callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput); 491 492 ScopedStringChars replacement(env, javaReplacement); 493 if (replacement.get() == NULL) { 494 maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR); 495 return; 496 } 497 u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size()); 498 callbackContext->replacementCharCount = replacement.size(); 499 500 UErrorCode errorCode = U_ZERO_ERROR; 501 ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 502 maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode); 503 } 504 505 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) { 506 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle)); 507 } 508 509 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) { 510 UConverter* cnv = toUConverter(address); 511 if (cnv == NULL) { 512 return NULL; 513 } 514 UErrorCode status = U_ZERO_ERROR; 515 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 516 int8_t len = sizeof(replacementBytes); 517 ucnv_getSubstChars(cnv, replacementBytes, &len, &status); 518 if (!U_SUCCESS(status)) { 519 return env->NewByteArray(0); 520 } 521 jbyteArray result = env->NewByteArray(len); 522 if (result == NULL) { 523 return NULL; 524 } 525 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes)); 526 return result; 527 } 528 529 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) { 530 ScopedUtfChars name1Chars(env, name1); 531 if (name1Chars.c_str() == NULL) { 532 return JNI_FALSE; 533 } 534 ScopedUtfChars name2Chars(env, name2); 535 if (name2Chars.c_str() == NULL) { 536 return JNI_FALSE; 537 } 538 539 UErrorCode errorCode = U_ZERO_ERROR; 540 icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode)); 541 icu::UnicodeSet set1; 542 ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 543 544 icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode)); 545 icu::UnicodeSet set2; 546 ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 547 548 return U_SUCCESS(errorCode) && set1.containsAll(set2); 549 } 550 551 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) { 552 ScopedUtfChars charsetNameChars(env, charsetName); 553 if (charsetNameChars.c_str() == NULL) { 554 return NULL; 555 } 556 557 // Get ICU's canonical name for this charset. 558 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str()); 559 if (icuCanonicalName == NULL) { 560 return NULL; 561 } 562 563 // Get Java's canonical name for this charset. 564 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName); 565 if (env->ExceptionCheck()) { 566 return NULL; 567 } 568 569 // Check that this charset is supported. 570 { 571 // ICU doesn't offer any "isSupported", so we just open and immediately close. 572 UErrorCode error = U_ZERO_ERROR; 573 icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error)); 574 if (!U_SUCCESS(error)) { 575 return NULL; 576 } 577 } 578 579 // Get the aliases for this charset. 580 std::vector<std::string> aliases; 581 if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) { 582 return NULL; 583 } 584 if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) { 585 return NULL; 586 } 587 if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) { 588 return NULL; 589 } 590 if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) { 591 return NULL; 592 } 593 jobjectArray javaAliases = toStringArray(env, aliases); 594 if (env->ExceptionCheck()) { 595 return NULL; 596 } 597 598 // Construct the CharsetICU object. 599 static jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>", 600 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"); 601 if (env->ExceptionCheck()) { 602 return NULL; 603 } 604 605 jstring icuCanonicalNameStr = env->NewStringUTF(icuCanonicalName); 606 if (env->ExceptionCheck()) { 607 return NULL; 608 } 609 610 return env->NewObject(JniConstants::charsetICUClass, charsetConstructor, 611 javaCanonicalName, icuCanonicalNameStr, javaAliases); 612 } 613 614 static void FreeNativeConverter(void *converter) { 615 ucnv_close(reinterpret_cast<UConverter*>(converter)); 616 } 617 618 static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) { 619 return reinterpret_cast<jlong>(&FreeNativeConverter); 620 } 621 622 623 static jlong NativeConverter_getNativeSize(JNIEnv*, jclass, jstring) { 624 // TODO: Improve estimate. 625 return 200; 626 } 627 628 static JNINativeMethod gMethods[] = { 629 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"), 630 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"), 631 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"), 632 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"), 633 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"), 634 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"), 635 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"), 636 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"), 637 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"), 638 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"), 639 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"), 640 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"), 641 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"), 642 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"), 643 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"), 644 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"), 645 NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"), 646 NATIVE_METHOD(NativeConverter, getNativeSize, "()J") 647 }; 648 void register_libcore_icu_NativeConverter(JNIEnv* env) { 649 jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods)); 650 } 651