1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * 8 ******************************************************************************* 9 */ 10 /* 11 * @(#) icujniinterface.c 1.2 00/10/11 12 * 13 * (C) Copyright IBM Corp. 2000 - All Rights Reserved 14 * A JNI wrapper to ICU native converter Interface 15 * @author: Ram Viswanadha 16 */ 17 18 #define LOG_TAG "NativeConverter" 19 20 #include "ErrorCode.h" 21 #include "JNIHelp.h" 22 #include "JniConstants.h" 23 #include "ScopedLocalRef.h" 24 #include "ScopedPrimitiveArray.h" 25 #include "ScopedUtfChars.h" 26 #include "UniquePtr.h" 27 #include "cutils/log.h" 28 #include "unicode/ucnv.h" 29 #include "unicode/ucnv_cb.h" 30 #include "unicode/uniset.h" 31 #include "unicode/ustring.h" 32 #include "unicode/utypes.h" 33 #include <stdlib.h> 34 #include <string.h> 35 36 #define NativeConverter_REPORT 0 37 #define NativeConverter_IGNORE 1 38 #define NativeConverter_REPLACE 2 39 40 struct DecoderCallbackContext { 41 int length; 42 UChar subUChars[256]; 43 UConverterToUCallback onUnmappableInput; 44 UConverterToUCallback onMalformedInput; 45 }; 46 47 struct EncoderCallbackContext { 48 int length; 49 char subBytes[256]; 50 UConverterFromUCallback onUnmappableInput; 51 UConverterFromUCallback onMalformedInput; 52 }; 53 54 struct UConverterDeleter { 55 void operator()(UConverter* p) const { 56 ucnv_close(p); 57 } 58 }; 59 typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter; 60 61 static UConverter* toUConverter(jlong address) { 62 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address)); 63 } 64 65 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) { 66 ScopedUtfChars converterNameChars(env, converterName); 67 if (converterNameChars.c_str() == NULL) { 68 return 0; 69 } 70 UErrorCode errorCode = U_ZERO_ERROR; 71 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &errorCode); 72 icu4jni_error(env, errorCode); 73 return reinterpret_cast<uintptr_t>(cnv); 74 } 75 76 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) { 77 ucnv_close(toUConverter(address)); 78 } 79 80 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address, 81 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, 82 jintArray data, jboolean flush) { 83 84 UConverter* cnv = toUConverter(address); 85 if (cnv == NULL) { 86 return U_ILLEGAL_ARGUMENT_ERROR; 87 } 88 ScopedCharArrayRO uSource(env, source); 89 if (uSource.get() == NULL) { 90 return U_ILLEGAL_ARGUMENT_ERROR; 91 } 92 ScopedByteArrayRW uTarget(env, target); 93 if (uTarget.get() == NULL) { 94 return U_ILLEGAL_ARGUMENT_ERROR; 95 } 96 ScopedIntArrayRW myData(env, data); 97 if (myData.get() == NULL) { 98 return U_ILLEGAL_ARGUMENT_ERROR; 99 } 100 101 // Do the conversion. 102 jint* sourceOffset = &myData[0]; 103 jint* targetOffset = &myData[1]; 104 const jchar* mySource = uSource.get() + *sourceOffset; 105 const UChar* mySourceLimit= uSource.get() + sourceEnd; 106 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 107 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd); 108 UErrorCode errorCode = U_ZERO_ERROR; 109 ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode); 110 *sourceOffset = (mySource - uSource.get()) - *sourceOffset; 111 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset; 112 113 // Check how much more input is necessary to complete what's in the converter's internal buffer. 114 UErrorCode minorErrorCode = U_ZERO_ERROR; 115 int32_t pending = ucnv_fromUCountPending(cnv, &minorErrorCode); 116 if (U_SUCCESS(minorErrorCode)) { 117 myData[3] = pending; 118 } 119 120 // If there was an error, count the problematic characters. 121 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 122 int8_t len = 32; 123 UChar invalidUChars[32]; 124 ucnv_getInvalidUChars(cnv, invalidUChars, &len, &minorErrorCode); 125 if (U_SUCCESS(minorErrorCode)) { 126 myData[2] = len; 127 } 128 } 129 return errorCode; 130 } 131 132 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address, 133 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, 134 jintArray data, jboolean flush) { 135 136 UConverter* cnv = toUConverter(address); 137 if (cnv == NULL) { 138 return U_ILLEGAL_ARGUMENT_ERROR; 139 } 140 ScopedByteArrayRO uSource(env, source); 141 if (uSource.get() == NULL) { 142 return U_ILLEGAL_ARGUMENT_ERROR; 143 } 144 ScopedCharArrayRW uTarget(env, target); 145 if (uTarget.get() == NULL) { 146 return U_ILLEGAL_ARGUMENT_ERROR; 147 } 148 ScopedIntArrayRW myData(env, data); 149 if (myData.get() == NULL) { 150 return U_ILLEGAL_ARGUMENT_ERROR; 151 } 152 153 // Do the conversion. 154 jint* sourceOffset = &myData[0]; 155 jint* targetOffset = &myData[1]; 156 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset); 157 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); 158 UChar* cTarget = uTarget.get() + *targetOffset; 159 const UChar* cTargetLimit = uTarget.get() + targetEnd; 160 UErrorCode errorCode = U_ZERO_ERROR; 161 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode); 162 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset; 163 *targetOffset = cTarget - uTarget.get() - *targetOffset; 164 165 // Check how much more input is necessary to complete what's in the converter's internal buffer. 166 UErrorCode minorErrorCode = U_ZERO_ERROR; 167 jint pending = ucnv_toUCountPending(cnv, &minorErrorCode); 168 myData[3] = pending; 169 170 // If there was an error, count the problematic bytes. 171 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 172 int8_t len = 32; 173 char invalidChars[32] = {'\0'}; 174 ucnv_getInvalidChars(cnv, invalidChars, &len, &minorErrorCode); 175 if (U_SUCCESS(minorErrorCode)) { 176 myData[2] = len; 177 } 178 } 179 180 return errorCode; 181 } 182 183 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) { 184 UConverter* cnv = toUConverter(address); 185 if (cnv) { 186 ucnv_resetToUnicode(cnv); 187 } 188 } 189 190 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) { 191 UConverter* cnv = toUConverter(address); 192 if (cnv) { 193 ucnv_resetFromUnicode(cnv); 194 } 195 } 196 197 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) { 198 UConverter* cnv = toUConverter(address); 199 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; 200 } 201 202 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) { 203 UConverter* cnv = toUConverter(address); 204 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1; 205 } 206 207 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) { 208 UConverter* cnv = toUConverter(address); 209 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; 210 } 211 212 static jint NativeConverter_flushByteToChar(JNIEnv* env, jclass, jlong address, 213 jcharArray target, jint targetEnd, jintArray data) { 214 UConverter* cnv = toUConverter(address); 215 if (cnv == NULL) { 216 return U_ILLEGAL_ARGUMENT_ERROR; 217 } 218 ScopedCharArrayRW uTarget(env, target); 219 if (uTarget.get() == NULL) { 220 return U_ILLEGAL_ARGUMENT_ERROR; 221 } 222 ScopedIntArrayRW myData(env, data); 223 if (myData.get() == NULL) { 224 return U_ILLEGAL_ARGUMENT_ERROR; 225 } 226 char source = '\0'; 227 jint* targetOffset = &myData[1]; 228 const char* mySource = &source; 229 const char* mySourceLimit = &source; 230 UChar* cTarget = uTarget.get() + *targetOffset; 231 const UChar* cTargetLimit = uTarget.get() + targetEnd; 232 UErrorCode errorCode = U_ZERO_ERROR; 233 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode); 234 *targetOffset = cTarget - uTarget.get() - *targetOffset; 235 return errorCode; 236 } 237 238 static jint NativeConverter_flushCharToByte(JNIEnv* env, jclass, jlong address, 239 jbyteArray target, jint targetEnd, jintArray data) { 240 UConverter* cnv = toUConverter(address); 241 if (cnv == NULL) { 242 return U_ILLEGAL_ARGUMENT_ERROR; 243 } 244 ScopedByteArrayRW uTarget(env, target); 245 if (uTarget.get() == NULL) { 246 return U_ILLEGAL_ARGUMENT_ERROR; 247 } 248 ScopedIntArrayRW myData(env, data); 249 if (myData.get() == NULL) { 250 return U_ILLEGAL_ARGUMENT_ERROR; 251 } 252 jchar source = '\0'; 253 jint* targetOffset = &myData[1]; 254 const jchar* mySource = &source; 255 const UChar* mySourceLimit= &source; 256 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 257 const char* cTargetLimit = reinterpret_cast<char*>(uTarget.get() + targetEnd); 258 UErrorCode errorCode = U_ZERO_ERROR; 259 ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode); 260 *targetOffset = reinterpret_cast<jbyte*>(cTarget) - uTarget.get() - *targetOffset; 261 return errorCode; 262 } 263 264 static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) { 265 UErrorCode errorCode = U_ZERO_ERROR; 266 UConverter* cnv = toUConverter(address); 267 if (cnv == NULL) { 268 return JNI_FALSE; 269 } 270 271 UChar srcBuffer[3]; 272 const UChar* src = &srcBuffer[0]; 273 const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2]; 274 275 char dstBuffer[5]; 276 char* dst = &dstBuffer[0]; 277 const char* dstLimit = &dstBuffer[4]; 278 279 int i = 0; 280 UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit); 281 282 ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode); 283 return U_SUCCESS(errorCode); 284 } 285 286 /* 287 * If a charset listed in the IANA Charset Registry is supported by an implementation 288 * of the Java platform then its canonical name must be the name listed in the registry. 289 * Many charsets are given more than one name in the registry, in which case the registry 290 * identifies one of the names as MIME-preferred. If a charset has more than one registry 291 * name then its canonical name must be the MIME-preferred name and the other names in 292 * the registry must be valid aliases. If a supported charset is not listed in the IANA 293 * registry then its canonical name must begin with one of the strings "X-" or "x-". 294 */ 295 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) { 296 UErrorCode status = U_ZERO_ERROR; 297 298 // Check to see if this is a well-known MIME or IANA name. 299 const char* cName = NULL; 300 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { 301 return env->NewStringUTF(cName); 302 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { 303 return env->NewStringUTF(cName); 304 } 305 306 // Check to see if an alias already exists with "x-" prefix, if yes then 307 // make that the canonical name. 308 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); 309 for (int i = 0; i < aliasCount; ++i) { 310 const char* name = ucnv_getAlias(icuCanonicalName, i, &status); 311 if (name != NULL && name[0] == 'x' && name[1] == '-') { 312 return env->NewStringUTF(name); 313 } 314 } 315 316 // As a last resort, prepend "x-" to any alias and make that the canonical name. 317 status = U_ZERO_ERROR; 318 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); 319 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { 320 name = ucnv_getAlias(icuCanonicalName, 1, &status); 321 } 322 // If there is no UTR22 canonical name then just return the original name. 323 if (name == NULL) { 324 name = icuCanonicalName; 325 } 326 UniquePtr<char[]> result(new char[2 + strlen(name) + 1]); 327 strcpy(&result[0], "x-"); 328 strcat(&result[0], name); 329 return env->NewStringUTF(&result[0]); 330 } 331 332 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) { 333 int32_t num = ucnv_countAvailable(); 334 jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL); 335 for (int i = 0; i < num; ++i) { 336 const char* name = ucnv_getAvailableName(i); 337 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name)); 338 env->SetObjectArrayElement(result, i, javaCanonicalName.get()); 339 } 340 return result; 341 } 342 343 static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) { 344 // Get an upper bound on the number of aliases... 345 const char* myEncName = icuCanonicalName; 346 UErrorCode error = U_ZERO_ERROR; 347 int32_t aliasCount = ucnv_countAliases(myEncName, &error); 348 if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') { 349 myEncName = myEncName + 2; 350 aliasCount = ucnv_countAliases(myEncName, &error); 351 } 352 if (!U_SUCCESS(error)) { 353 return NULL; 354 } 355 356 // Collect the aliases we want... 357 const char* aliasArray[aliasCount]; 358 int actualAliasCount = 0; 359 for(int i = 0; i < aliasCount; ++i) { 360 const char* name = ucnv_getAlias(myEncName, (uint16_t) i, &error); 361 if (!U_SUCCESS(error)) { 362 return NULL; 363 } 364 // TODO: why do we ignore these ones? 365 if (strchr(name, '+') == 0 && strchr(name, ',') == 0) { 366 aliasArray[actualAliasCount++]= name; 367 } 368 } 369 370 // Convert our C++ char*[] into a Java String[]... 371 jobjectArray result = env->NewObjectArray(actualAliasCount, JniConstants::stringClass, NULL); 372 for (int i = 0; i < actualAliasCount; ++i) { 373 ScopedLocalRef<jstring> alias(env, env->NewStringUTF(aliasArray[i])); 374 env->SetObjectArrayElement(result, i, alias.get()); 375 } 376 return result; 377 } 378 379 static const char* getICUCanonicalName(const char* name) { 380 UErrorCode error = U_ZERO_ERROR; 381 const char* canonicalName = NULL; 382 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) { 383 return canonicalName; 384 } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) { 385 return canonicalName; 386 } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) { 387 return canonicalName; 388 } else if((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) { 389 /* we have some aliases in the form x-blah .. match those first */ 390 return canonicalName; 391 } else if (strstr(name, "x-") == name) { 392 /* check if the converter can be opened with the name given */ 393 error = U_ZERO_ERROR; 394 UniqueUConverter cnv(ucnv_open(name + 2, &error)); 395 if (cnv.get() != NULL) { 396 return name + 2; 397 } 398 } 399 return NULL; 400 } 401 402 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args, 403 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, 404 UErrorCode* status) { 405 if (!rawContext) { 406 return; 407 } 408 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 409 switch(reason) { 410 case UCNV_UNASSIGNED: 411 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status); 412 return; 413 case UCNV_ILLEGAL: 414 case UCNV_IRREGULAR: 415 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status); 416 return; 417 case UCNV_CLOSE: 418 delete ctx; 419 return; 420 default: 421 *status = U_ILLEGAL_ARGUMENT_ERROR; 422 return; 423 } 424 } 425 426 static void encoderReplaceCallback(const void* rawContext, 427 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32, 428 UConverterCallbackReason, UErrorCode * err) { 429 if (rawContext == NULL) { 430 return; 431 } 432 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 433 *err = U_ZERO_ERROR; 434 ucnv_cbFromUWriteBytes(fromArgs, context->subBytes, context->length, 0, err); 435 } 436 437 static UConverterFromUCallback getFromUCallback(int32_t mode) { 438 switch(mode) { 439 case NativeConverter_REPORT: 440 return UCNV_FROM_U_CALLBACK_STOP; 441 case NativeConverter_IGNORE: 442 return UCNV_FROM_U_CALLBACK_SKIP; 443 case NativeConverter_REPLACE: 444 return encoderReplaceCallback; 445 } 446 abort(); 447 } 448 449 static jint NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, 450 jint onMalformedInput, jint onUnmappableInput, jbyteArray subBytes) { 451 UConverter* cnv = toUConverter(address); 452 if (!cnv) { 453 return U_ILLEGAL_ARGUMENT_ERROR; 454 } 455 UConverterFromUCallback fromUOldAction = NULL; 456 const void* fromUOldContext = NULL; 457 ucnv_getFromUCallBack(cnv, &fromUOldAction, const_cast<const void**>(&fromUOldContext)); 458 459 /* fromUOldContext can only be DecodeCallbackContext since 460 * the converter created is private data for the decoder 461 * and callbacks can only be set via this method! 462 */ 463 EncoderCallbackContext* fromUNewContext=NULL; 464 UConverterFromUCallback fromUNewAction=NULL; 465 if (fromUOldContext == NULL) { 466 fromUNewContext = new EncoderCallbackContext; 467 fromUNewAction = CHARSET_ENCODER_CALLBACK; 468 } else { 469 fromUNewContext = const_cast<EncoderCallbackContext*>( 470 reinterpret_cast<const EncoderCallbackContext*>(fromUOldContext)); 471 fromUNewAction = fromUOldAction; 472 fromUOldAction = NULL; 473 fromUOldContext = NULL; 474 } 475 fromUNewContext->onMalformedInput = getFromUCallback(onMalformedInput); 476 fromUNewContext->onUnmappableInput = getFromUCallback(onUnmappableInput); 477 ScopedByteArrayRO sub(env, subBytes); 478 if (sub.get() == NULL) { 479 return U_ILLEGAL_ARGUMENT_ERROR; 480 } 481 fromUNewContext->length = sub.size(); 482 memcpy(fromUNewContext->subBytes, sub.get(), sub.size()); 483 UErrorCode errorCode = U_ZERO_ERROR; 484 ucnv_setFromUCallBack(cnv, fromUNewAction, fromUNewContext, &fromUOldAction, &fromUOldContext, 485 &errorCode); 486 return errorCode; 487 } 488 489 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { 490 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is 491 // never true for us. 492 *err = U_ZERO_ERROR; 493 } 494 495 static void decoderReplaceCallback(const void* rawContext, 496 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, 497 UErrorCode* err) { 498 if (!rawContext) { 499 return; 500 } 501 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 502 *err = U_ZERO_ERROR; 503 ucnv_cbToUWriteUChars(toArgs,context->subUChars, context->length, 0, err); 504 } 505 506 static UConverterToUCallback getToUCallback(int32_t mode) { 507 switch (mode) { 508 case NativeConverter_IGNORE: return decoderIgnoreCallback; 509 case NativeConverter_REPLACE: return decoderReplaceCallback; 510 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; 511 } 512 abort(); 513 } 514 515 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args, 516 const char* codeUnits, int32_t length, 517 UConverterCallbackReason reason, UErrorCode* status) { 518 if (!rawContext) { 519 return; 520 } 521 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 522 switch(reason) { 523 case UCNV_UNASSIGNED: 524 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status); 525 return; 526 case UCNV_ILLEGAL: 527 case UCNV_IRREGULAR: 528 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status); 529 return; 530 case UCNV_CLOSE: 531 delete ctx; 532 return; 533 default: 534 *status = U_ILLEGAL_ARGUMENT_ERROR; 535 return; 536 } 537 } 538 539 static jint NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address, 540 jint onMalformedInput, jint onUnmappableInput, jcharArray subChars) { 541 UConverter* cnv = toUConverter(address); 542 if (cnv == NULL) { 543 return U_ILLEGAL_ARGUMENT_ERROR; 544 } 545 546 UConverterToUCallback toUOldAction; 547 const void* toUOldContext; 548 ucnv_getToUCallBack(cnv, &toUOldAction, &toUOldContext); 549 550 /* toUOldContext can only be DecodeCallbackContext since 551 * the converter created is private data for the decoder 552 * and callbacks can only be set via this method! 553 */ 554 DecoderCallbackContext* toUNewContext = NULL; 555 UConverterToUCallback toUNewAction = NULL; 556 if (toUOldContext == NULL) { 557 toUNewContext = new DecoderCallbackContext; 558 toUNewAction = CHARSET_DECODER_CALLBACK; 559 } else { 560 toUNewContext = const_cast<DecoderCallbackContext*>( 561 reinterpret_cast<const DecoderCallbackContext*>(toUOldContext)); 562 toUNewAction = toUOldAction; 563 toUOldAction = NULL; 564 toUOldContext = NULL; 565 } 566 toUNewContext->onMalformedInput = getToUCallback(onMalformedInput); 567 toUNewContext->onUnmappableInput = getToUCallback(onUnmappableInput); 568 ScopedCharArrayRO sub(env, subChars); 569 if (sub.get() == NULL) { 570 return U_ILLEGAL_ARGUMENT_ERROR; 571 } 572 toUNewContext->length = sub.size(); 573 u_strncpy(toUNewContext->subUChars, sub.get(), sub.size()); 574 UErrorCode errorCode = U_ZERO_ERROR; 575 ucnv_setToUCallBack(cnv, toUNewAction, toUNewContext, &toUOldAction, &toUOldContext, 576 &errorCode); 577 return errorCode; 578 } 579 580 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) { 581 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle)); 582 } 583 584 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) { 585 UConverter* cnv = toUConverter(address); 586 if (cnv == NULL) { 587 return NULL; 588 } 589 UErrorCode status = U_ZERO_ERROR; 590 char subBytes[10]; 591 int8_t len = sizeof(subBytes); 592 ucnv_getSubstChars(cnv, subBytes, &len, &status); 593 if (!U_SUCCESS(status)) { 594 return env->NewByteArray(0); 595 } 596 jbyteArray result = env->NewByteArray(len); 597 if (result == NULL) { 598 return NULL; 599 } 600 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(subBytes)); 601 return result; 602 } 603 604 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) { 605 ScopedUtfChars name1Chars(env, name1); 606 if (name1Chars.c_str() == NULL) { 607 return JNI_FALSE; 608 } 609 ScopedUtfChars name2Chars(env, name2); 610 if (name2Chars.c_str() == NULL) { 611 return JNI_FALSE; 612 } 613 614 UErrorCode errorCode = U_ZERO_ERROR; 615 UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode)); 616 UnicodeSet set1; 617 ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 618 619 UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode)); 620 UnicodeSet set2; 621 ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 622 623 return U_SUCCESS(errorCode) && set1.containsAll(set2); 624 } 625 626 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) { 627 ScopedUtfChars charsetNameChars(env, charsetName); 628 if (charsetNameChars.c_str() == NULL) { 629 return NULL; 630 } 631 // Get ICU's canonical name for this charset. 632 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str()); 633 if (icuCanonicalName == NULL) { 634 return NULL; 635 } 636 // Get Java's canonical name for this charset. 637 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName); 638 if (env->ExceptionOccurred()) { 639 return NULL; 640 } 641 642 // Check that this charset is supported. 643 // ICU doesn't offer any "isSupported", so we just open and immediately close. 644 // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need. 645 UErrorCode dummy = U_ZERO_ERROR; 646 UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy)); 647 if (cnv.get() == NULL) { 648 return NULL; 649 } 650 cnv.reset(); 651 652 // Get the aliases for this charset. 653 jobjectArray aliases = getAliases(env, icuCanonicalName); 654 if (env->ExceptionOccurred()) { 655 return NULL; 656 } 657 658 // Construct the CharsetICU object. 659 jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>", 660 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"); 661 if (env->ExceptionOccurred()) { 662 return NULL; 663 } 664 return env->NewObject(JniConstants::charsetICUClass, charsetConstructor, 665 javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases); 666 } 667 668 static JNINativeMethod gMethods[] = { 669 NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"), 670 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"), 671 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"), 672 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"), 673 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"), 674 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"), 675 NATIVE_METHOD(NativeConverter, flushByteToChar, "(J[CI[I)I"), 676 NATIVE_METHOD(NativeConverter, flushCharToByte, "(J[BI[I)I"), 677 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"), 678 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"), 679 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"), 680 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"), 681 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"), 682 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"), 683 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"), 684 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"), 685 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"), 686 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JII[C)I"), 687 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)I"), 688 }; 689 int register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv* env) { 690 return jniRegisterNativeMethods(env, "com/ibm/icu4jni/charset/NativeConverter", 691 gMethods, NELEM(gMethods)); 692 } 693