/**
 *******************************************************************************
 * Copyright (C) 2006-2013, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *******************************************************************************
 */

package com.ibm.icu.charset;

import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;

/**
 * An abstract class that provides framework methods of encoding operations for concrete
 * subclasses.
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
public abstract class CharsetEncoderICU extends CharsetEncoder {

    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    /* output bytes that did not fit into the target of a previous call */
    byte[] errorBuffer = new byte[30];

    /* number of valid bytes in errorBuffer */
    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    int fromUnicodeStatus;

    /* pending code point; 0 means "nothing pending" (see implReset and the callback handling) */
    int fromUChar32;

    boolean useSubChar1;

    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    int preFromUBegin;

    int preFromULength; /* negative: replay */

    /* UTF-16 form of the code point that was handed to the most recent error callback */
    char[] invalidUCharBuffer = new char[2];

    int invalidUCharLength;

    /* context object passed to the error callbacks */
    Object fromUContext;

    private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    /*
     * Dispatching callback: forwards unmappable results to onUnmappableInput and every
     * other result (treated as malformed) to onMalformedInput.
     */
    CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
        public CoderResult call(CharsetEncoderICU encoder, Object context,
                CharBuffer source, ByteBuffer target, IntBuffer offsets,
                char[] buffer, int length, int cp, CoderResult cr) {
            if (cr.isUnmappable()) {
                return onUnmappableInput.call(encoder, context, source, target,
                        offsets, buffer, length, cp, cr);
            } else /* if (cr.isMalformed()) */ {
                return onMalformedInput.call(encoder, context, source, target,
                        offsets, buffer, length, cp, cr);
            }
        }
    };

    /*
     * Constructs a new encoder for the given charset.
     *
     * @param cs
     *            the charset for which the encoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }

    /**
     * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
     * that will convert a Unicode codepoint sequence to a byte sequence, but
     * the encoded byte sequence will round trip convert to a different
     * Unicode codepoint sequence.
     * @return true if the converter uses fallback, false otherwise.
     * @stable ICU 3.8
     */
    public boolean isFallbackUsed() {
        return useFallback;
    }

    /**
     * Sets whether this Encoder can use fallbacks.
     * @param usesFallback true if the user wants the converter to take
     *  advantage of the fallback mapping, false otherwise.
     * @stable ICU 3.8
     */
    public void setFallbackUsed(boolean usesFallback) {
        useFallback = usesFallback;
    }

    /*
     * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
     * @param c A codepoint
     */
    final boolean isFromUUseFallback(int c) {
        return (useFallback) || isUnicodePrivateUse(c);
    }

    /**
     * Use fallbacks from Unicode to codepage when iUseFallback or for private-use code points
     */
    static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
        return (iUseFallback) || isUnicodePrivateUse(c);
    }

    /*
     * Returns true for the ranges U+E000..U+F8FF, U+F0000..U+FFFFD and U+100000..U+10FFFD.
     */
    private static final boolean isUnicodePrivateUse(int c) {
        // First test for U+E000 to optimize for the most common characters.
        return c >= 0xE000 && (c <= 0xF8FF ||
                c >= 0xF0000 && (c <= 0xFFFFD ||
                (c >= 0x100000 && c <= 0x10FFFD)));
    }

    /**
     * Sets the action to be taken if an illegal (malformed) sequence is encountered.
     *
     * @param newAction
     *            action to be taken
     * @stable ICU 3.6
     */
    @Override
    protected void implOnMalformedInput(CodingErrorAction newAction) {
        onMalformedInput = getCallback(newAction);
    }

    /**
     * Sets the action to be taken if an unmappable character is encountered.
     *
     * @param newAction
     *            action to be taken
     * @stable ICU 3.6
     */
    @Override
    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
        onUnmappableInput = getCallback(newAction);
    }

    /**
     * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
     * You would normally call this twice to set both the malformed and unmappable error. In this case,
     * newContext should remain the same since using a different newContext each time will negate the last
     * one used.
     * A result that is neither malformed nor unmappable leaves both callbacks unchanged;
     * the context is still updated in that case.
     * @param err CoderResult
     * @param newCallback CharsetCallback.Encoder
     * @param newContext Object
     * @stable ICU 4.0
     */
    public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
        if (err.isMalformed()) {
            onMalformedInput = newCallback;
        } else if (err.isUnmappable()) {
            onUnmappableInput = newCallback;
        } else {
            /* Error: Only malformed and unmappable are handled. */
        }

        if (fromUContext == null || !fromUContext.equals(newContext)) {
            setFromUContext(newContext);
        }
    }

    /**
     * Sets fromUContext used in callbacks.
     *
     * @param newContext Object
     * @stable ICU 4.0
     */
    public final void setFromUContext(Object newContext) {
        fromUContext = newContext;
    }

    /*
     * Maps a java.nio coding error action onto the corresponding ICU callback:
     * REPLACE -> substitute, IGNORE -> skip, anything else (i.e. REPORT) -> stop.
     */
    private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
        if (action == CodingErrorAction.REPLACE) {
            return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
        } else if (action == CodingErrorAction.IGNORE) {
            return CharsetCallback.FROM_U_CALLBACK_SKIP;
        } else /* if (action == CodingErrorAction.REPORT) */ {
            return CharsetCallback.FROM_U_CALLBACK_STOP;
        }
    }

    /* zero-length input used by implFlush */
    private static final CharBuffer EMPTY = CharBuffer.allocate(0);

    /**
     * Flushes any characters saved in the converter's internal buffer and
     * resets the converter.
     * @param out the output byte buffer
     * @return result of the flushing action, which completes the encoding of all input.
     *         Returns CoderResult.UNDERFLOW if the action succeeds.
     * @stable ICU 3.6
     */
    @Override
    protected CoderResult implFlush(ByteBuffer out) {
        return encode(EMPTY, out, null, true);
    }

    /**
     * Resets the from Unicode mode of converter
     * @stable ICU 3.6
     */
    @Override
    protected void implReset() {
        errorBufferLength = 0;
        fromUnicodeStatus = 0;
        fromUChar32 = 0;
        fromUnicodeReset();
    }

    /* clears the state kept for m:n partial matches */
    private void fromUnicodeReset() {
        preFromUBegin = 0;
        preFromUFirstCP = UConverterConstants.U_SENTINEL;
        preFromULength = 0;
    }

    /**
     * Encodes one or more chars. The default behaviour of the
     * converter is stop and report if an error in input stream is encountered.
     * To set different behaviour use @see CharsetEncoder.onMalformedInput()
     * @param in buffer to encode
     * @param out buffer to populate with encoded result
     * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
     *         action succeeds or more input is needed for completing the encoding action.
     * @stable ICU 3.6
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
            // The Java framework should have already substituted what was left.
            fromUChar32 = 0;
            return CoderResult.UNDERFLOW;
        }

        /*
         * Skip the chars that are already held in the converter state;
         * setSourcePosition() below moves the position back by whatever is
         * pending after this call.
         */
        in.position(in.position() + fromUCountPending());

        /* do the conversion */
        CoderResult ret = encode(in, out, null, false);
        setSourcePosition(in);

        /* No reset here, to keep the proper state of the encoder. */
        return ret;
    }

    /*
     * Implements ICU semantics of buffer management
     * @param source
     * @param target
     * @param offsets
     * @param flush
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);

    /*
     * Implements ICU semantics for encoding the buffer
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets optional buffer receiving one entry per output byte; may be null
     * @param flush true if, and only if, the invoker can provide no
     *       additional input bytes beyond those in the given buffer.
     * @return A CoderResult object that contains the error result when an error occurs.
     * @throws IllegalArgumentException if source or target is null
     */
    final CoderResult encode(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush) {

        /* check parameters */
        if (target == null || source == null) {
            throw new IllegalArgumentException();
        }

        /*
         * Make sure that the buffer sizes do not exceed the number range for
         * int32_t because some functions use the size (in units or bytes)
         * rather than comparing pointers, and because offsets are int32_t values.
         *
         * size_t is guaranteed to be unsigned and large enough for the job.
         *
         * Return with an error instead of adjusting the limits because we would
         * not be able to maintain the semantics that either the source must be
         * consumed or the target filled (unless an error occurs).
         * An adjustment would be targetLimit=t+0x7fffffff; for example.
         */

        /* flush the target overflow buffer */
        if (errorBufferLength > 0) {
            byte[] overflowArray = errorBuffer;
            int length = errorBufferLength;
            int i = 0;

            do {
                if (target.remaining() == 0) {
                    /* the overflow buffer contains too much, keep the rest (moved to the front) */
                    int rest = length - i;
                    System.arraycopy(overflowArray, i, overflowArray, 0, rest);
                    errorBufferLength = rest;
                    return CoderResult.OVERFLOW;
                }

                /* copy the overflow contents to the target */
                target.put(overflowArray[i++]);
                if (offsets != null) {
                    offsets.put(-1); /* no source index available for old output */
                }
            } while (i < length);

            /* the overflow buffer is completely copied to the target */
            errorBufferLength = 0;
        }

        if (!flush && source.remaining() == 0 && preFromULength >= 0) {
            /* the overflow buffer is emptied and there is no new input: we are done */
            return CoderResult.UNDERFLOW;
        }

        /*
         * Do not simply return with a buffer overflow error if
         * !flush && t==targetLimit
         * because it is possible that the source will not generate any output.
         * For example, the skip callback may be called;
         * it does not output anything.
         */

        return fromUnicodeWithCallback(source, target, offsets, flush);
    }

    /*
     * Implementation note for m:n conversions
     *
     * While collecting source units to find the longest match for m:n conversion,
     * some source units may need to be stored for a partial match.
     * When a second buffer does not yield a match on all of the previously stored
     * source units, then they must be "replayed", i.e., fed back into the converter.
     *
     * The code relies on the fact that replaying will not nest -
     * converting a replay buffer will not result in a replay.
     * This is because a replay is necessary only after the _continuation_ of a
     * partial match failed, but a replay buffer is converted as a whole.
     * It may result in some of its units being stored again for a partial match,
     * but there will not be a continuation _during_ the replay which could fail.
     *
     * It is conceivable that a callback function could call the converter
     * recursively in a way that causes another replay to be stored, but that
     * would be an error in the callback function.
     * Such violations will cause assertion failures in a debug build,
     * and wrong output, but they will not cause a crash.
     */
    final CoderResult fromUnicodeWithCallback(CharBuffer source,
            ByteBuffer target, IntBuffer offsets, boolean flush) {
        int sBufferIndex;
        int sourceIndex;
        int errorInputLength;
        boolean converterSawEndOfInput, calledCallback;

        /* variables for m:n conversion */
        CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
        int replayArrayIndex = 0;
        CharBuffer realSource;
        boolean realFlush;

        CoderResult cr = CoderResult.UNDERFLOW;

        sourceIndex = 0;

        if (preFromULength >= 0) {
            /* normal mode */
            realSource = null;
            realFlush = false;
        } else {
            /*
             * Previous m:n conversion stored source units from a partial match
             * and failed to consume all of them.
             * We need to "replay" them from a temporary buffer and convert them first.
             */
            realSource = source;
            realFlush = flush;

            replayArray.put(preFromUArray, 0, -preFromULength);
            source = replayArray;
            source.position(replayArrayIndex);
            source.limit(replayArrayIndex - preFromULength); // preFromULength is negative, see declaration
            flush = false;

            preFromULength = 0;
        }

        /*
         * loop for conversion and error handling
         *
         * loop {
         *   convert
         *   loop {
         *     update offsets
         *     handle end of input
         *     handle errors/call callback
         *   }
         * }
         */
        for (;;) {
            /* convert */
            cr = encodeLoop(source, target, offsets, flush);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv.preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput = (cr.isUnderflow() && flush
                    && source.remaining() == 0 && fromUChar32 == 0);

            /* no callback called yet for this iteration */
            calledCallback = false;

            /* no sourceIndex adjustment for conversion, only for callback output */
            errorInputLength = 0;

            /*
             * loop for offsets and error handling
             *
             * iterates at most 3 times:
             * 1. to clean up after the conversion function
             * 2. after the callback
             * 3. after the callback again if there was truncated input
             */
            for (;;) {
                /* Offsets are currently not updated here in ICU4J. */

                if (preFromULength < 0) {
                    /*
                     * switch the source to new replay units (cannot occur while replaying)
                     * after offset handling and before end-of-input and callback handling
                     */
                    if (realSource == null) {
                        realSource = source;
                        realFlush = flush;

                        /*
                         * The replay buffer may already have been filled and fully
                         * consumed by an earlier replay during this call, which leaves
                         * its position at a shrunken limit. Reset position and limit
                         * so that the put below has room for the new replay units.
                         */
                        replayArray.clear();
                        replayArray.put(preFromUArray, 0, -preFromULength);

                        source = replayArray;
                        source.position(replayArrayIndex);
                        source.limit(replayArrayIndex - preFromULength);
                        flush = false;
                        if ((sourceIndex += preFromULength) < 0) {
                            sourceIndex = -1;
                        }

                        preFromULength = 0;
                    } else {
                        /*
                         * see implementation note before fromUnicodeWithCallback():
                         * a replay was requested while already replaying, so this
                         * assertion is expected to fail and flag the violation
                         */
                        Assert.assrt(realSource == null);
                    }
                }

                /* update pointers */
                sBufferIndex = source.position();
                if (cr.isUnderflow()) {
                    if (sBufferIndex < source.limit()) {
                        /*
                         * continue with the conversion loop while there is still input left
                         * (continue converting by breaking out of only the inner loop)
                         */
                        break;
                    } else if (realSource != null) {
                        /* switch back from replaying to the real source and continue */
                        source = realSource;
                        flush = realFlush;
                        sourceIndex = source.position();
                        realSource = null;
                        break;
                    } else if (flush && fromUChar32 != 0) {
                        /*
                         * the entire input stream is consumed
                         * and there is a partial, truncated input sequence left
                         */

                        /* inject an error and continue with callback handling */
                        cr = CoderResult.malformedForLength(1);
                        calledCallback = false; /* new error condition */
                    } else {
                        /* input consumed */
                        if (flush) {
                            /*
                             * return to the conversion loop once more if the flush
                             * flag is set and the conversion function has not
                             * successfully processed the end of the input yet
                             *
                             * (continue converting by breaking out of only the inner loop)
                             */
                            if (!converterSawEndOfInput) {
                                break;
                            }

                            /* reset the converter without calling the callback function */
                            implReset();
                        }

                        /* done successfully */
                        return cr;
                    }
                }

                /* error handling */
                if (calledCallback || cr.isOverflow()
                        || (!cr.isMalformed() && !cr.isUnmappable())) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the converter state
                     */
                    if (realSource != null) {
                        int length = source.remaining();
                        if (length > 0) {
                            source.get(preFromUArray, 0, length);
                            preFromULength = -length; /* negative: replay on the next call */
                        }
                    }
                    return cr;
                }

                /* callback handling */
                {
                    /* get and write the code point */
                    int codePoint = fromUChar32;
                    errorInputLength = UTF16.append(invalidUCharBuffer, 0,
                            fromUChar32);
                    invalidUCharLength = errorInputLength;

                    /* set the converter state to deal with the next character */
                    fromUChar32 = 0;

                    /* call the callback function */
                    cr = fromCharErrorBehaviour.call(this, fromUContext,
                            source, target, offsets, invalidUCharBuffer,
                            invalidUCharLength, codePoint, cr);
                }

                /*
                 * loop back to the offset handling
                 *
                 * this flag will indicate after offset handling
                 * that a callback was called;
                 * if the callback did not resolve the error, then we return
                 */
                calledCallback = true;
            }
        }
    }

    /*
     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
     * can be converted to the target encoding. If the caller wants to test if a
     * surrogate pair can be converted to target encoding then the
     * responsibility of assembling the int value lies with the caller.
     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
     * <pre>
     *  while(i<mySource.length){
     *      if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
     *          if(UTF16.isTrailSurrogate(mySource[i+1])){
     *              int temp = UTF16.charAt(mySource,i,i+1,0);
     *              if(!((CharsetEncoderICU) myConv).canEncode(temp)){
     *                  passed=false;
     *              }
     *              i++;
     *              i++;
     *          }
     *      }
     *  }
     * </pre>
     * or
     * <pre>
     *  String src = new String(mySource);
     *  int i = 0, codepoint;
     *  boolean passed = true;
     *  while(i<src.length()){
     *      codepoint = UTF16.charAt(src,i);
     *      i+= (codepoint>0xffff)? 2:1;
     *      if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
     *          passed = false;
     *      }
     *  }
     * </pre>
     *
     * @param codepoint Unicode code point as int value
     * @return true if a character can be converted
     */
    /* TODO This is different from Java's canEncode(char) API.
     * ICU's API should implement getUnicodeSet,
     * and override canEncode(char) which queries getUnicodeSet.
     * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
     */
    /*public boolean canEncode(int codepoint) {
        return true;
    }*/

    /**
     * Overrides super class method. This implementation accepts every replacement.
     * @param repl the replacement bytes to test
     * @return always true
     * @stable ICU 3.6
     */
    @Override
    public boolean isLegalReplacement(byte[] repl) {
        return true;
    }

    /*
     * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
     * As many bytes as fit are written to out; the remainder, if any, is stored at the
     * start of cnv.errorBuffer and cnv.errorBufferLength is set to its length (0 when
     * everything fit).
     * @param cnv the encoder whose overflow buffer receives bytes that do not fit
     * @param bytesArray source of the output bytes
     * @param bytesBegin index of the first byte to write
     * @param bytesLength number of bytes to write
     * @param out the target byte buffer
     * @param offsets if not null, receives sourceIndex once for every byte written to out
     * @param sourceIndex the offset value to record
     * @return CoderResult.OVERFLOW if some bytes had to be kept in the overflow buffer,
     *         CoderResult.UNDERFLOW otherwise.
     */
    static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
            byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
            IntBuffer offsets, int sourceIndex) {

        int bytesLimit = bytesBegin + bytesLength;

        /* write as many bytes as the target can take */
        int written = Math.min(bytesLength, out.remaining());
        if (written > 0) {
            out.put(bytesArray, bytesBegin, written);
            bytesBegin += written;

            /* one offset per byte that actually reached the target */
            if (offsets != null) {
                for (int i = 0; i < written; i++) {
                    offsets.put(sourceIndex);
                }
            }
        }

        /* write overflow */
        cnv.errorBufferLength = bytesLimit - bytesBegin;
        if (cnv.errorBufferLength > 0) {
            System.arraycopy(bytesArray, bytesBegin, cnv.errorBuffer, 0, cnv.errorBufferLength);
            return CoderResult.OVERFLOW;
        }
        return CoderResult.UNDERFLOW;
    }

    /*
     * Returns the number of chars held in the converter's internal state
     * because more input is needed for completing the conversion. This function is
     * useful for mapping semantics of ICU's converter interface to those of iconv,
     * and this information is not needed for normal conversion.
     * @return The number of chars in the state.
     */
    /*public*/int fromUCountPending() {
        if (preFromULength > 0) {
            return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
        } else if (preFromULength < 0) {
            return -preFromULength;
        } else if (fromUChar32 > 0) {
            return 1;
        } else if (preFromUFirstCP > 0) {
            return UTF16.getCharCount(preFromUFirstCP);
        }
        return 0;
    }

    /*
     * Moves the source position back by the number of chars that are still
     * pending in the converter state (see fromUCountPending()).
     * @param source the input character buffer
     */
    private final void setSourcePosition(CharBuffer source) {
        // ok was there input held in the previous invocation of encodeLoop
        // that resulted in output in this invocation?
        source.position(source.position() - fromUCountPending());
    }

    /*
     * Write the codepage substitution character.
     * Subclasses to override this method.
     * For stateful converters, it is typically necessary to handle this
     * specifically for the converter in order to properly maintain the state.
     * @param encoder the encoder whose replacement (or subChar1) is written
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
            ByteBuffer target, IntBuffer offsets) {
        CharsetICU cs = (CharsetICU) encoder.charset();
        byte[] sub = encoder.replacement();
        if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
            /* the charset has a subChar1 and the offending char is at most 0xff: use subChar1 */
            return CharsetEncoderICU.fromUWriteBytes(encoder,
                    new byte[] { cs.subChar1 }, 0, 1, target, offsets,
                    source.position());
        } else {
            return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
                    sub.length, target, offsets, source.position());
        }
    }

    /*
     * Write the characters to target.
     * @param encoder the encoder used to convert the characters
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
            CharBuffer source, ByteBuffer target, IntBuffer offsets) {
        CoderResult cr = CoderResult.UNDERFLOW;

        /* This is a fun one.  Recursion can occur - we're basically going to
         * just retry shoving data through the same converter. Note, if you got
         * here through some kind of invalid sequence, you maybe should emit a
         * reset sequence of some kind. Since this IS an actual conversion,
         * take care that you've changed the callback or the data, or you'll
         * get an infinite loop.
         */

        int oldTargetPosition = target.position();
        int offsetIndex = source.position();

        cr = encoder.encode(source, target, null, false); /* no offsets and no flush */

        if (offsets != null) {
            /* record the starting source position once for every byte just written */
            while (target.position() != oldTargetPosition) {
                offsets.put(offsetIndex);
                oldTargetPosition++;
            }
        }

        /* Note, if you did something like used a stop subcallback, things would get interesting.
         * In fact, here's where we want to return the partially consumed in-source!
         */
        if (cr.isOverflow()) {
            /* Overflowed target. Now, we'll write into the charErrorBuffer.
             * It's a fixed size. If we overflow it...Hm
             */

            /* start the new target at the first free slot in the error buffer */
            int errBuffLen = encoder.errorBufferLength;
            ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
            newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
            encoder.errorBufferLength = 0;

            encoder.encode(source, newTarget, null, false);

            encoder.errorBuffer = newTarget.array();
            encoder.errorBufferLength = newTarget.position();
        }

        return cr;
    }

    /**
     * <p>
     * Handles a common situation where a character has been read and it may be
     * a lead surrogate followed by a trail surrogate. This method can change
     * the source position and will modify fromUChar32.
     * </p>
     *
     * <p>
     * If <code>null</code> is returned, then there was success in reading a
     * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
     * <code>fromUChar32</code> should be reset (to 0) after being read.
     * In every other case <code>fromUChar32</code> is set to <code>lead</code>.
     * </p>
     *
     * @param source
     *            The encoding source.
     * @param lead
     *            A character that may be the first in a surrogate pair.
     * @return <code>CoderResult.malformedForLength(1)</code> or
     *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or
     *         <code>null</code> if there isn't.
     * @see #handleSurrogates(CharBuffer, char)
     * @see #handleSurrogates(char[], int, int, char)
     */
    final CoderResult handleSurrogates(CharBuffer source, char lead) {
        if (!UTF16.isLeadSurrogate(lead)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        if (!source.hasRemaining()) {
            /* the trail surrogate may arrive with the next buffer */
            fromUChar32 = lead;
            return CoderResult.UNDERFLOW;
        }

        char trail = source.get();

        if (!UTF16.isTrailSurrogate(trail)) {
            fromUChar32 = lead;
            /* un-read the char that turned out not to be a trail surrogate */
            source.position(source.position() - 1);
            return CoderResult.malformedForLength(1);
        }

        fromUChar32 = UCharacter.getCodePoint(lead, trail);
        return null;
    }

    /**
     * <p>
     * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
     * requirement, the calling method must also increment the index if this method returns
     * <code>null</code>.
     * </p>
     *
     * @param sourceArray
     *            The encoding source.
     * @param sourceIndex
     *            Index in <code>sourceArray</code> at which the trail surrogate is expected.
     * @param sourceLimit
     *            Index one past the last usable element of <code>sourceArray</code>.
     * @param lead
     *            A character that may be the first in a surrogate pair.
     * @return <code>CoderResult.malformedForLength(1)</code> or
     *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
     *         there isn't.
     * @see #handleSurrogates(CharBuffer, char)
     * @see #handleSurrogates(char[], int, int, char)
     */
    final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
            int sourceLimit, char lead) {
        if (!UTF16.isLeadSurrogate(lead)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        if (sourceIndex >= sourceLimit) {
            fromUChar32 = lead;
            return CoderResult.UNDERFLOW;
        }

        char trail = sourceArray[sourceIndex];

        if (!UTF16.isTrailSurrogate(trail)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        fromUChar32 = UCharacter.getCodePoint(lead, trail);
        return null;
    }

    /**
     * Returns the maxCharsPerByte value for the Charset that created this encoder.
     * @return maxCharsPerByte
     * @stable ICU 4.8
     */
    public final float maxCharsPerByte() {
        return ((CharsetICU)(this.charset())).maxCharsPerByte;
    }

    /**
     * Calculates the size of a buffer for conversion from Unicode to a charset.
     * The calculated size is guaranteed to be sufficient for this conversion.
     *
     * It takes into account initial and final non-character bytes that are output
     * by some converters.
     * It does not take into account callbacks which output more than one charset
     * character sequence per call, like escape callbacks.
     * The default (substitution) callback only outputs one charset character sequence.
     *
     * @param length Number of chars to be converted.
     * @param maxCharSize Return value from maxBytesPerChar for the converter
     *                    that will be used.
     * @return Size of a buffer that will be large enough to hold the output of bytes
     *
     * @stable ICU 49
     */
    public static int getMaxBytesForString(int length, int maxCharSize) {
        return ((length + 10) * maxCharSize);
    }

}