1 // 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // bytesinkutil.cpp 5 // created: 2017sep14 Markus W. Scherer 6 7 #include "unicode/utypes.h" 8 #include "unicode/bytestream.h" 9 #include "unicode/edits.h" 10 #include "unicode/stringoptions.h" 11 #include "unicode/utf8.h" 12 #include "unicode/utf16.h" 13 #include "bytesinkutil.h" 14 #include "cmemory.h" 15 #include "uassert.h" 16 17 U_NAMESPACE_BEGIN 18 19 UBool 20 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length, 21 ByteSink &sink, Edits *edits, UErrorCode &errorCode) { 22 if (U_FAILURE(errorCode)) { return FALSE; } 23 char scratch[200]; 24 int32_t s8Length = 0; 25 for (int32_t i = 0; i < s16Length;) { 26 int32_t capacity; 27 int32_t desiredCapacity = s16Length - i; 28 if (desiredCapacity < (INT32_MAX / 3)) { 29 desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit 30 } else if (desiredCapacity < (INT32_MAX / 2)) { 31 desiredCapacity *= 2; 32 } else { 33 desiredCapacity = INT32_MAX; 34 } 35 char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity, 36 scratch, UPRV_LENGTHOF(scratch), &capacity); 37 capacity -= U8_MAX_LENGTH - 1; 38 int32_t j = 0; 39 for (; i < s16Length && j < capacity;) { 40 UChar32 c; 41 U16_NEXT_UNSAFE(s16, i, c); 42 U8_APPEND_UNSAFE(buffer, j, c); 43 } 44 if (j > (INT32_MAX - s8Length)) { 45 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 46 return FALSE; 47 } 48 sink.Append(buffer, j); 49 s8Length += j; 50 } 51 if (edits != nullptr) { 52 edits->addReplace(length, s8Length); 53 } 54 return TRUE; 55 } 56 57 UBool 58 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit, 59 const char16_t *s16, int32_t s16Length, 60 ByteSink &sink, Edits *edits, UErrorCode &errorCode) { 61 if (U_FAILURE(errorCode)) { return FALSE; } 62 if ((limit - s) > INT32_MAX) { 63 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 64 return FALSE; 65 } 66 return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode); 67 } 68 69 void 70 ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) { 71 char s8[U8_MAX_LENGTH]; 72 int32_t s8Length = 0; 73 U8_APPEND_UNSAFE(s8, s8Length, c); 74 if (edits != nullptr) { 75 edits->addReplace(length, s8Length); 76 } 77 sink.Append(s8, s8Length); 78 } 79 80 namespace { 81 82 // See unicode/utf8.h U8_APPEND_UNSAFE(). 83 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } 84 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } 85 86 } // namespace 87 88 void 89 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { 90 U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8 91 char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) }; 92 sink.Append(s8, 2); 93 } 94 95 UBool 96 ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length, 97 ByteSink &sink, uint32_t options, Edits *edits, 98 UErrorCode &errorCode) { 99 if (U_FAILURE(errorCode)) { return FALSE; } 100 if (length > 0) { 101 if (edits != nullptr) { 102 edits->addUnchanged(length); 103 } 104 if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { 105 sink.Append(reinterpret_cast<const char *>(s), length); 106 } 107 } 108 return TRUE; 109 } 110 111 UBool 112 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, 113 ByteSink &sink, uint32_t options, Edits *edits, 114 UErrorCode &errorCode) { 115 if (U_FAILURE(errorCode)) { return FALSE; } 116 if ((limit - s) > INT32_MAX) { 117 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 118 return FALSE; 119 } 120 return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode); 121 } 122 123 U_NAMESPACE_END 124