Home | History | Annotate | Download | only in common
// © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // bytesinkutil.cpp
      5 // created: 2017sep14 Markus W. Scherer
      6 
      7 #include "unicode/utypes.h"
      8 #include "unicode/bytestream.h"
      9 #include "unicode/edits.h"
     10 #include "unicode/stringoptions.h"
     11 #include "unicode/utf8.h"
     12 #include "unicode/utf16.h"
     13 #include "bytesinkutil.h"
     14 #include "cmemory.h"
     15 #include "uassert.h"
     16 
     17 U_NAMESPACE_BEGIN
     18 
     19 UBool
     20 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
     21                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
     22     if (U_FAILURE(errorCode)) { return FALSE; }
     23     char scratch[200];
     24     int32_t s8Length = 0;
     25     for (int32_t i = 0; i < s16Length;) {
     26         int32_t capacity;
     27         int32_t desiredCapacity = s16Length - i;
     28         if (desiredCapacity < (INT32_MAX / 3)) {
     29             desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
     30         } else if (desiredCapacity < (INT32_MAX / 2)) {
     31             desiredCapacity *= 2;
     32         } else {
     33             desiredCapacity = INT32_MAX;
     34         }
     35         char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
     36                                             scratch, UPRV_LENGTHOF(scratch), &capacity);
     37         capacity -= U8_MAX_LENGTH - 1;
     38         int32_t j = 0;
     39         for (; i < s16Length && j < capacity;) {
     40             UChar32 c;
     41             U16_NEXT_UNSAFE(s16, i, c);
     42             U8_APPEND_UNSAFE(buffer, j, c);
     43         }
     44         if (j > (INT32_MAX - s8Length)) {
     45             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     46             return FALSE;
     47         }
     48         sink.Append(buffer, j);
     49         s8Length += j;
     50     }
     51     if (edits != nullptr) {
     52         edits->addReplace(length, s8Length);
     53     }
     54     return TRUE;
     55 }
     56 
     57 UBool
     58 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
     59                            const char16_t *s16, int32_t s16Length,
     60                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
     61     if (U_FAILURE(errorCode)) { return FALSE; }
     62     if ((limit - s) > INT32_MAX) {
     63         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     64         return FALSE;
     65     }
     66     return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
     67 }
     68 
     69 void
     70 ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
     71     char s8[U8_MAX_LENGTH];
     72     int32_t s8Length = 0;
     73     U8_APPEND_UNSAFE(s8, s8Length, c);
     74     if (edits != nullptr) {
     75         edits->addReplace(length, s8Length);
     76     }
     77     sink.Append(s8, s8Length);
     78 }
     79 
     80 namespace {
     81 
     82 // See unicode/utf8.h U8_APPEND_UNSAFE().
     83 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
     84 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
     85 
     86 }  // namespace
     87 
     88 void
     89 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
     90     U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
     91     char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
     92     sink.Append(s8, 2);
     93 }
     94 
     95 UBool
     96 ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
     97                               ByteSink &sink, uint32_t options, Edits *edits,
     98                               UErrorCode &errorCode) {
     99     if (U_FAILURE(errorCode)) { return FALSE; }
    100     if (length > 0) {
    101         if (edits != nullptr) {
    102             edits->addUnchanged(length);
    103         }
    104         if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
    105             sink.Append(reinterpret_cast<const char *>(s), length);
    106         }
    107     }
    108     return TRUE;
    109 }
    110 
    111 UBool
    112 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
    113                               ByteSink &sink, uint32_t options, Edits *edits,
    114                               UErrorCode &errorCode) {
    115     if (U_FAILURE(errorCode)) { return FALSE; }
    116     if ((limit - s) > INT32_MAX) {
    117         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    118         return FALSE;
    119     }
    120     return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
    121 }
    122 
    123 U_NAMESPACE_END
    124