Home | History | Annotate | Download | only in i18n
      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
      7 
      8 #include "number_stringbuilder.h"
      9 #include "unicode/utf16.h"
     10 #include "uvectr32.h"
     11 
     12 using namespace icu;
     13 using namespace icu::number;
     14 using namespace icu::number::impl;
     15 
     16 namespace {
     17 
     18 // A version of uprv_memcpy that checks for length 0.
     19 // By default, uprv_memcpy requires a length of at least 1.
     20 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
     21     if (len > 0) {
     22         uprv_memcpy(dest, src, len);
     23     }
     24 }
     25 
     26 // A version of uprv_memmove that checks for length 0.
     27 // By default, uprv_memmove requires a length of at least 1.
     28 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
     29     if (len > 0) {
     30         uprv_memmove(dest, src, len);
     31     }
     32 }
     33 
     34 } // namespace
     35 
     36 NumberStringBuilder::NumberStringBuilder() = default;
     37 
     38 NumberStringBuilder::~NumberStringBuilder() {
     39     if (fUsingHeap) {
     40         uprv_free(fChars.heap.ptr);
     41         uprv_free(fFields.heap.ptr);
     42     }
     43 }
     44 
     45 NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
     46     *this = other;
     47 }
     48 
     49 NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
     50     // Check for self-assignment
     51     if (this == &other) {
     52         return *this;
     53     }
     54 
     55     // Continue with deallocation and copying
     56     if (fUsingHeap) {
     57         uprv_free(fChars.heap.ptr);
     58         uprv_free(fFields.heap.ptr);
     59         fUsingHeap = false;
     60     }
     61 
     62     int32_t capacity = other.getCapacity();
     63     if (capacity > DEFAULT_CAPACITY) {
     64         // FIXME: uprv_malloc
     65         // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
     66         auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
     67         auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
     68         if (newChars == nullptr || newFields == nullptr) {
     69             // UErrorCode is not available; fail silently.
     70             uprv_free(newChars);
     71             uprv_free(newFields);
     72             *this = NumberStringBuilder();  // can't fail
     73             return *this;
     74         }
     75 
     76         fUsingHeap = true;
     77         fChars.heap.capacity = capacity;
     78         fChars.heap.ptr = newChars;
     79         fFields.heap.capacity = capacity;
     80         fFields.heap.ptr = newFields;
     81     }
     82 
     83     uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
     84     uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
     85 
     86     fZero = other.fZero;
     87     fLength = other.fLength;
     88     return *this;
     89 }
     90 
     91 int32_t NumberStringBuilder::length() const {
     92     return fLength;
     93 }
     94 
     95 int32_t NumberStringBuilder::codePointCount() const {
     96     return u_countChar32(getCharPtr() + fZero, fLength);
     97 }
     98 
     99 UChar32 NumberStringBuilder::getFirstCodePoint() const {
    100     if (fLength == 0) {
    101         return -1;
    102     }
    103     UChar32 cp;
    104     U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
    105     return cp;
    106 }
    107 
    108 UChar32 NumberStringBuilder::getLastCodePoint() const {
    109     if (fLength == 0) {
    110         return -1;
    111     }
    112     int32_t offset = fLength;
    113     U16_BACK_1(getCharPtr() + fZero, 0, offset);
    114     UChar32 cp;
    115     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
    116     return cp;
    117 }
    118 
    119 UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
    120     UChar32 cp;
    121     U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
    122     return cp;
    123 }
    124 
    125 UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
    126     int32_t offset = index;
    127     U16_BACK_1(getCharPtr() + fZero, 0, offset);
    128     UChar32 cp;
    129     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
    130     return cp;
    131 }
    132 
    133 NumberStringBuilder &NumberStringBuilder::clear() {
    134     // TODO: Reset the heap here?
    135     fZero = getCapacity() / 2;
    136     fLength = 0;
    137     return *this;
    138 }
    139 
    140 int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
    141     return insertCodePoint(fLength, codePoint, field, status);
    142 }
    143 
    144 int32_t
    145 NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
    146     int32_t count = U16_LENGTH(codePoint);
    147     int32_t position = prepareForInsert(index, count, status);
    148     if (U_FAILURE(status)) {
    149         return count;
    150     }
    151     if (count == 1) {
    152         getCharPtr()[position] = (char16_t) codePoint;
    153         getFieldPtr()[position] = field;
    154     } else {
    155         getCharPtr()[position] = U16_LEAD(codePoint);
    156         getCharPtr()[position + 1] = U16_TRAIL(codePoint);
    157         getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
    158     }
    159     return count;
    160 }
    161 
    162 int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
    163     return insert(fLength, unistr, field, status);
    164 }
    165 
    166 int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
    167                                     UErrorCode &status) {
    168     if (unistr.length() == 0) {
    169         // Nothing to insert.
    170         return 0;
    171     } else if (unistr.length() == 1) {
    172         // Fast path: insert using insertCodePoint.
    173         return insertCodePoint(index, unistr.charAt(0), field, status);
    174     } else {
    175         return insert(index, unistr, 0, unistr.length(), field, status);
    176     }
    177 }
    178 
    179 int32_t
    180 NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
    181                             Field field, UErrorCode &status) {
    182     int32_t count = end - start;
    183     int32_t position = prepareForInsert(index, count, status);
    184     if (U_FAILURE(status)) {
    185         return count;
    186     }
    187     for (int32_t i = 0; i < count; i++) {
    188         getCharPtr()[position + i] = unistr.charAt(start + i);
    189         getFieldPtr()[position + i] = field;
    190     }
    191     return count;
    192 }
    193 
    194 int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
    195     return insert(fLength, other, status);
    196 }
    197 
    198 int32_t
    199 NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
    200     if (this == &other) {
    201         status = U_ILLEGAL_ARGUMENT_ERROR;
    202         return 0;
    203     }
    204     int32_t count = other.fLength;
    205     if (count == 0) {
    206         // Nothing to insert.
    207         return 0;
    208     }
    209     int32_t position = prepareForInsert(index, count, status);
    210     if (U_FAILURE(status)) {
    211         return count;
    212     }
    213     for (int32_t i = 0; i < count; i++) {
    214         getCharPtr()[position + i] = other.charAt(i);
    215         getFieldPtr()[position + i] = other.fieldAt(i);
    216     }
    217     return count;
    218 }
    219 
    220 int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
    221     if (index == 0 && fZero - count >= 0) {
    222         // Append to start
    223         fZero -= count;
    224         fLength += count;
    225         return fZero;
    226     } else if (index == fLength && fZero + fLength + count < getCapacity()) {
    227         // Append to end
    228         fLength += count;
    229         return fZero + fLength - count;
    230     } else {
    231         // Move chars around and/or allocate more space
    232         return prepareForInsertHelper(index, count, status);
    233     }
    234 }
    235 
    236 int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
    237     int32_t oldCapacity = getCapacity();
    238     int32_t oldZero = fZero;
    239     char16_t *oldChars = getCharPtr();
    240     Field *oldFields = getFieldPtr();
    241     if (fLength + count > oldCapacity) {
    242         int32_t newCapacity = (fLength + count) * 2;
    243         int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
    244 
    245         // C++ note: malloc appears in two places: here and in the assignment operator.
    246         auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
    247         auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
    248         if (newChars == nullptr || newFields == nullptr) {
    249             uprv_free(newChars);
    250             uprv_free(newFields);
    251             status = U_MEMORY_ALLOCATION_ERROR;
    252             return -1;
    253         }
    254 
    255         // First copy the prefix and then the suffix, leaving room for the new chars that the
    256         // caller wants to insert.
    257         // C++ note: memcpy is OK because the src and dest do not overlap.
    258         uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
    259         uprv_memcpy2(newChars + newZero + index + count,
    260                 oldChars + oldZero + index,
    261                 sizeof(char16_t) * (fLength - index));
    262         uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
    263         uprv_memcpy2(newFields + newZero + index + count,
    264                 oldFields + oldZero + index,
    265                 sizeof(Field) * (fLength - index));
    266 
    267         if (fUsingHeap) {
    268             uprv_free(oldChars);
    269             uprv_free(oldFields);
    270         }
    271         fUsingHeap = true;
    272         fChars.heap.ptr = newChars;
    273         fChars.heap.capacity = newCapacity;
    274         fFields.heap.ptr = newFields;
    275         fFields.heap.capacity = newCapacity;
    276         fZero = newZero;
    277         fLength += count;
    278     } else {
    279         int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
    280 
    281         // C++ note: memmove is required because src and dest may overlap.
    282         // First copy the entire string to the location of the prefix, and then move the suffix
    283         // to make room for the new chars that the caller wants to insert.
    284         uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
    285         uprv_memmove2(oldChars + newZero + index + count,
    286                 oldChars + newZero + index,
    287                 sizeof(char16_t) * (fLength - index));
    288         uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
    289         uprv_memmove2(oldFields + newZero + index + count,
    290                 oldFields + newZero + index,
    291                 sizeof(Field) * (fLength - index));
    292 
    293         fZero = newZero;
    294         fLength += count;
    295     }
    296     return fZero + index;
    297 }
    298 
    299 UnicodeString NumberStringBuilder::toUnicodeString() const {
    300     return UnicodeString(getCharPtr() + fZero, fLength);
    301 }
    302 
    303 UnicodeString NumberStringBuilder::toDebugString() const {
    304     UnicodeString sb;
    305     sb.append(u"<NumberStringBuilder [", -1);
    306     sb.append(toUnicodeString());
    307     sb.append(u"] [", -1);
    308     for (int i = 0; i < fLength; i++) {
    309         if (fieldAt(i) == UNUM_FIELD_COUNT) {
    310             sb.append(u'n');
    311         } else {
    312             char16_t c;
    313             switch (fieldAt(i)) {
    314                 case UNUM_SIGN_FIELD:
    315                     c = u'-';
    316                     break;
    317                 case UNUM_INTEGER_FIELD:
    318                     c = u'i';
    319                     break;
    320                 case UNUM_FRACTION_FIELD:
    321                     c = u'f';
    322                     break;
    323                 case UNUM_EXPONENT_FIELD:
    324                     c = u'e';
    325                     break;
    326                 case UNUM_EXPONENT_SIGN_FIELD:
    327                     c = u'+';
    328                     break;
    329                 case UNUM_EXPONENT_SYMBOL_FIELD:
    330                     c = u'E';
    331                     break;
    332                 case UNUM_DECIMAL_SEPARATOR_FIELD:
    333                     c = u'.';
    334                     break;
    335                 case UNUM_GROUPING_SEPARATOR_FIELD:
    336                     c = u',';
    337                     break;
    338                 case UNUM_PERCENT_FIELD:
    339                     c = u'%';
    340                     break;
    341                 case UNUM_PERMILL_FIELD:
    342                     c = u'';
    343                     break;
    344                 case UNUM_CURRENCY_FIELD:
    345                     c = u'$';
    346                     break;
    347                 default:
    348                     c = u'?';
    349                     break;
    350             }
    351             sb.append(c);
    352         }
    353     }
    354     sb.append(u"]>", -1);
    355     return sb;
    356 }
    357 
    358 const char16_t *NumberStringBuilder::chars() const {
    359     return getCharPtr() + fZero;
    360 }
    361 
    362 bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
    363     if (fLength != other.fLength) {
    364         return false;
    365     }
    366     for (int32_t i = 0; i < fLength; i++) {
    367         if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
    368             return false;
    369         }
    370     }
    371     return true;
    372 }
    373 
    374 void NumberStringBuilder::populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const {
    375     int32_t rawField = fp.getField();
    376 
    377     if (rawField == FieldPosition::DONT_CARE) {
    378         return;
    379     }
    380 
    381     if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
    382         status = U_ILLEGAL_ARGUMENT_ERROR;
    383         return;
    384     }
    385 
    386     auto field = static_cast<Field>(rawField);
    387 
    388     bool seenStart = false;
    389     int32_t fractionStart = -1;
    390     for (int i = fZero; i <= fZero + fLength; i++) {
    391         Field _field = UNUM_FIELD_COUNT;
    392         if (i < fZero + fLength) {
    393             _field = getFieldPtr()[i];
    394         }
    395         if (seenStart && field != _field) {
    396             // Special case: GROUPING_SEPARATOR counts as an INTEGER.
    397             if (field == UNUM_INTEGER_FIELD && _field == UNUM_GROUPING_SEPARATOR_FIELD) {
    398                 continue;
    399             }
    400             fp.setEndIndex(i - fZero + offset);
    401             break;
    402         } else if (!seenStart && field == _field) {
    403             fp.setBeginIndex(i - fZero + offset);
    404             seenStart = true;
    405         }
    406         if (_field == UNUM_INTEGER_FIELD || _field == UNUM_DECIMAL_SEPARATOR_FIELD) {
    407             fractionStart = i - fZero + 1;
    408         }
    409     }
    410 
    411     // Backwards compatibility: FRACTION needs to start after INTEGER if empty
    412     if (field == UNUM_FRACTION_FIELD && !seenStart) {
    413         fp.setBeginIndex(fractionStart + offset);
    414         fp.setEndIndex(fractionStart + offset);
    415     }
    416 }
    417 
    418 void NumberStringBuilder::populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const {
    419     // TODO: Set an initial capacity on uvec?
    420     LocalPointer <UVector32> uvec(new UVector32(status));
    421     if (U_FAILURE(status)) {
    422         return;
    423     }
    424 
    425     Field current = UNUM_FIELD_COUNT;
    426     int32_t currentStart = -1;
    427     for (int32_t i = 0; i < fLength; i++) {
    428         Field field = fieldAt(i);
    429         if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) {
    430             // Special case: GROUPING_SEPARATOR counts as an INTEGER.
    431             // Add the field, followed by the start index, followed by the end index to uvec.
    432             uvec->addElement(UNUM_GROUPING_SEPARATOR_FIELD, status);
    433             uvec->addElement(i, status);
    434             uvec->addElement(i + 1, status);
    435         } else if (current != field) {
    436             if (current != UNUM_FIELD_COUNT) {
    437                 // Add the field, followed by the start index, followed by the end index to uvec.
    438                 uvec->addElement(current, status);
    439                 uvec->addElement(currentStart, status);
    440                 uvec->addElement(i, status);
    441             }
    442             current = field;
    443             currentStart = i;
    444         }
    445         if (U_FAILURE(status)) {
    446             return;
    447         }
    448     }
    449     if (current != UNUM_FIELD_COUNT) {
    450         // Add the field, followed by the start index, followed by the end index to uvec.
    451         uvec->addElement(current, status);
    452         uvec->addElement(currentStart, status);
    453         uvec->addElement(fLength, status);
    454     }
    455 
    456     // Give uvec to the FieldPositionIterator, which adopts it.
    457     fpi.setData(uvec.orphan(), status);
    458 }
    459 
    460 #endif /* #if !UCONFIG_NO_FORMATTING */
    461