1 // 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT 7 8 #include "number_stringbuilder.h" 9 #include "unicode/utf16.h" 10 #include "uvectr32.h" 11 12 using namespace icu; 13 using namespace icu::number; 14 using namespace icu::number::impl; 15 16 namespace { 17 18 // A version of uprv_memcpy that checks for length 0. 19 // By default, uprv_memcpy requires a length of at least 1. 20 inline void uprv_memcpy2(void* dest, const void* src, size_t len) { 21 if (len > 0) { 22 uprv_memcpy(dest, src, len); 23 } 24 } 25 26 // A version of uprv_memmove that checks for length 0. 27 // By default, uprv_memmove requires a length of at least 1. 28 inline void uprv_memmove2(void* dest, const void* src, size_t len) { 29 if (len > 0) { 30 uprv_memmove(dest, src, len); 31 } 32 } 33 34 } // namespace 35 36 NumberStringBuilder::NumberStringBuilder() = default; 37 38 NumberStringBuilder::~NumberStringBuilder() { 39 if (fUsingHeap) { 40 uprv_free(fChars.heap.ptr); 41 uprv_free(fFields.heap.ptr); 42 } 43 } 44 45 NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) { 46 *this = other; 47 } 48 49 NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) { 50 // Check for self-assignment 51 if (this == &other) { 52 return *this; 53 } 54 55 // Continue with deallocation and copying 56 if (fUsingHeap) { 57 uprv_free(fChars.heap.ptr); 58 uprv_free(fFields.heap.ptr); 59 fUsingHeap = false; 60 } 61 62 int32_t capacity = other.getCapacity(); 63 if (capacity > DEFAULT_CAPACITY) { 64 // FIXME: uprv_malloc 65 // C++ note: malloc appears in two places: here and in prepareForInsertHelper. 66 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity)); 67 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity)); 68 if (newChars == nullptr || newFields == nullptr) { 69 // UErrorCode is not available; fail silently. 70 uprv_free(newChars); 71 uprv_free(newFields); 72 *this = NumberStringBuilder(); // can't fail 73 return *this; 74 } 75 76 fUsingHeap = true; 77 fChars.heap.capacity = capacity; 78 fChars.heap.ptr = newChars; 79 fFields.heap.capacity = capacity; 80 fFields.heap.ptr = newFields; 81 } 82 83 uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); 84 uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); 85 86 fZero = other.fZero; 87 fLength = other.fLength; 88 return *this; 89 } 90 91 int32_t NumberStringBuilder::length() const { 92 return fLength; 93 } 94 95 int32_t NumberStringBuilder::codePointCount() const { 96 return u_countChar32(getCharPtr() + fZero, fLength); 97 } 98 99 UChar32 NumberStringBuilder::getFirstCodePoint() const { 100 if (fLength == 0) { 101 return -1; 102 } 103 UChar32 cp; 104 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); 105 return cp; 106 } 107 108 UChar32 NumberStringBuilder::getLastCodePoint() const { 109 if (fLength == 0) { 110 return -1; 111 } 112 int32_t offset = fLength; 113 U16_BACK_1(getCharPtr() + fZero, 0, offset); 114 UChar32 cp; 115 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); 116 return cp; 117 } 118 119 UChar32 NumberStringBuilder::codePointAt(int32_t index) const { 120 UChar32 cp; 121 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); 122 return cp; 123 } 124 125 UChar32 NumberStringBuilder::codePointBefore(int32_t index) const { 126 int32_t offset = index; 127 U16_BACK_1(getCharPtr() + fZero, 0, offset); 128 UChar32 cp; 129 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); 130 return cp; 131 } 132 133 NumberStringBuilder &NumberStringBuilder::clear() { 134 // TODO: Reset the heap here? 135 fZero = getCapacity() / 2; 136 fLength = 0; 137 return *this; 138 } 139 140 int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) { 141 return insertCodePoint(fLength, codePoint, field, status); 142 } 143 144 int32_t 145 NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { 146 int32_t count = U16_LENGTH(codePoint); 147 int32_t position = prepareForInsert(index, count, status); 148 if (U_FAILURE(status)) { 149 return count; 150 } 151 if (count == 1) { 152 getCharPtr()[position] = (char16_t) codePoint; 153 getFieldPtr()[position] = field; 154 } else { 155 getCharPtr()[position] = U16_LEAD(codePoint); 156 getCharPtr()[position + 1] = U16_TRAIL(codePoint); 157 getFieldPtr()[position] = getFieldPtr()[position + 1] = field; 158 } 159 return count; 160 } 161 162 int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) { 163 return insert(fLength, unistr, field, status); 164 } 165 166 int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, 167 UErrorCode &status) { 168 if (unistr.length() == 0) { 169 // Nothing to insert. 170 return 0; 171 } else if (unistr.length() == 1) { 172 // Fast path: insert using insertCodePoint. 173 return insertCodePoint(index, unistr.charAt(0), field, status); 174 } else { 175 return insert(index, unistr, 0, unistr.length(), field, status); 176 } 177 } 178 179 int32_t 180 NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, 181 Field field, UErrorCode &status) { 182 int32_t count = end - start; 183 int32_t position = prepareForInsert(index, count, status); 184 if (U_FAILURE(status)) { 185 return count; 186 } 187 for (int32_t i = 0; i < count; i++) { 188 getCharPtr()[position + i] = unistr.charAt(start + i); 189 getFieldPtr()[position + i] = field; 190 } 191 return count; 192 } 193 194 int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) { 195 return insert(fLength, other, status); 196 } 197 198 int32_t 199 NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) { 200 if (this == &other) { 201 status = U_ILLEGAL_ARGUMENT_ERROR; 202 return 0; 203 } 204 int32_t count = other.fLength; 205 if (count == 0) { 206 // Nothing to insert. 207 return 0; 208 } 209 int32_t position = prepareForInsert(index, count, status); 210 if (U_FAILURE(status)) { 211 return count; 212 } 213 for (int32_t i = 0; i < count; i++) { 214 getCharPtr()[position + i] = other.charAt(i); 215 getFieldPtr()[position + i] = other.fieldAt(i); 216 } 217 return count; 218 } 219 220 int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { 221 if (index == 0 && fZero - count >= 0) { 222 // Append to start 223 fZero -= count; 224 fLength += count; 225 return fZero; 226 } else if (index == fLength && fZero + fLength + count < getCapacity()) { 227 // Append to end 228 fLength += count; 229 return fZero + fLength - count; 230 } else { 231 // Move chars around and/or allocate more space 232 return prepareForInsertHelper(index, count, status); 233 } 234 } 235 236 int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { 237 int32_t oldCapacity = getCapacity(); 238 int32_t oldZero = fZero; 239 char16_t *oldChars = getCharPtr(); 240 Field *oldFields = getFieldPtr(); 241 if (fLength + count > oldCapacity) { 242 int32_t newCapacity = (fLength + count) * 2; 243 int32_t newZero = newCapacity / 2 - (fLength + count) / 2; 244 245 // C++ note: malloc appears in two places: here and in the assignment operator. 246 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity)); 247 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity)); 248 if (newChars == nullptr || newFields == nullptr) { 249 uprv_free(newChars); 250 uprv_free(newFields); 251 status = U_MEMORY_ALLOCATION_ERROR; 252 return -1; 253 } 254 255 // First copy the prefix and then the suffix, leaving room for the new chars that the 256 // caller wants to insert. 257 // C++ note: memcpy is OK because the src and dest do not overlap. 258 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); 259 uprv_memcpy2(newChars + newZero + index + count, 260 oldChars + oldZero + index, 261 sizeof(char16_t) * (fLength - index)); 262 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); 263 uprv_memcpy2(newFields + newZero + index + count, 264 oldFields + oldZero + index, 265 sizeof(Field) * (fLength - index)); 266 267 if (fUsingHeap) { 268 uprv_free(oldChars); 269 uprv_free(oldFields); 270 } 271 fUsingHeap = true; 272 fChars.heap.ptr = newChars; 273 fChars.heap.capacity = newCapacity; 274 fFields.heap.ptr = newFields; 275 fFields.heap.capacity = newCapacity; 276 fZero = newZero; 277 fLength += count; 278 } else { 279 int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; 280 281 // C++ note: memmove is required because src and dest may overlap. 282 // First copy the entire string to the location of the prefix, and then move the suffix 283 // to make room for the new chars that the caller wants to insert. 284 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); 285 uprv_memmove2(oldChars + newZero + index + count, 286 oldChars + newZero + index, 287 sizeof(char16_t) * (fLength - index)); 288 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); 289 uprv_memmove2(oldFields + newZero + index + count, 290 oldFields + newZero + index, 291 sizeof(Field) * (fLength - index)); 292 293 fZero = newZero; 294 fLength += count; 295 } 296 return fZero + index; 297 } 298 299 UnicodeString NumberStringBuilder::toUnicodeString() const { 300 return UnicodeString(getCharPtr() + fZero, fLength); 301 } 302 303 UnicodeString NumberStringBuilder::toDebugString() const { 304 UnicodeString sb; 305 sb.append(u"<NumberStringBuilder [", -1); 306 sb.append(toUnicodeString()); 307 sb.append(u"] [", -1); 308 for (int i = 0; i < fLength; i++) { 309 if (fieldAt(i) == UNUM_FIELD_COUNT) { 310 sb.append(u'n'); 311 } else { 312 char16_t c; 313 switch (fieldAt(i)) { 314 case UNUM_SIGN_FIELD: 315 c = u'-'; 316 break; 317 case UNUM_INTEGER_FIELD: 318 c = u'i'; 319 break; 320 case UNUM_FRACTION_FIELD: 321 c = u'f'; 322 break; 323 case UNUM_EXPONENT_FIELD: 324 c = u'e'; 325 break; 326 case UNUM_EXPONENT_SIGN_FIELD: 327 c = u'+'; 328 break; 329 case UNUM_EXPONENT_SYMBOL_FIELD: 330 c = u'E'; 331 break; 332 case UNUM_DECIMAL_SEPARATOR_FIELD: 333 c = u'.'; 334 break; 335 case UNUM_GROUPING_SEPARATOR_FIELD: 336 c = u','; 337 break; 338 case UNUM_PERCENT_FIELD: 339 c = u'%'; 340 break; 341 case UNUM_PERMILL_FIELD: 342 c = u''; 343 break; 344 case UNUM_CURRENCY_FIELD: 345 c = u'$'; 346 break; 347 default: 348 c = u'?'; 349 break; 350 } 351 sb.append(c); 352 } 353 } 354 sb.append(u"]>", -1); 355 return sb; 356 } 357 358 const char16_t *NumberStringBuilder::chars() const { 359 return getCharPtr() + fZero; 360 } 361 362 bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const { 363 if (fLength != other.fLength) { 364 return false; 365 } 366 for (int32_t i = 0; i < fLength; i++) { 367 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { 368 return false; 369 } 370 } 371 return true; 372 } 373 374 void NumberStringBuilder::populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const { 375 int32_t rawField = fp.getField(); 376 377 if (rawField == FieldPosition::DONT_CARE) { 378 return; 379 } 380 381 if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) { 382 status = U_ILLEGAL_ARGUMENT_ERROR; 383 return; 384 } 385 386 auto field = static_cast<Field>(rawField); 387 388 bool seenStart = false; 389 int32_t fractionStart = -1; 390 for (int i = fZero; i <= fZero + fLength; i++) { 391 Field _field = UNUM_FIELD_COUNT; 392 if (i < fZero + fLength) { 393 _field = getFieldPtr()[i]; 394 } 395 if (seenStart && field != _field) { 396 // Special case: GROUPING_SEPARATOR counts as an INTEGER. 397 if (field == UNUM_INTEGER_FIELD && _field == UNUM_GROUPING_SEPARATOR_FIELD) { 398 continue; 399 } 400 fp.setEndIndex(i - fZero + offset); 401 break; 402 } else if (!seenStart && field == _field) { 403 fp.setBeginIndex(i - fZero + offset); 404 seenStart = true; 405 } 406 if (_field == UNUM_INTEGER_FIELD || _field == UNUM_DECIMAL_SEPARATOR_FIELD) { 407 fractionStart = i - fZero + 1; 408 } 409 } 410 411 // Backwards compatibility: FRACTION needs to start after INTEGER if empty 412 if (field == UNUM_FRACTION_FIELD && !seenStart) { 413 fp.setBeginIndex(fractionStart + offset); 414 fp.setEndIndex(fractionStart + offset); 415 } 416 } 417 418 void NumberStringBuilder::populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const { 419 // TODO: Set an initial capacity on uvec? 420 LocalPointer <UVector32> uvec(new UVector32(status)); 421 if (U_FAILURE(status)) { 422 return; 423 } 424 425 Field current = UNUM_FIELD_COUNT; 426 int32_t currentStart = -1; 427 for (int32_t i = 0; i < fLength; i++) { 428 Field field = fieldAt(i); 429 if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) { 430 // Special case: GROUPING_SEPARATOR counts as an INTEGER. 431 // Add the field, followed by the start index, followed by the end index to uvec. 432 uvec->addElement(UNUM_GROUPING_SEPARATOR_FIELD, status); 433 uvec->addElement(i, status); 434 uvec->addElement(i + 1, status); 435 } else if (current != field) { 436 if (current != UNUM_FIELD_COUNT) { 437 // Add the field, followed by the start index, followed by the end index to uvec. 438 uvec->addElement(current, status); 439 uvec->addElement(currentStart, status); 440 uvec->addElement(i, status); 441 } 442 current = field; 443 currentStart = i; 444 } 445 if (U_FAILURE(status)) { 446 return; 447 } 448 } 449 if (current != UNUM_FIELD_COUNT) { 450 // Add the field, followed by the start index, followed by the end index to uvec. 451 uvec->addElement(current, status); 452 uvec->addElement(currentStart, status); 453 uvec->addElement(fLength, status); 454 } 455 456 // Give uvec to the FieldPositionIterator, which adopts it. 457 fpi.setData(uvec.orphan(), status); 458 } 459 460 #endif /* #if !UCONFIG_NO_FORMATTING */ 461