Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 2014-2016, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 ******************************************************************************
      6 * simplepatternformatter.cpp
      7 */
      8 
      9 #include "unicode/utypes.h"
     10 #include "unicode/unistr.h"
     11 #include "simplepatternformatter.h"
     12 #include "uassert.h"
     13 
     14 U_NAMESPACE_BEGIN
     15 
     16 namespace {
     17 
     18 /**
     19  * Argument numbers must be smaller than this limit.
     20  * Text segment lengths are offset by this much.
     21  * This is currently the only unused char value in compiled patterns,
     22  * except it is the maximum value of the first unit (max arg +1).
     23  */
     24 const int32_t ARG_NUM_LIMIT = 0x100;
     25 /**
     26  * Initial and maximum char/UChar value set for a text segment.
     27  * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     28  * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     29  */
     30 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
     31 /**
     32  * Maximum length of a text segment. Longer segments are split into shorter ones.
     33  */
     34 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
     35 
     36 enum {
     37     APOS = 0x27,
     38     DIGIT_ZERO = 0x30,
     39     DIGIT_ONE = 0x31,
     40     DIGIT_NINE = 0x39,
     41     OPEN_BRACE = 0x7b,
     42     CLOSE_BRACE = 0x7d
     43 };
     44 
     45 inline UBool isInvalidArray(const void *array, int32_t length) {
     46    return (length < 0 || (array == NULL && length != 0));
     47 }
     48 
     49 }  // namespace
     50 
     51 SimplePatternFormatter &SimplePatternFormatter::operator=(
     52         const SimplePatternFormatter& other) {
     53     if (this == &other) {
     54         return *this;
     55     }
     56     compiledPattern = other.compiledPattern;
     57     return *this;
     58 }
     59 
     60 SimplePatternFormatter::~SimplePatternFormatter() {}
     61 
     62 UBool SimplePatternFormatter::compileMinMaxPlaceholders(
     63         const UnicodeString &pattern,
     64         int32_t min, int32_t max,
     65         UErrorCode &errorCode) {
     66     if (U_FAILURE(errorCode)) {
     67         return FALSE;
     68     }
     69     // Parse consistent with MessagePattern, but
     70     // - support only simple numbered arguments
     71     // - build a simple binary structure into the result string
     72     const UChar *patternBuffer = pattern.getBuffer();
     73     int32_t patternLength = pattern.length();
     74     // Reserve the first char for the number of arguments.
     75     compiledPattern.setTo((UChar)0);
     76     int32_t textLength = 0;
     77     int32_t maxArg = -1;
     78     UBool inQuote = FALSE;
     79     for (int32_t i = 0; i < patternLength;) {
     80         UChar c = patternBuffer[i++];
     81         if (c == APOS) {
     82             if (i < patternLength && (c = patternBuffer[i]) == APOS) {
     83                 // double apostrophe, skip the second one
     84                 ++i;
     85             } else if (inQuote) {
     86                 // skip the quote-ending apostrophe
     87                 inQuote = FALSE;
     88                 continue;
     89             } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
     90                 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
     91                 ++i;
     92                 inQuote = TRUE;
     93             } else {
     94                 // The apostrophe is part of literal text.
     95                 c = APOS;
     96             }
     97         } else if (!inQuote && c == OPEN_BRACE) {
     98             if (textLength > 0) {
     99                 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
    100                                           (UChar)(ARG_NUM_LIMIT + textLength));
    101                 textLength = 0;
    102             }
    103             int32_t argNumber;
    104             if ((i + 1) < patternLength &&
    105                     0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
    106                     patternBuffer[i + 1] == CLOSE_BRACE) {
    107                 i += 2;
    108             } else {
    109                 // Multi-digit argument number (no leading zero) or syntax error.
    110                 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
    111                 // around the number, but this class does not.
    112                 argNumber = -1;
    113                 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
    114                     argNumber = c - DIGIT_ZERO;
    115                     while (i < patternLength &&
    116                             DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
    117                         argNumber = argNumber * 10 + (c - DIGIT_ZERO);
    118                         if (argNumber >= ARG_NUM_LIMIT) {
    119                             break;
    120                         }
    121                     }
    122                 }
    123                 if (argNumber < 0 || c != CLOSE_BRACE) {
    124                     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    125                     return FALSE;
    126                 }
    127             }
    128             if (argNumber > maxArg) {
    129                 maxArg = argNumber;
    130             }
    131             compiledPattern.append((UChar)argNumber);
    132             continue;
    133         }  // else: c is part of literal text
    134         // Append c and track the literal-text segment length.
    135         if (textLength == 0) {
    136             // Reserve a char for the length of a new text segment, preset the maximum length.
    137             compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
    138         }
    139         compiledPattern.append(c);
    140         if (++textLength == MAX_SEGMENT_LENGTH) {
    141             textLength = 0;
    142         }
    143     }
    144     if (textLength > 0) {
    145         compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
    146                                   (UChar)(ARG_NUM_LIMIT + textLength));
    147     }
    148     int32_t argCount = maxArg + 1;
    149     if (argCount < min || max < argCount) {
    150         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    151         return FALSE;
    152     }
    153     compiledPattern.setCharAt(0, (UChar)argCount);
    154     return TRUE;
    155 }
    156 
    157 UnicodeString& SimplePatternFormatter::format(
    158         const UnicodeString &value0,
    159         UnicodeString &appendTo, UErrorCode &errorCode) const {
    160     const UnicodeString *values[] = { &value0 };
    161     return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
    162 }
    163 
    164 UnicodeString& SimplePatternFormatter::format(
    165         const UnicodeString &value0,
    166         const UnicodeString &value1,
    167         UnicodeString &appendTo, UErrorCode &errorCode) const {
    168     const UnicodeString *values[] = { &value0, &value1 };
    169     return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
    170 }
    171 
    172 UnicodeString& SimplePatternFormatter::format(
    173         const UnicodeString &value0,
    174         const UnicodeString &value1,
    175         const UnicodeString &value2,
    176         UnicodeString &appendTo, UErrorCode &errorCode) const {
    177     const UnicodeString *values[] = { &value0, &value1, &value2 };
    178     return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
    179 }
    180 
    181 UnicodeString& SimplePatternFormatter::formatAndAppend(
    182         const UnicodeString *const *values, int32_t valuesLength,
    183         UnicodeString &appendTo,
    184         int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    185     if (U_FAILURE(errorCode)) {
    186         return appendTo;
    187     }
    188     if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
    189             valuesLength < getPlaceholderCount()) {
    190         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    191         return appendTo;
    192     }
    193     return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
    194                   appendTo, NULL, TRUE,
    195                   offsets, offsetsLength, errorCode);
    196 }
    197 
    198 UnicodeString &SimplePatternFormatter::formatAndReplace(
    199         const UnicodeString *const *values, int32_t valuesLength,
    200         UnicodeString &result,
    201         int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    202     if (U_FAILURE(errorCode)) {
    203         return result;
    204     }
    205     if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
    206         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    207         return result;
    208     }
    209     const UChar *cp = compiledPattern.getBuffer();
    210     int32_t cpLength = compiledPattern.length();
    211     if (valuesLength < getPlaceholderCount(cp, cpLength)) {
    212         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    213         return result;
    214     }
    215 
    216     // If the pattern starts with an argument whose value is the same object
    217     // as the result, then we keep the result contents and append to it.
    218     // Otherwise we replace its contents.
    219     int32_t firstArg = -1;
    220     // If any non-initial argument value is the same object as the result,
    221     // then we first copy its contents and use that instead while formatting.
    222     UnicodeString resultCopy;
    223     if (getPlaceholderCount(cp, cpLength) > 0) {
    224         for (int32_t i = 1; i < cpLength;) {
    225             int32_t n = cp[i++];
    226             if (n < ARG_NUM_LIMIT) {
    227                 if (values[n] == &result) {
    228                     if (i == 2) {
    229                         firstArg = n;
    230                     } else if (resultCopy.isEmpty() && !result.isEmpty()) {
    231                         resultCopy = result;
    232                     }
    233                 }
    234             } else {
    235                 i += n - ARG_NUM_LIMIT;
    236             }
    237         }
    238     }
    239     if (firstArg < 0) {
    240         result.remove();
    241     }
    242     return format(cp, cpLength, values,
    243                   result, &resultCopy, FALSE,
    244                   offsets, offsetsLength, errorCode);
    245 }
    246 
    247 UnicodeString SimplePatternFormatter::getTextWithNoPlaceholders(
    248         const UChar *compiledPattern, int32_t compiledPatternLength) {
    249     int32_t capacity = compiledPatternLength - 1 -
    250             getPlaceholderCount(compiledPattern, compiledPatternLength);
    251     UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
    252     for (int32_t i = 1; i < compiledPatternLength;) {
    253         int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
    254         if (segmentLength > 0) {
    255             sb.append(compiledPattern + i, segmentLength);
    256             i += segmentLength;
    257         }
    258     }
    259     return sb;
    260 }
    261 
    262 UnicodeString &SimplePatternFormatter::format(
    263         const UChar *compiledPattern, int32_t compiledPatternLength,
    264         const UnicodeString *const *values,
    265         UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
    266         int32_t *offsets, int32_t offsetsLength,
    267         UErrorCode &errorCode) {
    268     if (U_FAILURE(errorCode)) {
    269         return result;
    270     }
    271     for (int32_t i = 0; i < offsetsLength; i++) {
    272         offsets[i] = -1;
    273     }
    274     for (int32_t i = 1; i < compiledPatternLength;) {
    275         int32_t n = compiledPattern[i++];
    276         if (n < ARG_NUM_LIMIT) {
    277             const UnicodeString *value = values[n];
    278             if (value == NULL) {
    279                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    280                 return result;
    281             }
    282             if (value == &result) {
    283                 if (forbidResultAsValue) {
    284                     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    285                     return result;
    286                 }
    287                 if (i == 2) {
    288                     // We are appending to result which is also the first value object.
    289                     if (n < offsetsLength) {
    290                         offsets[n] = 0;
    291                     }
    292                 } else {
    293                     if (n < offsetsLength) {
    294                         offsets[n] = result.length();
    295                     }
    296                     result.append(*resultCopy);
    297                 }
    298             } else {
    299                 if (n < offsetsLength) {
    300                     offsets[n] = result.length();
    301                 }
    302                 result.append(*value);
    303             }
    304         } else {
    305             int32_t length = n - ARG_NUM_LIMIT;
    306             result.append(compiledPattern + i, length);
    307             i += length;
    308         }
    309     }
    310     return result;
    311 }
    312 
    313 U_NAMESPACE_END
    314