Home | History | Annotate | Download | only in impl
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2014-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.impl;
     10 
     11 /**
     12  * Formats simple patterns like "{1} was born in {0}".
     13  * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
     14  * with only static methods, to avoid wrapper objects.
     15  *
     16  * <p>This class "compiles" pattern strings into a binary format
     17  * and implements formatting etc. based on that.
     18  *
     19  * <p>Format:
     20  * Index 0: One more than the highest argument number.
     21  * Followed by zero or more arguments or literal-text segments.
     22  *
     23  * <p>An argument is stored as its number, less than ARG_NUM_LIMIT.
     24  * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
     25  * followed by that many chars.
     26  */
     27 public final class SimpleFormatterImpl {
     28     /**
     29      * Argument numbers must be smaller than this limit.
     30      * Text segment lengths are offset by this much.
     31      * This is currently the only unused char value in compiled patterns,
     32      * except it is the maximum value of the first unit (max arg +1).
     33      */
     34     private static final int ARG_NUM_LIMIT = 0x100;
     35     private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1);
     36     private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2);
     37     private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3);
     38     /**
     39      * Initial and maximum char/UChar value set for a text segment.
     40      * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     41      * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     42      */
     43     private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff;
     44     /**
     45      * Maximum length of a text segment. Longer segments are split into shorter ones.
     46      */
     47     private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT;
     48 
     49     /** "Intern" some common patterns. */
     50     private static final String[][] COMMON_PATTERNS = {
     51         { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" },
     52         { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' },
     53         { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" },
     54         { "{0}  {1}", "\u0002\u0000" + LEN3_CHAR + "  \u0001" },  // en dash
     55     };
     56 
     57     /** Use only static methods. */
     58     private SimpleFormatterImpl() {}
     59 
     60     /**
     61      * Creates a compiled form of the pattern string, for use with appropriate static methods.
     62      * The number of arguments checked against the given limits is the
     63      * highest argument number plus one, not the number of occurrences of arguments.
     64      *
     65      * @param pattern The pattern string.
     66      * @param min The pattern must have at least this many arguments.
     67      * @param max The pattern must have at most this many arguments.
     68      * @return The compiled-pattern string.
     69      * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
     70      */
     71     public static String compileToStringMinMaxArguments(
     72             CharSequence pattern, StringBuilder sb, int min, int max) {
     73         // Return some precompiled common two-argument patterns.
     74         if (min <= 2 && 2 <= max) {
     75             for (String[] pair : COMMON_PATTERNS) {
     76                 if (pair[0].contentEquals(pattern)) {
     77                     assert pair[1].charAt(0) == 2;
     78                     return pair[1];
     79                 }
     80             }
     81         }
     82         // Parse consistent with MessagePattern, but
     83         // - support only simple numbered arguments
     84         // - build a simple binary structure into the result string
     85         int patternLength = pattern.length();
     86         sb.ensureCapacity(patternLength);
     87         // Reserve the first char for the number of arguments.
     88         sb.setLength(1);
     89         int textLength = 0;
     90         int maxArg = -1;
     91         boolean inQuote = false;
     92         for (int i = 0; i < patternLength;) {
     93             char c = pattern.charAt(i++);
     94             if (c == '\'') {
     95                 if (i < patternLength && (c = pattern.charAt(i)) == '\'') {
     96                     // double apostrophe, skip the second one
     97                     ++i;
     98                 } else if (inQuote) {
     99                     // skip the quote-ending apostrophe
    100                     inQuote = false;
    101                     continue;
    102                 } else if (c == '{' || c == '}') {
    103                     // Skip the quote-starting apostrophe, find the end of the quoted literal text.
    104                     ++i;
    105                     inQuote = true;
    106                 } else {
    107                     // The apostrophe is part of literal text.
    108                     c = '\'';
    109                 }
    110             } else if (!inQuote && c == '{') {
    111                 if (textLength > 0) {
    112                     sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
    113                     textLength = 0;
    114                 }
    115                 int argNumber;
    116                 if ((i + 1) < patternLength &&
    117                         0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 &&
    118                         pattern.charAt(i + 1) == '}') {
    119                     i += 2;
    120                 } else {
    121                     // Multi-digit argument number (no leading zero) or syntax error.
    122                     // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
    123                     // around the number, but this class does not.
    124                     int argStart = i - 1;
    125                     argNumber = -1;
    126                     if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') {
    127                         argNumber = c - '0';
    128                         while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') {
    129                             argNumber = argNumber * 10 + (c - '0');
    130                             if (argNumber >= ARG_NUM_LIMIT) {
    131                                 break;
    132                             }
    133                         }
    134                     }
    135                     if (argNumber < 0 || c != '}') {
    136                         throw new IllegalArgumentException(
    137                                 "Argument syntax error in pattern \"" + pattern +
    138                                 "\" at index " + argStart +
    139                                 ": " + pattern.subSequence(argStart, i));
    140                     }
    141                 }
    142                 if (argNumber > maxArg) {
    143                     maxArg = argNumber;
    144                 }
    145                 sb.append((char)argNumber);
    146                 continue;
    147             }  // else: c is part of literal text
    148             // Append c and track the literal-text segment length.
    149             if (textLength == 0) {
    150                 // Reserve a char for the length of a new text segment, preset the maximum length.
    151                 sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR);
    152             }
    153             sb.append(c);
    154             if (++textLength == MAX_SEGMENT_LENGTH) {
    155                 textLength = 0;
    156             }
    157         }
    158         if (textLength > 0) {
    159             sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
    160         }
    161         int argCount = maxArg + 1;
    162         if (argCount < min) {
    163             throw new IllegalArgumentException(
    164                     "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\"");
    165         }
    166         if (argCount > max) {
    167             throw new IllegalArgumentException(
    168                     "More than maximum " + max + " arguments in pattern \"" + pattern + "\"");
    169         }
    170         sb.setCharAt(0, (char)argCount);
    171         return sb.toString();
    172     }
    173 
    174     /**
    175      * @param compiledPattern Compiled form of a pattern string.
    176      * @return The max argument number + 1.
    177      */
    178     public static int getArgumentLimit(String compiledPattern) {
    179         return compiledPattern.charAt(0);
    180     }
    181 
    182     /**
    183      * Formats the given values.
    184      *
    185      * @param compiledPattern Compiled form of a pattern string.
    186      */
    187     public static String formatCompiledPattern(String compiledPattern, CharSequence... values) {
    188         return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString();
    189     }
    190 
    191     /**
    192      * Formats the not-compiled pattern with the given values.
    193      * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern().
    194      * The number of arguments checked against the given limits is the
    195      * highest argument number plus one, not the number of occurrences of arguments.
    196      *
    197      * @param pattern Not-compiled form of a pattern string.
    198      * @param min The pattern must have at least this many arguments.
    199      * @param max The pattern must have at most this many arguments.
    200      * @return The compiled-pattern string.
    201      * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
    202      */
    203     public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) {
    204         StringBuilder sb = new StringBuilder();
    205         String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max);
    206         sb.setLength(0);
    207         return formatAndAppend(compiledPattern, sb, null, values).toString();
    208     }
    209 
    210     /**
    211      * Formats the given values, appending to the appendTo builder.
    212      *
    213      * @param compiledPattern Compiled form of a pattern string.
    214      * @param appendTo Gets the formatted pattern and values appended.
    215      * @param offsets offsets[i] receives the offset of where
    216      *                values[i] replaced pattern argument {i}.
    217      *                Can be null, or can be shorter or longer than values.
    218      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
    219      * @param values The argument values.
    220      *               An argument value must not be the same object as appendTo.
    221      *               values.length must be at least getArgumentLimit().
    222      *               Can be null if getArgumentLimit()==0.
    223      * @return appendTo
    224      */
    225     public static StringBuilder formatAndAppend(
    226             String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) {
    227         int valuesLength = values != null ? values.length : 0;
    228         if (valuesLength < getArgumentLimit(compiledPattern)) {
    229             throw new IllegalArgumentException("Too few values.");
    230         }
    231         return format(compiledPattern, values, appendTo, null, true, offsets);
    232     }
    233 
    234     /**
    235      * Formats the given values, replacing the contents of the result builder.
    236      * May optimize by actually appending to the result if it is the same object
    237      * as the value corresponding to the initial argument in the pattern.
    238      *
    239      * @param compiledPattern Compiled form of a pattern string.
    240      * @param result Gets its contents replaced by the formatted pattern and values.
    241      * @param offsets offsets[i] receives the offset of where
    242      *                values[i] replaced pattern argument {i}.
    243      *                Can be null, or can be shorter or longer than values.
    244      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
    245      * @param values The argument values.
    246      *               An argument value may be the same object as result.
    247      *               values.length must be at least getArgumentLimit().
    248      * @return result
    249      */
    250     public static StringBuilder formatAndReplace(
    251             String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) {
    252         int valuesLength = values != null ? values.length : 0;
    253         if (valuesLength < getArgumentLimit(compiledPattern)) {
    254             throw new IllegalArgumentException("Too few values.");
    255         }
    256 
    257         // If the pattern starts with an argument whose value is the same object
    258         // as the result, then we keep the result contents and append to it.
    259         // Otherwise we replace its contents.
    260         int firstArg = -1;
    261         // If any non-initial argument value is the same object as the result,
    262         // then we first copy its contents and use that instead while formatting.
    263         String resultCopy = null;
    264         if (getArgumentLimit(compiledPattern) > 0) {
    265             for (int i = 1; i < compiledPattern.length();) {
    266                 int n = compiledPattern.charAt(i++);
    267                 if (n < ARG_NUM_LIMIT) {
    268                     if (values[n] == result) {
    269                         if (i == 2) {
    270                             firstArg = n;
    271                         } else if (resultCopy == null) {
    272                             resultCopy = result.toString();
    273                         }
    274                     }
    275                 } else {
    276                     i += n - ARG_NUM_LIMIT;
    277                 }
    278             }
    279         }
    280         if (firstArg < 0) {
    281             result.setLength(0);
    282         }
    283         return format(compiledPattern, values, result, resultCopy, false, offsets);
    284     }
    285 
    286     /**
    287      * Returns the pattern text with none of the arguments.
    288      * Like formatting with all-empty string values.
    289      *
    290      * @param compiledPattern Compiled form of a pattern string.
    291      */
    292     public static String getTextWithNoArguments(String compiledPattern) {
    293         int capacity = compiledPattern.length() - 1 - getArgumentLimit(compiledPattern);
    294         StringBuilder sb = new StringBuilder(capacity);
    295         for (int i = 1; i < compiledPattern.length();) {
    296             int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
    297             if (segmentLength > 0) {
    298                 int limit = i + segmentLength;
    299                 sb.append(compiledPattern, i, limit);
    300                 i = limit;
    301             }
    302         }
    303         return sb.toString();
    304     }
    305 
    306     private static StringBuilder format(
    307             String compiledPattern, CharSequence[] values,
    308             StringBuilder result, String resultCopy, boolean forbidResultAsValue,
    309             int[] offsets) {
    310         int offsetsLength;
    311         if (offsets == null) {
    312             offsetsLength = 0;
    313         } else {
    314             offsetsLength = offsets.length;
    315             for (int i = 0; i < offsetsLength; i++) {
    316                 offsets[i] = -1;
    317             }
    318         }
    319         for (int i = 1; i < compiledPattern.length();) {
    320             int n = compiledPattern.charAt(i++);
    321             if (n < ARG_NUM_LIMIT) {
    322                 CharSequence value = values[n];
    323                 if (value == result) {
    324                     if (forbidResultAsValue) {
    325                         throw new IllegalArgumentException("Value must not be same object as result");
    326                     }
    327                     if (i == 2) {
    328                         // We are appending to result which is also the first value object.
    329                         if (n < offsetsLength) {
    330                             offsets[n] = 0;
    331                         }
    332                     } else {
    333                         if (n < offsetsLength) {
    334                             offsets[n] = result.length();
    335                         }
    336                         result.append(resultCopy);
    337                     }
    338                 } else {
    339                     if (n < offsetsLength) {
    340                         offsets[n] = result.length();
    341                     }
    342                     result.append(value);
    343                 }
    344             } else {
    345                 int limit = i + (n - ARG_NUM_LIMIT);
    346                 result.append(compiledPattern, i, limit);
    347                 i = limit;
    348             }
    349         }
    350         return result;
    351     }
    352 }
    353