Home | History | Annotate | Download | only in escape
      1 /*
      2  * Copyright (C) 2009 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.escape;
     18 
     19 import static com.google.common.base.Preconditions.checkNotNull;
     20 
     21 import com.google.common.annotations.Beta;
     22 import com.google.common.annotations.GwtCompatible;
     23 
     24 import java.util.Map;
     25 
     26 /**
     27  * A {@link CharEscaper} that uses an array to quickly look up replacement
     28  * characters for a given {@code char} value. An additional safe range is
     29  * provided that determines whether {@code char} values without specific
     30  * replacements are to be considered safe and left unescaped or should be
     31  * escaped in a general way.
     32  *
     33  * <p>A good example of usage of this class is for Java source code escaping
     34  * where the replacement array contains information about special ASCII
     35  * characters such as {@code \\t} and {@code \\n} while {@link #escapeUnsafe}
     36  * is overridden to handle general escaping of the form {@code \\uxxxx}.
     37  *
     38  * <p>The size of the data structure used by {@link ArrayBasedCharEscaper} is
     39  * proportional to the highest valued character that requires escaping.
     40  * For example a replacement map containing the single character
     41  * '{@code \}{@code u1000}' will require approximately 16K of memory. If you
     42  * need to create multiple escaper instances that have the same character
     43  * replacement mapping consider using {@link ArrayBasedEscaperMap}.
     44  *
     45  * @author Sven Mawson
     46  * @author David Beaumont
     47  * @since 15.0
     48  */
     49 @Beta
     50 @GwtCompatible
     51 public abstract class ArrayBasedCharEscaper extends CharEscaper {
     52   // The replacement array (see ArrayBasedEscaperMap).
     53   private final char[][] replacements;
     54   // The number of elements in the replacement array.
     55   private final int replacementsLength;
     56   // The first character in the safe range.
     57   private final char safeMin;
     58   // The last character in the safe range.
     59   private final char safeMax;
     60 
     61   /**
     62    * Creates a new ArrayBasedCharEscaper instance with the given replacement map
     63    * and specified safe range. If {@code safeMax < safeMin} then no characters
     64    * are considered safe.
     65    *
     66    * <p>If a character has no mapped replacement then it is checked against the
     67    * safe range. If it lies outside that, then {@link #escapeUnsafe} is
     68    * called, otherwise no escaping is performed.
     69    *
     70    * @param replacementMap a map of characters to their escaped representations
     71    * @param safeMin the lowest character value in the safe range
     72    * @param safeMax the highest character value in the safe range
     73    */
     74   protected ArrayBasedCharEscaper(Map<Character, String> replacementMap,
     75       char safeMin, char safeMax) {
     76 
     77     this(ArrayBasedEscaperMap.create(replacementMap), safeMin, safeMax);
     78   }
     79 
     80   /**
     81    * Creates a new ArrayBasedCharEscaper instance with the given replacement map
     82    * and specified safe range. If {@code safeMax < safeMin} then no characters
     83    * are considered safe. This initializer is useful when explicit instances of
     84    * ArrayBasedEscaperMap are used to allow the sharing of large replacement
     85    * mappings.
     86    *
     87    * <p>If a character has no mapped replacement then it is checked against the
     88    * safe range. If it lies outside that, then {@link #escapeUnsafe} is
     89    * called, otherwise no escaping is performed.
     90    *
     91    * @param escaperMap the mapping of characters to be escaped
     92    * @param safeMin the lowest character value in the safe range
     93    * @param safeMax the highest character value in the safe range
     94    */
     95   protected ArrayBasedCharEscaper(ArrayBasedEscaperMap escaperMap,
     96       char safeMin, char safeMax) {
     97 
     98     checkNotNull(escaperMap);  // GWT specific check (do not optimize)
     99     this.replacements = escaperMap.getReplacementArray();
    100     this.replacementsLength = replacements.length;
    101     if (safeMax < safeMin) {
    102       // If the safe range is empty, set the range limits to opposite extremes
    103       // to ensure the first test of either value will (almost certainly) fail.
    104       safeMax = Character.MIN_VALUE;
    105       safeMin = Character.MAX_VALUE;
    106     }
    107     this.safeMin = safeMin;
    108     this.safeMax = safeMax;
    109   }
    110 
    111   /*
    112    * This is overridden to improve performance. Rough benchmarking shows that
    113    * this almost doubles the speed when processing strings that do not require
    114    * any escaping.
    115    */
    116   @Override
    117   public final String escape(String s) {
    118     checkNotNull(s);  // GWT specific check (do not optimize).
    119     for (int i = 0; i < s.length(); i++) {
    120       char c = s.charAt(i);
    121       if ((c < replacementsLength && replacements[c] != null) ||
    122           c > safeMax || c < safeMin) {
    123         return escapeSlow(s, i);
    124       }
    125     }
    126     return s;
    127   }
    128 
    129   /**
    130    * Escapes a single character using the replacement array and safe range
    131    * values. If the given character does not have an explicit replacement and
    132    * lies outside the safe range then {@link #escapeUnsafe} is called.
    133    */
    134   @Override protected final char[] escape(char c) {
    135     if (c < replacementsLength) {
    136       char[] chars = replacements[c];
    137       if (chars != null) {
    138         return chars;
    139       }
    140     }
    141     if (c >= safeMin && c <= safeMax) {
    142       return null;
    143     }
    144     return escapeUnsafe(c);
    145   }
    146 
    147   /**
    148    * Escapes a {@code char} value that has no direct explicit value in the
    149    * replacement array and lies outside the stated safe range. Subclasses should
    150    * override this method to provide generalized escaping for characters.
    151    *
    152    * <p>Note that arrays returned by this method must not be modified once they
    153    * have been returned. However it is acceptable to return the same array
    154    * multiple times (even for different input characters).
    155    *
    156    * @param c the character to escape
    157    * @return the replacement characters, or {@code null} if no escaping was
    158    *         required
    159    */
    160   // TODO(user,cpovirk): Rename this something better once refactoring done
    161   protected abstract char[] escapeUnsafe(char c);
    162 }
    163