1 /* 2 * Copyright (C) 2009 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.escape; 18 19 import static com.google.common.base.Preconditions.checkNotNull; 20 21 import com.google.common.annotations.Beta; 22 import com.google.common.annotations.GwtCompatible; 23 24 import java.util.Map; 25 26 /** 27 * A {@link CharEscaper} that uses an array to quickly look up replacement 28 * characters for a given {@code char} value. An additional safe range is 29 * provided that determines whether {@code char} values without specific 30 * replacements are to be considered safe and left unescaped or should be 31 * escaped in a general way. 32 * 33 * <p>A good example of usage of this class is for Java source code escaping 34 * where the replacement array contains information about special ASCII 35 * characters such as {@code \\t} and {@code \\n} while {@link #escapeUnsafe} 36 * is overridden to handle general escaping of the form {@code \\uxxxx}. 37 * 38 * <p>The size of the data structure used by {@link ArrayBasedCharEscaper} is 39 * proportional to the highest valued character that requires escaping. 40 * For example a replacement map containing the single character 41 * '{@code \}{@code u1000}' will require approximately 16K of memory. If you 42 * need to create multiple escaper instances that have the same character 43 * replacement mapping consider using {@link ArrayBasedEscaperMap}. 44 * 45 * @author Sven Mawson 46 * @author David Beaumont 47 * @since 15.0 48 */ 49 @Beta 50 @GwtCompatible 51 public abstract class ArrayBasedCharEscaper extends CharEscaper { 52 // The replacement array (see ArrayBasedEscaperMap). 53 private final char[][] replacements; 54 // The number of elements in the replacement array. 55 private final int replacementsLength; 56 // The first character in the safe range. 57 private final char safeMin; 58 // The last character in the safe range. 59 private final char safeMax; 60 61 /** 62 * Creates a new ArrayBasedCharEscaper instance with the given replacement map 63 * and specified safe range. If {@code safeMax < safeMin} then no characters 64 * are considered safe. 65 * 66 * <p>If a character has no mapped replacement then it is checked against the 67 * safe range. If it lies outside that, then {@link #escapeUnsafe} is 68 * called, otherwise no escaping is performed. 69 * 70 * @param replacementMap a map of characters to their escaped representations 71 * @param safeMin the lowest character value in the safe range 72 * @param safeMax the highest character value in the safe range 73 */ 74 protected ArrayBasedCharEscaper(Map<Character, String> replacementMap, 75 char safeMin, char safeMax) { 76 77 this(ArrayBasedEscaperMap.create(replacementMap), safeMin, safeMax); 78 } 79 80 /** 81 * Creates a new ArrayBasedCharEscaper instance with the given replacement map 82 * and specified safe range. If {@code safeMax < safeMin} then no characters 83 * are considered safe. This initializer is useful when explicit instances of 84 * ArrayBasedEscaperMap are used to allow the sharing of large replacement 85 * mappings. 86 * 87 * <p>If a character has no mapped replacement then it is checked against the 88 * safe range. If it lies outside that, then {@link #escapeUnsafe} is 89 * called, otherwise no escaping is performed. 90 * 91 * @param escaperMap the mapping of characters to be escaped 92 * @param safeMin the lowest character value in the safe range 93 * @param safeMax the highest character value in the safe range 94 */ 95 protected ArrayBasedCharEscaper(ArrayBasedEscaperMap escaperMap, 96 char safeMin, char safeMax) { 97 98 checkNotNull(escaperMap); // GWT specific check (do not optimize) 99 this.replacements = escaperMap.getReplacementArray(); 100 this.replacementsLength = replacements.length; 101 if (safeMax < safeMin) { 102 // If the safe range is empty, set the range limits to opposite extremes 103 // to ensure the first test of either value will (almost certainly) fail. 104 safeMax = Character.MIN_VALUE; 105 safeMin = Character.MAX_VALUE; 106 } 107 this.safeMin = safeMin; 108 this.safeMax = safeMax; 109 } 110 111 /* 112 * This is overridden to improve performance. Rough benchmarking shows that 113 * this almost doubles the speed when processing strings that do not require 114 * any escaping. 115 */ 116 @Override 117 public final String escape(String s) { 118 checkNotNull(s); // GWT specific check (do not optimize). 119 for (int i = 0; i < s.length(); i++) { 120 char c = s.charAt(i); 121 if ((c < replacementsLength && replacements[c] != null) || 122 c > safeMax || c < safeMin) { 123 return escapeSlow(s, i); 124 } 125 } 126 return s; 127 } 128 129 /** 130 * Escapes a single character using the replacement array and safe range 131 * values. If the given character does not have an explicit replacement and 132 * lies outside the safe range then {@link #escapeUnsafe} is called. 133 */ 134 @Override protected final char[] escape(char c) { 135 if (c < replacementsLength) { 136 char[] chars = replacements[c]; 137 if (chars != null) { 138 return chars; 139 } 140 } 141 if (c >= safeMin && c <= safeMax) { 142 return null; 143 } 144 return escapeUnsafe(c); 145 } 146 147 /** 148 * Escapes a {@code char} value that has no direct explicit value in the 149 * replacement array and lies outside the stated safe range. Subclasses should 150 * override this method to provide generalized escaping for characters. 151 * 152 * <p>Note that arrays returned by this method must not be modified once they 153 * have been returned. However it is acceptable to return the same array 154 * multiple times (even for different input characters). 155 * 156 * @param c the character to escape 157 * @return the replacement characters, or {@code null} if no escaping was 158 * required 159 */ 160 // TODO(user,cpovirk): Rename this something better once refactoring done 161 protected abstract char[] escapeUnsafe(char c); 162 } 163