1 /** 2 * Copyright (c) 2006, Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.android.mail.common.base; 18 19 import static com.google.android.mail.common.base.Preconditions.checkNotNull; 20 21 import java.io.IOException; 22 23 /** 24 * An object that converts literal text into a format safe for inclusion in a particular context 25 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 26 * text is performed automatically by the relevant parser. 27 * 28 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 29 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 30 * resulting XML document is parsed, the parser API will return this text as the original literal 31 * string {@code "Foo<Bar>"}. 32 * 33 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by 34 * multiple threads. 35 * 36 * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create 37 * your own escapers, use {@link CharEscaperBuilder}, or extend this class and implement the {@link 38 * #escape(char)} method. 39 * 40 * @author sven (at) google.com (Sven Mawson) 41 */ 42 public abstract class CharEscaper extends Escaper { 43 /** 44 * Returns the escaped form of a given literal string. 45 * 46 * @param string the literal string to be escaped 47 * @return the escaped form of {@code string} 48 * @throws NullPointerException if {@code string} is null 49 */ 50 @Override public String escape(String string) { 51 checkNotNull(string); 52 // Inlineable fast-path loop which hands off to escapeSlow() only if needed 53 int length = string.length(); 54 for (int index = 0; index < length; index++) { 55 if (escape(string.charAt(index)) != null) { 56 return escapeSlow(string, index); 57 } 58 } 59 return string; 60 } 61 62 /** 63 * Returns an {@code Appendable} instance which automatically escapes all text appended to it 64 * before passing the resulting text to an underlying {@code Appendable}. 65 * 66 * <p>The methods of the returned object will propagate any exceptions thrown by the underlying 67 * {@code Appendable}, and will throw {@link NullPointerException} if asked to append {@code 68 * null}, but do not otherwise throw any exceptions. 69 * 70 * <p>The escaping behavior is identical to that of {@link #escape(String)}, so the following code 71 * is always equivalent to {@code escaper.escape(string)}: <pre> {@code 72 * 73 * StringBuilder sb = new StringBuilder(); 74 * escaper.escape(sb).append(string); 75 * return sb.toString();}</pre> 76 * 77 * @param out the underlying {@code Appendable} to append escaped output to 78 * @return an {@code Appendable} which passes text to {@code out} after escaping it 79 * @throws NullPointerException if {@code out} is null. 80 */ 81 @Override public Appendable escape(final Appendable out) { 82 checkNotNull(out); 83 84 return new Appendable() { 85 @Override public Appendable append(CharSequence csq) throws IOException { 86 out.append(escape(csq.toString())); 87 return this; 88 } 89 90 @Override public Appendable append(CharSequence csq, int start, int end) throws IOException { 91 out.append(escape(csq.subSequence(start, end).toString())); 92 return this; 93 } 94 95 @Override public Appendable append(char c) throws IOException { 96 char[] escaped = escape(c); 97 if (escaped == null) { 98 out.append(c); 99 } else { 100 for (char e : escaped) { 101 out.append(e); 102 } 103 } 104 return this; 105 } 106 }; 107 } 108 109 /** 110 * Returns the escaped form of a given literal string, starting at the given index. This method is 111 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is 112 * protected to allow subclasses to override the fastpath escaping function to inline their 113 * escaping test. See {@link CharEscaperBuilder} for an example usage. 114 * 115 * @param s the literal string to be escaped 116 * @param index the index to start escaping from 117 * @return the escaped form of {@code string} 118 * @throws NullPointerException if {@code string} is null 119 */ 120 protected String escapeSlow(String s, int index) { 121 int slen = s.length(); 122 123 // Get a destination buffer and setup some loop variables. 124 char[] dest = Platform.charBufferFromThreadLocal(); 125 int destSize = dest.length; 126 int destIndex = 0; 127 int lastEscape = 0; 128 129 // Loop through the rest of the string, replacing when needed into the 130 // destination buffer, which gets grown as needed as well. 131 for (; index < slen; index++) { 132 133 // Get a replacement for the current character. 134 char[] r = escape(s.charAt(index)); 135 136 // If no replacement is needed, just continue. 137 if (r == null) continue; 138 139 int rlen = r.length; 140 int charsSkipped = index - lastEscape; 141 142 // This is the size needed to add the replacement, not the full size needed by the string. We 143 // only regrow when we absolutely must. 144 int sizeNeeded = destIndex + charsSkipped + rlen; 145 if (destSize < sizeNeeded) { 146 destSize = sizeNeeded + (slen - index) + DEST_PAD; 147 dest = growBuffer(dest, destIndex, destSize); 148 } 149 150 // If we have skipped any characters, we need to copy them now. 151 if (charsSkipped > 0) { 152 s.getChars(lastEscape, index, dest, destIndex); 153 destIndex += charsSkipped; 154 } 155 156 // Copy the replacement string into the dest buffer as needed. 157 if (rlen > 0) { 158 System.arraycopy(r, 0, dest, destIndex, rlen); 159 destIndex += rlen; 160 } 161 lastEscape = index + 1; 162 } 163 164 // Copy leftover characters if there are any. 165 int charsLeft = slen - lastEscape; 166 if (charsLeft > 0) { 167 int sizeNeeded = destIndex + charsLeft; 168 if (destSize < sizeNeeded) { 169 170 // Regrow and copy, expensive! No padding as this is the final copy. 171 dest = growBuffer(dest, destIndex, sizeNeeded); 172 } 173 s.getChars(lastEscape, slen, dest, destIndex); 174 destIndex = sizeNeeded; 175 } 176 return new String(dest, 0, destIndex); 177 } 178 179 /** 180 * Returns the escaped form of the given character, or {@code null} if this character does not 181 * need to be escaped. If an empty array is returned, this effectively strips the input character 182 * from the resulting text. 183 * 184 * <p>If the character does not need to be escaped, this method should return {@code null}, rather 185 * than a one-character array containing the character itself. This enables the escaping algorithm 186 * to perform more efficiently. 187 * 188 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should 189 * not throw any exceptions. 190 * 191 * @param c the character to escape if necessary 192 * @return the replacement characters, or {@code null} if no escaping was needed 193 */ 194 protected abstract char[] escape(char c); 195 196 /** 197 * Helper method to grow the character buffer as needed, this only happens once in a while so it's 198 * ok if it's in a method call. If the index passed in is 0 then no copying will be done. 199 */ 200 private static char[] growBuffer(char[] dest, int index, int size) { 201 char[] copy = new char[size]; 202 if (index > 0) { 203 System.arraycopy(dest, 0, copy, 0, index); 204 } 205 return copy; 206 } 207 208 /** 209 * The amount of padding to use when growing the escape buffer. 210 */ 211 private static final int DEST_PAD = 32; 212 }