Home | History | Annotate | Download | only in base
      1 /**
      2  * Copyright (c) 2006, Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.android.mail.common.base;
     18 
     19 import static com.google.android.mail.common.base.Preconditions.checkNotNull;
     20 
     21 import java.io.IOException;
     22 
     23 /**
     24  * An object that converts literal text into a format safe for inclusion in a particular context
     25  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
     26  * text is performed automatically by the relevant parser.
     27  *
     28  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
     29  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
     30  * resulting XML document is parsed, the parser API will return this text as the original literal
     31  * string {@code "Foo<Bar>"}.
     32  *
     33  * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
     34  * multiple threads.
     35  *
     36  * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
     37  * your own escapers, use {@link CharEscaperBuilder}, or extend this class and implement the {@link
     38  * #escape(char)} method.
     39  *
     40  * @author sven (at) google.com (Sven Mawson)
     41  */
     42 public abstract class CharEscaper extends Escaper {
     43   /**
     44    * Returns the escaped form of a given literal string.
     45    *
     46    * @param string the literal string to be escaped
     47    * @return the escaped form of {@code string}
     48    * @throws NullPointerException if {@code string} is null
     49    */
     50   @Override public String escape(String string) {
     51     checkNotNull(string);
     52     // Inlineable fast-path loop which hands off to escapeSlow() only if needed
     53     int length = string.length();
     54     for (int index = 0; index < length; index++) {
     55       if (escape(string.charAt(index)) != null) {
     56         return escapeSlow(string, index);
     57       }
     58     }
     59     return string;
     60   }
     61 
     62   /**
     63    * Returns an {@code Appendable} instance which automatically escapes all text appended to it
     64    * before passing the resulting text to an underlying {@code Appendable}.
     65    *
     66    * <p>The methods of the returned object will propagate any exceptions thrown by the underlying
     67    * {@code Appendable}, and will throw {@link NullPointerException} if asked to append {@code
     68    * null}, but do not otherwise throw any exceptions.
     69    *
     70    * <p>The escaping behavior is identical to that of {@link #escape(String)}, so the following code
     71    * is always equivalent to {@code escaper.escape(string)}: <pre>   {@code
     72    *
     73    *   StringBuilder sb = new StringBuilder();
     74    *   escaper.escape(sb).append(string);
     75    *   return sb.toString();}</pre>
     76    *
     77    * @param out the underlying {@code Appendable} to append escaped output to
     78    * @return an {@code Appendable} which passes text to {@code out} after escaping it
     79    * @throws NullPointerException if {@code out} is null.
     80    */
     81   @Override public Appendable escape(final Appendable out) {
     82     checkNotNull(out);
     83 
     84     return new Appendable() {
     85       @Override public Appendable append(CharSequence csq) throws IOException {
     86         out.append(escape(csq.toString()));
     87         return this;
     88       }
     89 
     90       @Override public Appendable append(CharSequence csq, int start, int end) throws IOException {
     91         out.append(escape(csq.subSequence(start, end).toString()));
     92         return this;
     93       }
     94 
     95       @Override public Appendable append(char c) throws IOException {
     96         char[] escaped = escape(c);
     97         if (escaped == null) {
     98           out.append(c);
     99         } else {
    100           for (char e : escaped) {
    101             out.append(e);
    102           }
    103         }
    104         return this;
    105       }
    106     };
    107   }
    108 
    109   /**
    110    * Returns the escaped form of a given literal string, starting at the given index. This method is
    111    * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
    112    * protected to allow subclasses to override the fastpath escaping function to inline their
    113    * escaping test. See {@link CharEscaperBuilder} for an example usage.
    114    *
    115    * @param s the literal string to be escaped
    116    * @param index the index to start escaping from
    117    * @return the escaped form of {@code string}
    118    * @throws NullPointerException if {@code string} is null
    119    */
    120   protected String escapeSlow(String s, int index) {
    121     int slen = s.length();
    122 
    123     // Get a destination buffer and setup some loop variables.
    124     char[] dest = Platform.charBufferFromThreadLocal();
    125     int destSize = dest.length;
    126     int destIndex = 0;
    127     int lastEscape = 0;
    128 
    129     // Loop through the rest of the string, replacing when needed into the
    130     // destination buffer, which gets grown as needed as well.
    131     for (; index < slen; index++) {
    132 
    133       // Get a replacement for the current character.
    134       char[] r = escape(s.charAt(index));
    135 
    136       // If no replacement is needed, just continue.
    137       if (r == null) continue;
    138 
    139       int rlen = r.length;
    140       int charsSkipped = index - lastEscape;
    141 
    142       // This is the size needed to add the replacement, not the full size needed by the string. We
    143       // only regrow when we absolutely must.
    144       int sizeNeeded = destIndex + charsSkipped + rlen;
    145       if (destSize < sizeNeeded) {
    146         destSize = sizeNeeded + (slen - index) + DEST_PAD;
    147         dest = growBuffer(dest, destIndex, destSize);
    148       }
    149 
    150       // If we have skipped any characters, we need to copy them now.
    151       if (charsSkipped > 0) {
    152         s.getChars(lastEscape, index, dest, destIndex);
    153         destIndex += charsSkipped;
    154       }
    155 
    156       // Copy the replacement string into the dest buffer as needed.
    157       if (rlen > 0) {
    158         System.arraycopy(r, 0, dest, destIndex, rlen);
    159         destIndex += rlen;
    160       }
    161       lastEscape = index + 1;
    162     }
    163 
    164     // Copy leftover characters if there are any.
    165     int charsLeft = slen - lastEscape;
    166     if (charsLeft > 0) {
    167       int sizeNeeded = destIndex + charsLeft;
    168       if (destSize < sizeNeeded) {
    169 
    170         // Regrow and copy, expensive! No padding as this is the final copy.
    171         dest = growBuffer(dest, destIndex, sizeNeeded);
    172       }
    173       s.getChars(lastEscape, slen, dest, destIndex);
    174       destIndex = sizeNeeded;
    175     }
    176     return new String(dest, 0, destIndex);
    177   }
    178 
    179   /**
    180    * Returns the escaped form of the given character, or {@code null} if this character does not
    181    * need to be escaped. If an empty array is returned, this effectively strips the input character
    182    * from the resulting text.
    183    *
    184    * <p>If the character does not need to be escaped, this method should return {@code null}, rather
    185    * than a one-character array containing the character itself. This enables the escaping algorithm
    186    * to perform more efficiently.
    187    *
    188    * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
    189    * not throw any exceptions.
    190    *
    191    * @param c the character to escape if necessary
    192    * @return the replacement characters, or {@code null} if no escaping was needed
    193    */
    194   protected abstract char[] escape(char c);
    195 
    196   /**
    197    * Helper method to grow the character buffer as needed, this only happens once in a while so it's
    198    * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
    199    */
    200   private static char[] growBuffer(char[] dest, int index, int size) {
    201     char[] copy = new char[size];
    202     if (index > 0) {
    203       System.arraycopy(dest, 0, copy, 0, index);
    204     }
    205     return copy;
    206   }
    207 
    208   /**
    209    * The amount of padding to use when growing the escape buffer.
    210    */
    211   private static final int DEST_PAD = 32;
    212 }