Home | History | Annotate | Download | only in base
      1 /**
      2  * Copyright (c) 2008, Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.android.mail.common.base;
     18 
     19 /**
     20  * An object that converts literal text into a format safe for inclusion in a particular context
     21  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
     22  * text is performed automatically by the relevant parser.
     23  *
     24  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
     25  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
     26  * resulting XML document is parsed, the parser API will return this text as the original literal
     27  * string {@code "Foo<Bar>"}.
     28  *
     29  * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by
     30  * multiple threads.
     31  *
     32  * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link
     33  * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of
     34  * implementing new escapers. It is strongly recommended that when implementing a new escaper you
     35  * extend one of these classes. If you find that you are unable to achieve the desired behavior
     36  * using either of these classes, please contact the Java libraries team for advice.
     37  *
     38  * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
     39  * your own escapers, use {@link CharEscaperBuilder}, or extend {@link CharEscaper} or {@code
     40  * UnicodeEscaper}.
     41  *
     42  * @author dbeaumont (at) google.com (David Beaumont)
     43  */
     44 public abstract class Escaper {
     45   /**
     46    * Returns the escaped form of a given literal string.
     47    *
     48    * <p>Note that this method may treat input characters differently depending on the specific
     49    * escaper implementation.
     50    *
     51    * <ul>
     52    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
     53    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
     54    *     should throw {@link IllegalArgumentException}.
     55    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
     56    *     for well formed characters. A CharEscaper should not be used in situations where input is
     57    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
     58    * </ul>
     59    *
     60    * @param string the literal string to be escaped
     61    * @return the escaped form of {@code string}
     62    * @throws NullPointerException if {@code string} is null
     63    * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be
     64    *         escaped for any other reason
     65    */
     66   public abstract String escape(String string);
     67 
     68   /**
     69    * Returns an {@code Appendable} instance which automatically escapes all text appended to it
     70    * before passing the resulting text to an underlying {@code Appendable}.
     71    *
     72    * <p>Note that the Appendable returned by this method may treat input characters differently
     73    * depending on the specific escaper implementation.
     74    *
     75    * <ul>
     76    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
     77    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
     78    *     should throw {@link IllegalArgumentException}.
     79    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
     80    *     for well formed characters. A CharEscaper should not be used in situations where input is
     81    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
     82    * </ul>
     83    *
     84    * <p>In all implementations the escaped Appendable should throw {@code NullPointerException} if
     85    * given a {@code null} {@link CharSequence}.
     86    *
     87    * @param out the underlying {@code Appendable} to append escaped output to
     88    * @return an {@code Appendable} which passes text to {@code out} after escaping it
     89    */
     90   public abstract Appendable escape(Appendable out);
     91 
     92   private final Function<String, String> asFunction =
     93       new Function<String, String>() {
     94         public String apply(String from) {
     95           return escape(from);
     96         }
     97       };
     98 
     99   /**
    100    * Returns a {@link Function} that invokes {@link #escape(String)} on this escaper.
    101    */
    102   public Function<String, String> asFunction() {
    103     return asFunction;
    104   }
    105 }