Home | History | Annotate | Download | only in base
      1 /*
      2  * Copyright (C) 2008 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package com.android.mail.lib.base;
     17 
     18 /**
     19  * An object that converts literal text into a format safe for inclusion in a particular context
     20  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
     21  * text is performed automatically by the relevant parser.
     22  *
     23  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
     24  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
     25  * resulting XML document is parsed, the parser API will return this text as the original literal
     26  * string {@code "Foo<Bar>"}.
     27  *
     28  * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by
     29  * multiple threads.
     30  *
     31  * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link
     32  * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of
     33  * implementing new escapers. It is strongly recommended that when implementing a new escaper you
     34  * extend one of these classes. If you find that you are unable to achieve the desired behavior
     35  * using either of these classes, please contact the Java libraries team for advice.
     36  *
     37  * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
     38  * your own escapers, use {@link CharEscaperBuilder}, or extend {@link CharEscaper} or {@code
     39  * UnicodeEscaper}.
     40  *
     41  * @author dbeaumont (at) google.com (David Beaumont)
     42  */
     43 public abstract class Escaper {
     44   /**
     45    * Returns the escaped form of a given literal string.
     46    *
     47    * <p>Note that this method may treat input characters differently depending on the specific
     48    * escaper implementation.
     49    *
     50    * <ul>
     51    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
     52    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
     53    *     should throw {@link IllegalArgumentException}.
     54    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
     55    *     for well formed characters. A CharEscaper should not be used in situations where input is
     56    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
     57    * </ul>
     58    *
     59    * @param string the literal string to be escaped
     60    * @return the escaped form of {@code string}
     61    * @throws NullPointerException if {@code string} is null
     62    * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be
     63    *         escaped for any other reason
     64    */
     65   public abstract String escape(String string);
     66 
     67   /**
     68    * Returns an {@code Appendable} instance which automatically escapes all text appended to it
     69    * before passing the resulting text to an underlying {@code Appendable}.
     70    *
     71    * <p>Note that the Appendable returned by this method may treat input characters differently
     72    * depending on the specific escaper implementation.
     73    *
     74    * <ul>
     75    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
     76    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
     77    *     should throw {@link IllegalArgumentException}.
     78    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
     79    *     for well formed characters. A CharEscaper should not be used in situations where input is
     80    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
     81    * </ul>
     82    *
     83    * <p>In all implementations the escaped Appendable should throw {@code NullPointerException} if
     84    * given a {@code null} {@link CharSequence}.
     85    *
     86    * @param out the underlying {@code Appendable} to append escaped output to
     87    * @return an {@code Appendable} which passes text to {@code out} after escaping it
     88    */
     89   public abstract Appendable escape(Appendable out);
     90 
     91   private final Function<String, String> asFunction =
     92       new Function<String, String>() {
     93         public String apply(String from) {
     94           return escape(from);
     95         }
     96       };
     97 
     98   /**
     99    * Returns a {@link Function} that invokes {@link #escape(String)} on this escaper.
    100    */
    101   public Function<String, String> asFunction() {
    102     return asFunction;
    103   }
    104 }