Home | History | Annotate | Download | only in escape
      1 /*
      2  * Copyright (C) 2009 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.escape;
     18 
     19 import java.util.HashMap;
     20 import java.util.Map;
     21 
     22 /**
     23  * A factory for Escaper instances used to escape strings for safe use in Java.
     24  *
     25  * <p>This is a subset of source code escapers that are in the process of being open-sources as part
     26  * of guava, see: https://github.com/google/guava/issues/1620
     27  */
     28 // TODO(cushon): migrate to the guava version once it is open-sourced, and delete this
     29 public final class SourceCodeEscapers {
     30   private SourceCodeEscapers() {}
     31 
     32   // For each xxxEscaper() method, please add links to external reference pages
     33   // that are considered authoritative for the behavior of that escaper.
     34 
     35   // From: http://en.wikipedia.org/wiki/ASCII#ASCII_printable_characters
     36   private static final char PRINTABLE_ASCII_MIN = 0x20; // ' '
     37   private static final char PRINTABLE_ASCII_MAX = 0x7E; // '~'
     38 
     39   private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();
     40 
     41   /**
     42    * Returns an {@link Escaper} instance that escapes special characters in a string so it can
     43    * safely be included in either a Java character literal or string literal. This is the preferred
     44    * way to escape Java characters for use in String or character literals.
     45    *
     46    * <p>See: <a href= "http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089"
     47    * >The Java Language Specification</a> for more details.
     48    */
     49   public static CharEscaper javaCharEscaper() {
     50     return JAVA_CHAR_ESCAPER;
     51   }
     52 
     53   private static final CharEscaper JAVA_CHAR_ESCAPER;
     54 
     55   static {
     56     Map<Character, String> javaMap = new HashMap<>();
     57     javaMap.put('\b', "\\b");
     58     javaMap.put('\f', "\\f");
     59     javaMap.put('\n', "\\n");
     60     javaMap.put('\r', "\\r");
     61     javaMap.put('\t', "\\t");
     62     javaMap.put('\"', "\\\"");
     63     javaMap.put('\\', "\\\\");
     64     javaMap.put('\'', "\\'");
     65     JAVA_CHAR_ESCAPER = new JavaCharEscaper(javaMap);
     66   }
     67 
     68   // This escaper does not produce octal escape sequences. See:
     69   // http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089
     70   //  "Octal escapes are provided for compatibility with C, but can express
     71   //   only Unicode values \u0000 through \u00FF, so Unicode escapes are
     72   //   usually preferred."
     73   private static class JavaCharEscaper extends ArrayBasedCharEscaper {
     74     JavaCharEscaper(Map<Character, String> replacements) {
     75       super(replacements, PRINTABLE_ASCII_MIN, PRINTABLE_ASCII_MAX);
     76     }
     77 
     78     @Override
     79     protected char[] escapeUnsafe(char c) {
     80       return asUnicodeHexEscape(c);
     81     }
     82   }
     83 
     84   // Helper for common case of escaping a single char.
     85   private static char[] asUnicodeHexEscape(char c) {
     86     // Equivalent to String.format("\\u%04x", (int)c);
     87     char[] r = new char[6];
     88     r[0] = '\\';
     89     r[1] = 'u';
     90     r[5] = HEX_DIGITS[c & 0xF];
     91     c >>>= 4;
     92     r[4] = HEX_DIGITS[c & 0xF];
     93     c >>>= 4;
     94     r[3] = HEX_DIGITS[c & 0xF];
     95     c >>>= 4;
     96     r[2] = HEX_DIGITS[c & 0xF];
     97     return r;
     98   }
     99 }
    100