Home | History | Annotate | Download | only in escape
      1 /*
      2  * Copyright (C) 2009 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.escape;
     18 
     19 import com.google.common.annotations.GwtCompatible;
     20 import com.google.common.collect.ImmutableMap;
     21 import com.google.common.escape.testing.EscaperAsserts;
     22 
     23 import junit.framework.TestCase;
     24 
     25 import java.io.IOException;
     26 import java.util.Map;
     27 
     28 /**
     29  * @author David Beaumont
     30  */
     31 @GwtCompatible
     32 public class ArrayBasedUnicodeEscaperTest extends TestCase {
     33   private static final Map<Character, String> NO_REPLACEMENTS =
     34       ImmutableMap.of();
     35   private static final Map<Character, String> SIMPLE_REPLACEMENTS =
     36       ImmutableMap.of(
     37           '\n', "<newline>",
     38           '\t', "<tab>",
     39           '&', "<and>");
     40   private static final char[] NO_CHARS = new char[0];
     41 
     42   public void testReplacements() throws IOException {
     43     // In reality this is not a very sensible escaper to have (if you are only
     44     // escaping elements from a map you would use a ArrayBasedCharEscaper).
     45     UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
     46         Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
     47           @Override protected char[] escapeUnsafe(int c) {
     48             return NO_CHARS;
     49           }
     50     };
     51     EscaperAsserts.assertBasic(escaper);
     52     assertEquals("<tab>Fish <and> Chips<newline>",
     53         escaper.escape("\tFish & Chips\n"));
     54 
     55     // Verify that everything else is left unescaped.
     56     String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
     57     assertEquals(safeChars, escaper.escape(safeChars));
     58 
     59     // Ensure that Unicode escapers behave correctly wrt badly formed input.
     60     String badUnicode = "\uDC00\uD800";
     61     try {
     62       escaper.escape(badUnicode);
     63       fail("should fail for bad Unicode");
     64     } catch (IllegalArgumentException e) {
     65       // Pass
     66     }
     67   }
     68 
     69   public void testSafeRange() throws IOException {
     70     // Basic escaping of unsafe chars (wrap them in {,}'s)
     71     UnicodeEscaper wrappingEscaper =
     72         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
     73           @Override protected char[] escapeUnsafe(int c) {
     74             return ("{" + (char) c + "}").toCharArray();
     75           }
     76         };
     77     EscaperAsserts.assertBasic(wrappingEscaper);
     78     // '[' and '@' lie either side of [A-Z].
     79     assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
     80   }
     81 
     82   public void testDeleteUnsafeChars() throws IOException {
     83     UnicodeEscaper deletingEscaper =
     84         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
     85           @Override protected char[] escapeUnsafe(int c) {
     86             return NO_CHARS;
     87           }
     88         };
     89     EscaperAsserts.assertBasic(deletingEscaper);
     90     assertEquals("Everything outside the printable ASCII range is deleted.",
     91         deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
     92             "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
     93   }
     94 
     95   public void testReplacementPriority() throws IOException {
     96     UnicodeEscaper replacingEscaper =
     97         new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
     98           private final char[] unknown = new char[] { '?' };
     99           @Override protected char[] escapeUnsafe(int c) {
    100             return unknown;
    101           }
    102         };
    103     EscaperAsserts.assertBasic(replacingEscaper);
    104 
    105     // Replacements are applied first regardless of whether the character is in
    106     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
    107     assertEquals("<tab>Fish <and>? Chips?<newline>",
    108         replacingEscaper.escape("\tFish &\0 Chips\r\n"));
    109   }
    110 
    111   public void testCodePointsFromSurrogatePairs() throws IOException {
    112     UnicodeEscaper surrogateEscaper =
    113         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
    114           private final char[] escaped = new char[] { 'X' };
    115           @Override protected char[] escapeUnsafe(int c) {
    116             return escaped;
    117           }
    118         };
    119     EscaperAsserts.assertBasic(surrogateEscaper);
    120 
    121     // A surrogate pair defining a code point within the safe range.
    122     String safeInput = "\uD800\uDC00";  // 0x10000
    123     assertEquals(safeInput, surrogateEscaper.escape(safeInput));
    124 
    125     // A surrogate pair defining a code point outside the safe range (but both
    126     // of the surrogate characters lie within the safe range). It is important
    127     // not to accidentally treat this as a sequence of safe characters.
    128     String unsafeInput = "\uDBFF\uDFFF";  // 0x10FFFF
    129     assertEquals("X", surrogateEscaper.escape(unsafeInput));
    130   }
    131 }
    132