/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.escape;

import com.google.common.annotations.GwtCompatible;
import com.google.common.collect.ImmutableMap;
import com.google.common.escape.testing.EscaperAsserts;

import junit.framework.TestCase;

import java.io.IOException;
import java.util.Map;

/**
 * Tests for {@link ArrayBasedUnicodeEscaper}, exercised through small anonymous
 * subclasses that each supply their own {@code escapeUnsafe} behaviour.
 *
 * @author David Beaumont
 */
@GwtCompatible
public class ArrayBasedUnicodeEscaperTest extends TestCase {
  /** A replacement map with no entries. */
  private static final Map<Character, String> EMPTY_MAP = ImmutableMap.of();

  /** Replacements for newline, tab and ampersand. */
  private static final Map<Character, String> BASIC_REPLACEMENTS =
      ImmutableMap.of(
          '\n', "<newline>",
          '\t', "<tab>",
          '&', "<and>");

  /** Returned by {@code escapeUnsafe} implementations that emit nothing. */
  private static final char[] EMPTY_CHARS = new char[0];

  /**
   * Checks that mapped characters are replaced, that everything else passes
   * through untouched, and that malformed surrogate sequences are rejected.
   */
  public void testReplacements() throws IOException {
    // Not a very sensible escaper in practice: when only the elements of a map
    // are being escaped, an ArrayBasedCharEscaper would be used instead.
    UnicodeEscaper mapOnlyEscaper = new ArrayBasedUnicodeEscaper(
        BASIC_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
      @Override protected char[] escapeUnsafe(int c) {
        return EMPTY_CHARS;
      }
    };
    EscaperAsserts.assertBasic(mapOnlyEscaper);

    String escapedMeal = mapOnlyEscaper.escape("\tFish & Chips\n");
    assertEquals("<tab>Fish <and> Chips<newline>", escapedMeal);

    // Characters without a replacement must come back unchanged.
    String untouched = "\0\u0100\uD800\uDC00\uFFFF";
    assertEquals(untouched, mapOnlyEscaper.escape(untouched));

    // Badly formed input (a low surrogate followed by a high surrogate) must
    // be rejected with an IllegalArgumentException.
    String malformed = "\uDC00\uD800";
    boolean rejected = false;
    try {
      mapOnlyEscaper.escape(malformed);
    } catch (IllegalArgumentException expected) {
      rejected = true;
    }
    assertTrue("should fail for bad Unicode", rejected);
  }

  /**
   * Checks that characters outside the safe range [A-Z] are passed to
   * {@code escapeUnsafe}, which here wraps each of them in braces.
   */
  public void testSafeRange() throws IOException {
    UnicodeEscaper braceEscaper =
        new ArrayBasedUnicodeEscaper(EMPTY_MAP, 'A', 'Z', null) {
          @Override protected char[] escapeUnsafe(int c) {
            return new char[] { '{', (char) c, '}' };
          }
        };
    EscaperAsserts.assertBasic(braceEscaper);

    // '@' sits immediately below 'A' and '[' immediately above 'Z'.
    String escaped = braceEscaper.escape("[FOO@BAR]");
    assertEquals("{[}FOO{@}BAR{]}", escaped);
  }

  /**
   * Checks that an {@code escapeUnsafe} returning an empty array removes every
   * character outside the safe range (here, printable ASCII).
   */
  public void testDeleteUnsafeChars() throws IOException {
    UnicodeEscaper strippingEscaper =
        new ArrayBasedUnicodeEscaper(EMPTY_MAP, ' ', '~', null) {
          @Override protected char[] escapeUnsafe(int c) {
            return EMPTY_CHARS;
          }
        };
    EscaperAsserts.assertBasic(strippingEscaper);

    String noisyInput = "\tEverything\0 outside the\uD800\uDC00 " +
        "printable ASCII \uFFFFrange is \u007Fdeleted.\n";
    assertEquals("Everything outside the printable ASCII range is deleted.",
        strippingEscaper.escape(noisyInput));
  }

  /**
   * Checks that map replacements are applied first, whether or not the
   * character falls inside the safe range.
   */
  public void testReplacementPriority() throws IOException {
    final char[] questionMark = new char[] { '?' };
    UnicodeEscaper prioritisingEscaper =
        new ArrayBasedUnicodeEscaper(BASIC_REPLACEMENTS, ' ', '~', null) {
          @Override protected char[] escapeUnsafe(int c) {
            return questionMark;
          }
        };
    EscaperAsserts.assertBasic(prioritisingEscaper);

    // '&' lies inside the safe range while '\t' and '\n' lie outside it; all
    // three are expected to be replaced from the map.
    String escaped = prioritisingEscaper.escape("\tFish &\0 Chips\r\n");
    assertEquals("<tab>Fish <and>? Chips?<newline>", escaped);
  }

  /**
   * Checks that the safe range is tested against the code point a surrogate
   * pair encodes, not against the individual surrogate characters.
   */
  public void testCodePointsFromSurrogatePairs() throws IOException {
    final char[] marker = new char[] { 'X' };
    UnicodeEscaper pairEscaper =
        new ArrayBasedUnicodeEscaper(EMPTY_MAP, 0, 0x20000, null) {
          @Override protected char[] escapeUnsafe(int c) {
            return marker;
          }
        };
    EscaperAsserts.assertBasic(pairEscaper);

    // This pair encodes 0x10000, which is inside the safe range.
    String insideRange = "\uD800\uDC00";
    assertEquals(insideRange, pairEscaper.escape(insideRange));

    // This pair encodes 0x10FFFF, which is outside the safe range even though
    // each surrogate char on its own is inside it. It must not be mistaken
    // for a sequence of two safe characters.
    String outsideRange = "\uDBFF\uDFFF";
    assertEquals("X", pairEscaper.escape(outsideRange));
  }
}