1 /* 2 * Copyright (C) 2008 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.escape; 18 19 import com.google.common.annotations.GwtCompatible; 20 21 import junit.framework.TestCase; 22 23 /** 24 * Tests for {@link UnicodeEscaper}. 25 * 26 * @author David Beaumont 27 */ 28 @GwtCompatible 29 public class UnicodeEscaperTest extends TestCase { 30 31 private static final String SMALLEST_SURROGATE = 32 "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE; 33 private static final String LARGEST_SURROGATE = 34 "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE; 35 36 private static final String TEST_STRING = 37 "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" + 38 SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE; 39 40 // Escapes nothing 41 private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() { 42 @Override 43 protected char[] escape(int c) { 44 return null; 45 } 46 }; 47 48 // Escapes everything except [a-zA-Z0-9] 49 private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() { 50 @Override 51 protected char[] escape(int cp) { 52 return ('a' <= cp && cp <= 'z') || 53 ('A' <= cp && cp <= 'Z') || 54 ('0' <= cp && cp <= '9') ? null : 55 ("[" + String.valueOf(cp) + "]").toCharArray(); 56 } 57 }; 58 59 public void testNopEscaper() { 60 UnicodeEscaper e = NOP_ESCAPER; 61 assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING)); 62 } 63 64 public void testSimpleEscaper() { 65 UnicodeEscaper e = SIMPLE_ESCAPER; 66 String expected = 67 "[0]abyz[128][256][2048][4096]ABYZ[65535]" + 68 "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" + 69 "0189[" + Character.MAX_CODE_POINT + "]"; 70 assertEquals(expected, escapeAsString(e, TEST_STRING)); 71 } 72 73 public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer 74 StringBuffer input = new StringBuffer(); 75 StringBuffer expected = new StringBuffer(); 76 for (int i = 256; i < 1024; i++) { 77 input.append((char) i); 78 expected.append("[" + i + "]"); 79 } 80 assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString())); 81 } 82 83 public void testSurrogatePairs() { 84 UnicodeEscaper e = SIMPLE_ESCAPER; 85 86 // Build up a range of surrogate pair characters to test 87 final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT; 88 final int max = Character.MAX_CODE_POINT; 89 final int range = max - min; 90 final int s1 = min + (1 * range) / 4; 91 final int s2 = min + (2 * range) / 4; 92 final int s3 = min + (3 * range) / 4; 93 final char[] dst = new char[12]; 94 95 // Put surrogate pairs at odd indices so they can be split easily 96 dst[0] = 'x'; 97 Character.toChars(min, dst, 1); 98 Character.toChars(s1, dst, 3); 99 Character.toChars(s2, dst, 5); 100 Character.toChars(s3, dst, 7); 101 Character.toChars(max, dst, 9); 102 dst[11] = 'x'; 103 String test = new String(dst); 104 105 // Get the expected result string 106 String expected = 107 "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x"; 108 assertEquals(expected, escapeAsString(e, test)); 109 } 110 111 public void testTrailingHighSurrogate() { 112 String test = "abc" + Character.MIN_HIGH_SURROGATE; 113 try { 114 escapeAsString(NOP_ESCAPER, test); 115 fail("Trailing high surrogate should cause exception"); 116 } catch (IllegalArgumentException expected) { 117 // Pass 118 } 119 try { 120 escapeAsString(SIMPLE_ESCAPER, test); 121 fail("Trailing high surrogate should cause exception"); 122 } catch (IllegalArgumentException expected) { 123 // Pass 124 } 125 } 126 127 public void testNullInput() { 128 UnicodeEscaper e = SIMPLE_ESCAPER; 129 try { 130 e.escape((String) null); 131 fail("Null string should cause exception"); 132 } catch (NullPointerException expected) { 133 // Pass 134 } 135 } 136 137 public void testBadStrings() { 138 UnicodeEscaper e = SIMPLE_ESCAPER; 139 String[] BAD_STRINGS = { 140 String.valueOf(Character.MIN_LOW_SURROGATE), 141 Character.MIN_LOW_SURROGATE + "xyz", 142 "abc" + Character.MIN_LOW_SURROGATE, 143 "abc" + Character.MIN_LOW_SURROGATE + "xyz", 144 String.valueOf(Character.MAX_LOW_SURROGATE), 145 Character.MAX_LOW_SURROGATE + "xyz", 146 "abc" + Character.MAX_LOW_SURROGATE, 147 "abc" + Character.MAX_LOW_SURROGATE + "xyz", 148 }; 149 for (String s : BAD_STRINGS) { 150 try { 151 escapeAsString(e, s); 152 fail("Isolated low surrogate should cause exception [" + s + "]"); 153 } catch (IllegalArgumentException expected) { 154 // Pass 155 } 156 } 157 } 158 159 public void testFalsePositivesForNextEscapedIndex() { 160 UnicodeEscaper e = new UnicodeEscaper() { 161 // Canonical escaper method that only escapes lower case ASCII letters. 162 @Override 163 protected char[] escape(int cp) { 164 return ('a' <= cp && cp <= 'z') ? 165 new char[] { Character.toUpperCase((char) cp) } : null; 166 } 167 // Inefficient implementation that defines all letters as escapable. 168 @Override 169 protected int nextEscapeIndex(CharSequence csq, int index, int end) { 170 while (index < end && !Character.isLetter(csq.charAt(index))) { 171 index++; 172 } 173 return index; 174 } 175 }; 176 assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", 177 e.escape("\0HeLLo \uD800\uDC00 WorlD!\n")); 178 } 179 180 public void testCodePointAt_IndexOutOfBoundsException() { 181 try { 182 UnicodeEscaper.codePointAt("Testing...", 4, 2); 183 fail(); 184 } catch (IndexOutOfBoundsException expected) { 185 } 186 } 187 188 private String escapeAsString(Escaper e, String s) { 189 return e.escape(s); 190 } 191 } 192