Home | History | Annotate | Download | only in escape
      1 /*
      2  * Copyright (C) 2008 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.escape;
     18 
     19 import com.google.common.annotations.GwtCompatible;
     20 
     21 import junit.framework.TestCase;
     22 
     23 /**
     24  * Tests for {@link UnicodeEscaper}.
     25  *
     26  * @author David Beaumont
     27  */
     28 @GwtCompatible
     29 public class UnicodeEscaperTest extends TestCase {
     30 
     31   private static final String SMALLEST_SURROGATE =
     32       "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
     33   private static final String LARGEST_SURROGATE =
     34       "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;
     35 
     36   private static final String TEST_STRING =
     37       "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" +
     38       SMALLEST_SURROGATE + "0189" +  LARGEST_SURROGATE;
     39 
     40   // Escapes nothing
     41   private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
     42     @Override
     43     protected char[] escape(int c) {
     44       return null;
     45     }
     46   };
     47 
     48   // Escapes everything except [a-zA-Z0-9]
     49   private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
     50     @Override
     51     protected char[] escape(int cp) {
     52       return ('a' <= cp && cp <= 'z') ||
     53              ('A' <= cp && cp <= 'Z') ||
     54              ('0' <= cp && cp <= '9') ? null :
     55           ("[" + String.valueOf(cp) + "]").toCharArray();
     56     }
     57   };
     58 
     59   public void testNopEscaper() {
     60     UnicodeEscaper e = NOP_ESCAPER;
     61     assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
     62   }
     63 
     64   public void testSimpleEscaper() {
     65     UnicodeEscaper e = SIMPLE_ESCAPER;
     66     String expected =
     67         "[0]abyz[128][256][2048][4096]ABYZ[65535]" +
     68         "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" +
     69         "0189[" + Character.MAX_CODE_POINT + "]";
     70     assertEquals(expected, escapeAsString(e, TEST_STRING));
     71   }
     72 
     73   public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
     74     StringBuffer input = new StringBuffer();
     75     StringBuffer expected = new StringBuffer();
     76     for (int i = 256; i < 1024; i++) {
     77       input.append((char) i);
     78       expected.append("[" + i + "]");
     79     }
     80     assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
     81   }
     82 
     83   public void testSurrogatePairs() {
     84     UnicodeEscaper e = SIMPLE_ESCAPER;
     85 
     86     // Build up a range of surrogate pair characters to test
     87     final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
     88     final int max = Character.MAX_CODE_POINT;
     89     final int range = max - min;
     90     final int s1 = min + (1 * range) / 4;
     91     final int s2 = min + (2 * range) / 4;
     92     final int s3 = min + (3 * range) / 4;
     93     final char[] dst = new char[12];
     94 
     95     // Put surrogate pairs at odd indices so they can be split easily
     96     dst[0] = 'x';
     97     Character.toChars(min, dst, 1);
     98     Character.toChars(s1, dst, 3);
     99     Character.toChars(s2, dst, 5);
    100     Character.toChars(s3, dst, 7);
    101     Character.toChars(max, dst, 9);
    102     dst[11] = 'x';
    103     String test = new String(dst);
    104 
    105     // Get the expected result string
    106     String expected =
    107         "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
    108     assertEquals(expected, escapeAsString(e, test));
    109   }
    110 
    111   public void testTrailingHighSurrogate() {
    112     String test = "abc" + Character.MIN_HIGH_SURROGATE;
    113     try {
    114       escapeAsString(NOP_ESCAPER, test);
    115       fail("Trailing high surrogate should cause exception");
    116     } catch (IllegalArgumentException expected) {
    117       // Pass
    118     }
    119     try {
    120       escapeAsString(SIMPLE_ESCAPER, test);
    121       fail("Trailing high surrogate should cause exception");
    122     } catch (IllegalArgumentException expected) {
    123       // Pass
    124     }
    125   }
    126 
    127   public void testNullInput() {
    128     UnicodeEscaper e = SIMPLE_ESCAPER;
    129     try {
    130       e.escape((String) null);
    131       fail("Null string should cause exception");
    132     } catch (NullPointerException expected) {
    133       // Pass
    134     }
    135   }
    136 
    137   public void testBadStrings() {
    138     UnicodeEscaper e = SIMPLE_ESCAPER;
    139     String[] BAD_STRINGS = {
    140         String.valueOf(Character.MIN_LOW_SURROGATE),
    141         Character.MIN_LOW_SURROGATE + "xyz",
    142         "abc" + Character.MIN_LOW_SURROGATE,
    143         "abc" + Character.MIN_LOW_SURROGATE + "xyz",
    144         String.valueOf(Character.MAX_LOW_SURROGATE),
    145         Character.MAX_LOW_SURROGATE + "xyz",
    146         "abc" + Character.MAX_LOW_SURROGATE,
    147         "abc" + Character.MAX_LOW_SURROGATE + "xyz",
    148     };
    149     for (String s : BAD_STRINGS) {
    150       try {
    151         escapeAsString(e, s);
    152         fail("Isolated low surrogate should cause exception [" + s + "]");
    153       } catch (IllegalArgumentException expected) {
    154         // Pass
    155       }
    156     }
    157   }
    158 
    159   public void testFalsePositivesForNextEscapedIndex() {
    160     UnicodeEscaper e = new UnicodeEscaper() {
    161       // Canonical escaper method that only escapes lower case ASCII letters.
    162       @Override
    163       protected char[] escape(int cp) {
    164         return ('a' <= cp && cp <= 'z') ?
    165             new char[] { Character.toUpperCase((char) cp) } : null;
    166       }
    167       // Inefficient implementation that defines all letters as escapable.
    168       @Override
    169       protected int nextEscapeIndex(CharSequence csq, int index, int end) {
    170         while (index < end && !Character.isLetter(csq.charAt(index))) {
    171           index++;
    172         }
    173         return index;
    174       }
    175     };
    176     assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
    177         e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
    178   }
    179 
    180   public void testCodePointAt_IndexOutOfBoundsException() {
    181     try {
    182       UnicodeEscaper.codePointAt("Testing...", 4, 2);
    183       fail();
    184     } catch (IndexOutOfBoundsException expected) {
    185     }
    186   }
    187 
    188   private String escapeAsString(Escaper e, String s) {
    189     return e.escape(s);
    190   }
    191 }
    192