Home | History | Annotate | Download | only in escape
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.clearsilver.jsilver.functions.escape;
     18 
     19 
     20 /**
     21  * This class HTML escapes a string in the same way as the ClearSilver html_escape function.
     22  *
     23  * This implementation has been optimized for performance.
     24  *
     25  */
     26 public class HtmlEscapeFunction extends SimpleEscapingFunction {
     27 
     28   // The escape chars
     29   private static final char[] ESCAPE_CHARS = {'<', '>', '&', '\'', '"'};
     30 
     31   // UNQUOTED_ESCAPE_CHARS = ESCAPE_CHARS + UNQUOTED_EXTRA_CHARS + chars < 0x20 + 0x7f
     32   private static final char[] UNQUOTED_ESCAPE_CHARS;
     33 
     34   private static final char[] UNQUOTED_EXTRA_CHARS = {'=', ' '};
     35 
     36   // The corresponding escape strings for all ascii characters.
     37   // With control characters, we simply strip them out if necessary.
     38   private static String[] ESCAPE_CODES =
     39       {"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
     40           "", "", "", "", "", "", "", "", "", "", "!", "&quot;", "#", "$", "%", "&amp;", "&#39;",
     41           "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
     42           ":", ";", "&lt;", "&#61;", "&gt;", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I",
     43           "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[",
     44           "\\", "]", "^", "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
     45           "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~",
     46           ""};
     47 
     48   static {
     49     UNQUOTED_ESCAPE_CHARS = new char[33 + ESCAPE_CHARS.length + UNQUOTED_EXTRA_CHARS.length];
     50     // In unquoted HTML attributes, strip out control characters also, as they could
     51     // get interpreted as end of attribute, just like spaces.
     52     for (int n = 0; n <= 0x1f; n++) {
     53       UNQUOTED_ESCAPE_CHARS[n] = (char) n;
     54     }
     55     UNQUOTED_ESCAPE_CHARS[32] = (char) 0x7f;
     56     System.arraycopy(ESCAPE_CHARS, 0, UNQUOTED_ESCAPE_CHARS, 33, ESCAPE_CHARS.length);
     57     System.arraycopy(UNQUOTED_EXTRA_CHARS, 0, UNQUOTED_ESCAPE_CHARS, 33 + ESCAPE_CHARS.length,
     58         UNQUOTED_EXTRA_CHARS.length);
     59 
     60   }
     61 
     62   /**
     63    * isUnquoted should be true if the function is escaping a string that will appear inside an
     64    * unquoted HTML attribute.
     65    *
     66    * If the string is unquoted, we strip out all characters 0 - 0x1f and 0x7f for security reasons.
     67    */
     68   public HtmlEscapeFunction(boolean isUnquoted) {
     69     if (isUnquoted) {
     70       super.setEscapeChars(UNQUOTED_ESCAPE_CHARS);
     71     } else {
     72       super.setEscapeChars(ESCAPE_CHARS);
     73     }
     74   }
     75 
     76   @Override
     77   protected String getEscapeString(char c) {
     78     if (c < 0x80) {
     79       return ESCAPE_CODES[c];
     80     }
     81     throw new IllegalArgumentException("Unexpected escape character " + c + "[" + (int) c + "]");
     82   }
     83 }
     84