Home | History | Annotate | Download | only in autoescape
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.clearsilver.jsilver.autoescape;
     18 
     19 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR;
     20 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS;
     21 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS;
     22 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS;
     23 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI;
     24 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START;
     25 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML;
     26 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS;
     27 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED;
     28 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE;
     29 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR;
     30 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS;
     31 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS;
     32 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS;
     33 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI;
     34 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START;
     35 import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException;
     36 import com.google.streamhtmlparser.ExternalState;
     37 import com.google.streamhtmlparser.HtmlParser;
     38 import com.google.streamhtmlparser.HtmlParserFactory;
     39 import com.google.streamhtmlparser.ParseException;
     40 
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
     44 
     45 /**
     46  * Encapsulates auto escaping logic.
     47  */
     48 public class AutoEscapeContext {
     49   /**
     50    * Map of content-type to corresponding {@code HtmlParser.Mode}, used by {@code setContentType} to
     51    * specify the content type of provided input. Valid values and the corresponding mode are: <br>
     52    * <table>
     53    * <tr>
     54    * <td>text/html</td>
     55    * <td>HtmlParser.Mode.HTML</td>
     56    * </tr>
     57    * <tr>
     58    * <td>text/plain</td>
     59    * <td>HtmlParser.Mode.HTML</td>
     60    * </tr>
     61    * <tr>
     62    * <td>application/javascript</td>
     63    * <td>HtmlParser.Mode.JS</td>
     64    * </tr>
     65    * <tr>
     66    * <td>application/json</td>
     67    * <td>HtmlParser.Mode.JS</td>
     68    * </tr>
     69    * <tr>
     70    * <td>text/javascript</td>
     71    * <td>HtmlParser.Mode.JS</td>
     72    * </tr>
     73    * <tr>
     74    * <td>text/css</td>
     75    * <td>HtmlParser.Mode.CSS</td>
     76    * </tr>
     77    * </table>
     78    *
     79    * @see #setContentType
     80    */
     81   public static final Map<String, HtmlParser.Mode> CONTENT_TYPE_LIST;
     82 
     83   // These options are used to provide extra information to HtmlParserFactory.createParserInMode or
     84   // HtmlParserFactory.createParserInAttribute, which is required for certain modes.
     85   private static final HashSet<HtmlParserFactory.AttributeOptions> quotedJsAttributeOption;
     86   private static final HashSet<HtmlParserFactory.AttributeOptions> partialUrlAttributeOption;
     87   private static final HashSet<HtmlParserFactory.ModeOptions> jsModeOption;
     88 
     89   private HtmlParser htmlParser;
     90 
     91   static {
     92     quotedJsAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>();
     93     quotedJsAttributeOption.add(HtmlParserFactory.AttributeOptions.JS_QUOTED);
     94 
     95     partialUrlAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>();
     96     partialUrlAttributeOption.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL);
     97 
     98     jsModeOption = new HashSet<HtmlParserFactory.ModeOptions>();
     99     jsModeOption.add(HtmlParserFactory.ModeOptions.JS_QUOTED);
    100 
    101     CONTENT_TYPE_LIST = new HashMap<String, HtmlParser.Mode>();
    102     CONTENT_TYPE_LIST.put("text/html", HtmlParser.Mode.HTML);
    103     CONTENT_TYPE_LIST.put("text/plain", HtmlParser.Mode.HTML);
    104     CONTENT_TYPE_LIST.put("application/javascript", HtmlParser.Mode.JS);
    105     CONTENT_TYPE_LIST.put("application/json", HtmlParser.Mode.JS);
    106     CONTENT_TYPE_LIST.put("text/javascript", HtmlParser.Mode.JS);
    107     CONTENT_TYPE_LIST.put("text/css", HtmlParser.Mode.CSS);
    108   }
    109 
    110   /**
    111    * Name of resource being auto escaped. Will be used in error and display messages.
    112    */
    113   private String resourceName;
    114 
    115   public AutoEscapeContext() {
    116     this(EscapeMode.ESCAPE_AUTO, null);
    117   }
    118 
    119   /**
    120    * Create a new context in the state represented by mode.
    121    *
    122    * @param mode EscapeMode object.
    123    */
    124   public AutoEscapeContext(EscapeMode mode) {
    125     this(mode, null);
    126   }
    127 
    128   /**
    129    * Create a new context in the state represented by mode. If a non-null resourceName is provided,
    130    * it will be used in displaying error messages.
    131    *
    132    * @param mode The initial EscapeMode for this context
    133    * @param resourceName Name of the resource being auto escaped.
    134    */
    135   public AutoEscapeContext(EscapeMode mode, String resourceName) {
    136     this.resourceName = resourceName;
    137     htmlParser = createHtmlParser(mode);
    138   }
    139 
    140   /**
    141    * Create a new context that is a copy of the current state of this context.
    142    *
    143    * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context.
    144    */
    145   public AutoEscapeContext cloneCurrentEscapeContext() {
    146     AutoEscapeContext autoEscapeContext = new AutoEscapeContext();
    147     autoEscapeContext.resourceName = resourceName;
    148     autoEscapeContext.htmlParser = HtmlParserFactory.createParser(htmlParser);
    149     return autoEscapeContext;
    150   }
    151 
    152   /**
    153    * Sets the current position in the resource being auto escaped. Useful for generating detailed
    154    * error messages.
    155    *
    156    * @param line line number.
    157    * @param column column number within line.
    158    */
    159   public void setCurrentPosition(int line, int column) {
    160     htmlParser.setLineNumber(line);
    161     htmlParser.setColumnNumber(column);
    162   }
    163 
    164   /**
    165    * Returns the name of the resource currently being auto escaped.
    166    */
    167   public String getResourceName() {
    168     return resourceName;
    169   }
    170 
    171   /**
    172    * Returns the current line number within the resource being auto escaped.
    173    */
    174   public int getLineNumber() {
    175     return htmlParser.getLineNumber();
    176   }
    177 
    178   /**
    179    * Returns the current column number within the resource being auto escaped.
    180    */
    181   public int getColumnNumber() {
    182     return htmlParser.getColumnNumber();
    183   }
    184 
    185   private HtmlParser createHtmlParser(EscapeMode mode) {
    186     switch (mode) {
    187       case ESCAPE_AUTO:
    188       case ESCAPE_AUTO_HTML:
    189         return HtmlParserFactory.createParser();
    190 
    191       case ESCAPE_AUTO_JS_UNQUOTED:
    192         // <script>START HERE
    193         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, null);
    194 
    195       case ESCAPE_AUTO_JS:
    196         // <script> var a = 'START HERE
    197         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, jsModeOption);
    198 
    199       case ESCAPE_AUTO_STYLE:
    200         // <style>START HERE
    201         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.CSS, null);
    202 
    203       case ESCAPE_AUTO_ATTR:
    204         // <input text="START HERE
    205         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, true, null);
    206 
    207       case ESCAPE_AUTO_UNQUOTED_ATTR:
    208         // <input text=START HERE
    209         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, false, null);
    210 
    211       case ESCAPE_AUTO_ATTR_URI:
    212         // <a href="http://www.google.com/a?START HERE
    213         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true,
    214             partialUrlAttributeOption);
    215 
    216       case ESCAPE_AUTO_UNQUOTED_ATTR_URI:
    217         // <a href=http://www.google.com/a?START HERE
    218         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false,
    219             partialUrlAttributeOption);
    220 
    221       case ESCAPE_AUTO_ATTR_URI_START:
    222         // <a href="START HERE
    223         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, null);
    224 
    225       case ESCAPE_AUTO_UNQUOTED_ATTR_URI_START:
    226         // <a href=START HERE
    227         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, null);
    228 
    229       case ESCAPE_AUTO_ATTR_JS:
    230         // <input onclick="doClick('START HERE
    231         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true,
    232             quotedJsAttributeOption);
    233 
    234       case ESCAPE_AUTO_ATTR_UNQUOTED_JS:
    235         // <input onclick="doClick(START HERE
    236         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, null);
    237 
    238       case ESCAPE_AUTO_UNQUOTED_ATTR_JS:
    239         // <input onclick=doClick('START HERE
    240         throw new JSilverAutoEscapingException(
    241             "Attempting to start HTML parser in unsupported mode" + mode, resourceName);
    242 
    243       case ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS:
    244         // <input onclick=doClick(START HERE
    245         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, false, null);
    246 
    247       case ESCAPE_AUTO_ATTR_CSS:
    248         // <input style="START HERE
    249         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, true, null);
    250 
    251       case ESCAPE_AUTO_UNQUOTED_ATTR_CSS:
    252         // <input style=START HERE
    253         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, false, null);
    254 
    255       default:
    256         throw new JSilverAutoEscapingException("Attempting to start HTML parser in invalid mode"
    257             + mode, resourceName);
    258     }
    259   }
    260 
    261   /**
    262    * Parse the given data and update internal state accordingly.
    263    *
    264    * @param data Input to parse, usually the contents of a template.
    265    */
    266   public void parseData(String data) {
    267     try {
    268       htmlParser.parse(data);
    269     } catch (ParseException e) {
    270       // ParseException displays the proper position, so do not store line and column
    271       // number here.
    272       throw new JSilverAutoEscapingException("Error in HtmlParser: " + e, resourceName);
    273     }
    274   }
    275 
    276   /**
    277    * Lets the AutoEscapeContext know that some input was skipped.
    278    *
    279    * This method will usually be called for variables in the input stream. The AutoEscapeContext is
    280    * told that the input stream contained some additional data but does not get to see the data. It
    281    * can adjust its internal state accordingly.
    282    */
    283   public void insertText() {
    284     try {
    285       htmlParser.insertText();
    286     } catch (ParseException e) {
    287       throw new JSilverAutoEscapingException("Error during insertText(): " + e, resourceName,
    288           htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    289     }
    290   }
    291 
    292   /**
    293    * Determines whether an included template that begins in state {@code start} is allowed to end in
    294    * state {@code end}. Usually included templates are only allowed to end in the same context they
    295    * begin in. This lets auto escaping parse the remainder of the parent template without needing to
    296    * know the ending context of the included template. However, there is one exception where auto
    297    * escaping will allow a different ending context: if the included template is a URI attribute
    298    * value, it is allowed to change context from {@code ATTR_URI_START} to {@code ATTR_URI}. This
    299    * does not cause any issues because the including template will call {@code insertText} when it
    300    * encounters the include command, and {@code insertText} will cause the HTML parser to switch its
    301    * internal state in the same way.
    302    */
    303   public boolean isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end) {
    304     return start.equals(end)
    305         || (start.equals(AutoEscapeState.ATTR_URI_START) && end.equals(AutoEscapeState.ATTR_URI))
    306         || (start.equals(AutoEscapeState.UNQUOTED_ATTR_URI_START) && end
    307             .equals(AutoEscapeState.UNQUOTED_ATTR_URI));
    308   }
    309 
    310   /**
    311    * Determine the correct escaping to apply for a variable.
    312    *
    313    * Looks at the current state of the htmlParser, and determines what escaping to apply to a
    314    * variable in this state.
    315    *
    316    * @return Name of escaping function to use in this state.
    317    */
    318   public String getEscapingFunctionForCurrentState() {
    319     return getCurrentState().getFunctionName();
    320   }
    321 
    322   /**
    323    * Returns the EscapeMode which will bring AutoEscapeContext into this state.
    324    *
    325    * Initializing a new AutoEscapeContext with this EscapeMode will bring it into the state that the
    326    * current AutoEscapeContext object is in.
    327    *
    328    * @return An EscapeMode object.
    329    */
    330   public EscapeMode getEscapeModeForCurrentState() {
    331     return getCurrentState().getEscapeMode();
    332   }
    333 
    334   /**
    335    * Calls the HtmlParser API to determine current state.
    336    *
    337    * This function is mostly a wrapper around the HtmlParser API. It gathers all the necessary
    338    * information using that API and returns a single enum representing the current state.
    339    *
    340    * @return AutoEscapeState enum representing the current state.
    341    */
    342   public AutoEscapeState getCurrentState() {
    343     ExternalState state = htmlParser.getState();
    344     String tag = htmlParser.getTag();
    345 
    346     // Currently we do not do any escaping inside CSS blocks, so ignore them.
    347     if (state.equals(HtmlParser.STATE_CSS_FILE) || tag.equals("style")) {
    348 
    349       return AutoEscapeState.STYLE;
    350     }
    351 
    352     // Handle variables inside <script> tags.
    353     if (htmlParser.inJavascript() && !state.equals(HtmlParser.STATE_VALUE)) {
    354       if (htmlParser.isJavascriptQuoted()) {
    355         // <script> var a = "<?cs var: Blah ?>"; </script>
    356         return AutoEscapeState.JS;
    357       } else {
    358         // <script> var a = <?cs var: Blah ?>; </script>
    359         // No quotes around the variable, hence it can inject arbitrary javascript.
    360         // So severely restrict the values it may contain.
    361         return AutoEscapeState.JS_UNQUOTED;
    362       }
    363     }
    364 
    365     // Inside an HTML tag or attribute name
    366     if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) {
    367       return AutoEscapeState.ATTR;
    368       // TODO: Need a strict validation function for tag and attribute names.
    369     } else if (state.equals(HtmlParser.STATE_VALUE)) {
    370       // Inside an HTML attribute value
    371       return getCurrentAttributeState();
    372     } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) {
    373       // Default is assumed to be HTML body
    374       // <b>Hello <?cs var: UserName ?></b> :
    375       return AutoEscapeState.HTML;
    376     }
    377 
    378     throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: "
    379         + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    380   }
    381 
    382   private AutoEscapeState getCurrentAttributeState() {
    383     HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType();
    384     boolean attrQuoted = htmlParser.isAttributeQuoted();
    385 
    386     switch (type) {
    387       case REGULAR:
    388         // <input value="<?cs var: Blah ?>"> :
    389         if (attrQuoted) {
    390           return AutoEscapeState.ATTR;
    391         } else {
    392           return AutoEscapeState.UNQUOTED_ATTR;
    393         }
    394 
    395       case URI:
    396         if (htmlParser.isUrlStart()) {
    397           // <a href="<?cs var: X ?>">
    398           if (attrQuoted) {
    399             return AutoEscapeState.ATTR_URI_START;
    400           } else {
    401             return AutoEscapeState.UNQUOTED_ATTR_URI_START;
    402           }
    403         } else {
    404           // <a href="http://www.google.com/a?x=<?cs var: X ?>">
    405           if (attrQuoted) {
    406             // TODO: Html escaping because that is what Clearsilver does right now.
    407             // May change this to url escaping soon.
    408             return AutoEscapeState.ATTR_URI;
    409           } else {
    410             return AutoEscapeState.UNQUOTED_ATTR_URI;
    411           }
    412         }
    413 
    414       case JS:
    415         if (htmlParser.isJavascriptQuoted()) {
    416           /*
    417            * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do
    418            * an HTML escape around this.
    419            */
    420           if (attrQuoted) {
    421             // <input onclick="alert('<?cs var:Blah ?>');">
    422             return AutoEscapeState.ATTR_JS;
    423           } else {
    424             // <input onclick=alert('<?cs var: Blah ?>');>
    425             return AutoEscapeState.UNQUOTED_ATTR_JS;
    426           }
    427         } else {
    428           if (attrQuoted) {
    429             /* <input onclick="alert(<?cs var:Blah ?>);"> */
    430             return AutoEscapeState.ATTR_UNQUOTED_JS;
    431           } else {
    432 
    433             /* <input onclick=alert(<?cs var:Blah ?>);> */
    434             return AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS;
    435           }
    436         }
    437 
    438       case STYLE:
    439         // <input style="border:<?cs var: FancyBorder ?>"> :
    440         if (attrQuoted) {
    441           return AutoEscapeState.ATTR_CSS;
    442         } else {
    443           return AutoEscapeState.UNQUOTED_ATTR_CSS;
    444         }
    445 
    446       default:
    447         throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type,
    448             resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    449     }
    450   }
    451 
    452   /**
    453    * Resets the state of the underlying html parser to a state consistent with the {@code
    454    * contentType} provided. This method should be used when the starting auto escaping context of a
    455    * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript
    456    * source file.
    457    *
    458    * @param contentType MIME type header representing the content being parsed.
    459    * @see #CONTENT_TYPE_LIST
    460    */
    461   public void setContentType(String contentType) {
    462     HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType);
    463     if (mode == null) {
    464       throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType,
    465           resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    466 
    467     }
    468     htmlParser.resetMode(mode);
    469   }
    470 
    471   /**
    472    * Enum representing states of the data being parsed.
    473    *
    474    * This enumeration lists all the states in which autoescaping would have some effect.
    475    *
    476    */
    477   public static enum AutoEscapeState {
    478     HTML("html", ESCAPE_AUTO_HTML), JS("js", ESCAPE_AUTO_JS), STYLE("css", ESCAPE_AUTO_STYLE), JS_UNQUOTED(
    479         "js_check_number", ESCAPE_AUTO_JS_UNQUOTED), ATTR("html", ESCAPE_AUTO_ATTR), UNQUOTED_ATTR(
    480         "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR), ATTR_URI("html", ESCAPE_AUTO_ATTR_URI), UNQUOTED_ATTR_URI(
    481         "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI), ATTR_URI_START("url_validate",
    482         ESCAPE_AUTO_ATTR_URI_START), UNQUOTED_ATTR_URI_START("url_validate_unquoted",
    483         ESCAPE_AUTO_UNQUOTED_ATTR_URI_START), ATTR_JS("js", ESCAPE_AUTO_ATTR_JS), ATTR_UNQUOTED_JS(
    484         "js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS), UNQUOTED_ATTR_JS("js_attr_unquoted",
    485         ESCAPE_AUTO_UNQUOTED_ATTR_JS), UNQUOTED_ATTR_UNQUOTED_JS("js_check_number",
    486         ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS), ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS), UNQUOTED_ATTR_CSS(
    487         "css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS);
    488 
    489     private final String functionName;
    490     private final EscapeMode escapeMode;
    491 
    492     private AutoEscapeState(String functionName, EscapeMode mode) {
    493       this.functionName = functionName;
    494       this.escapeMode = mode;
    495     }
    496 
    497     public String getFunctionName() {
    498       return functionName;
    499     }
    500 
    501     public EscapeMode getEscapeMode() {
    502       return escapeMode;
    503     }
    504   }
    505 }
    506