/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.clearsilver.jsilver.autoescape;

import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START;

import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException;
import com.google.streamhtmlparser.ExternalState;
import com.google.streamhtmlparser.HtmlParser;
import com.google.streamhtmlparser.HtmlParserFactory;
import com.google.streamhtmlparser.ParseException;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/**
 * Encapsulates auto escaping logic.
 *
 * <p>An instance wraps an {@link HtmlParser} that tracks where in an HTML/JS/CSS document the
 * template output currently is, and maps that parser state to an {@link AutoEscapeState}, which in
 * turn names the escaping function to apply to a variable emitted at that point.
 */
public class AutoEscapeContext {
  /**
   * Unmodifiable map of content-type to corresponding {@code HtmlParser.Mode}, used by
   * {@code setContentType} to specify the content type of provided input. Valid values and the
   * corresponding mode are: <br>
   * <table>
   * <tr>
   * <td>text/html</td>
   * <td>HtmlParser.Mode.HTML</td>
   * </tr>
   * <tr>
   * <td>text/plain</td>
   * <td>HtmlParser.Mode.HTML</td>
   * </tr>
   * <tr>
   * <td>application/javascript</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>application/json</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>text/javascript</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>text/css</td>
   * <td>HtmlParser.Mode.CSS</td>
   * </tr>
   * </table>
   *
   * @see #setContentType
   */
  public static final Map<String, HtmlParser.Mode> CONTENT_TYPE_LIST;

  // These options are used to provide extra information to HtmlParserFactory.createParserInMode or
  // HtmlParserFactory.createParserInAttribute, which is required for certain modes.
  private static final HashSet<HtmlParserFactory.AttributeOptions> QUOTED_JS_ATTRIBUTE_OPTION;
  private static final HashSet<HtmlParserFactory.AttributeOptions> PARTIAL_URL_ATTRIBUTE_OPTION;
  private static final HashSet<HtmlParserFactory.ModeOptions> JS_MODE_OPTION;

  static {
    QUOTED_JS_ATTRIBUTE_OPTION = new HashSet<HtmlParserFactory.AttributeOptions>();
    QUOTED_JS_ATTRIBUTE_OPTION.add(HtmlParserFactory.AttributeOptions.JS_QUOTED);

    PARTIAL_URL_ATTRIBUTE_OPTION = new HashSet<HtmlParserFactory.AttributeOptions>();
    PARTIAL_URL_ATTRIBUTE_OPTION.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL);

    JS_MODE_OPTION = new HashSet<HtmlParserFactory.ModeOptions>();
    JS_MODE_OPTION.add(HtmlParserFactory.ModeOptions.JS_QUOTED);

    Map<String, HtmlParser.Mode> contentTypes = new HashMap<String, HtmlParser.Mode>();
    contentTypes.put("text/html", HtmlParser.Mode.HTML);
    contentTypes.put("text/plain", HtmlParser.Mode.HTML);
    contentTypes.put("application/javascript", HtmlParser.Mode.JS);
    contentTypes.put("application/json", HtmlParser.Mode.JS);
    contentTypes.put("text/javascript", HtmlParser.Mode.JS);
    contentTypes.put("text/css", HtmlParser.Mode.CSS);
    // Published as a read-only view so that no caller can alter the mapping for everyone else.
    CONTENT_TYPE_LIST = Collections.unmodifiableMap(contentTypes);
  }

  /**
   * Name of resource being auto escaped. Will be used in error and display messages. May be
   * {@code null}.
   */
  private final String resourceName;

  /** Parser whose state determines the escaping applied at the current position. */
  private final HtmlParser htmlParser;

  /**
   * Create a new context in the {@code ESCAPE_AUTO} mode with no resource name.
   */
  public AutoEscapeContext() {
    this(EscapeMode.ESCAPE_AUTO, null);
  }

  /**
   * Create a new context in the state represented by mode.
   *
   * @param mode EscapeMode object.
   */
  public AutoEscapeContext(EscapeMode mode) {
    this(mode, null);
  }

  /**
   * Create a new context in the state represented by mode. If a non-null resourceName is provided,
   * it will be used in displaying error messages.
   *
   * @param mode The initial EscapeMode for this context
   * @param resourceName Name of the resource being auto escaped.
   * @throws JSilverAutoEscapingException if a parser cannot be started in {@code mode}.
   */
  public AutoEscapeContext(EscapeMode mode, String resourceName) {
    // resourceName must be assigned first: createHtmlParser reads it when building error messages.
    this.resourceName = resourceName;
    this.htmlParser = createHtmlParser(mode);
  }

  /**
   * Wraps an already constructed parser. Used when cloning so that no throwaway parser has to be
   * created first.
   */
  private AutoEscapeContext(String resourceName, HtmlParser htmlParser) {
    this.resourceName = resourceName;
    this.htmlParser = htmlParser;
  }

  /**
   * Create a new context that is a copy of the current state of this context.
   *
   * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context.
   */
  public AutoEscapeContext cloneCurrentEscapeContext() {
    return new AutoEscapeContext(resourceName, HtmlParserFactory.createParser(htmlParser));
  }

  /**
   * Sets the current position in the resource being auto escaped. Useful for generating detailed
   * error messages.
   *
   * @param line line number.
   * @param column column number within line.
   */
  public void setCurrentPosition(int line, int column) {
    htmlParser.setLineNumber(line);
    htmlParser.setColumnNumber(column);
  }

  /**
   * Returns the name of the resource currently being auto escaped.
   */
  public String getResourceName() {
    return resourceName;
  }

  /**
   * Returns the current line number within the resource being auto escaped.
   */
  public int getLineNumber() {
    return htmlParser.getLineNumber();
  }

  /**
   * Returns the current column number within the resource being auto escaped.
   */
  public int getColumnNumber() {
    return htmlParser.getColumnNumber();
  }

  /**
   * Builds a parser positioned as if it had already consumed the input that leads to {@code mode}.
   * The comment on each case shows an example of where parsing starts.
   *
   * @param mode the escaping mode the parser should start in.
   * @return a parser in the state corresponding to {@code mode}.
   * @throws JSilverAutoEscapingException if {@code mode} is unsupported or not recognised.
   */
  private HtmlParser createHtmlParser(EscapeMode mode) {
    switch (mode) {
      case ESCAPE_AUTO:
      case ESCAPE_AUTO_HTML:
        return HtmlParserFactory.createParser();

      case ESCAPE_AUTO_JS_UNQUOTED:
        // <script>START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, null);

      case ESCAPE_AUTO_JS:
        // <script> var a = 'START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, JS_MODE_OPTION);

      case ESCAPE_AUTO_STYLE:
        // <style>START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.CSS, null);

      case ESCAPE_AUTO_ATTR:
        // <input text="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR:
        // <input text=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, false, null);

      case ESCAPE_AUTO_ATTR_URI:
        // <a href="http://www.google.com/a?START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true,
            PARTIAL_URL_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_UNQUOTED_ATTR_URI:
        // <a href=http://www.google.com/a?START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false,
            PARTIAL_URL_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_ATTR_URI_START:
        // <a href="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_URI_START:
        // <a href=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, null);

      case ESCAPE_AUTO_ATTR_JS:
        // <input onclick="doClick('START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true,
            QUOTED_JS_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_ATTR_UNQUOTED_JS:
        // <input onclick="doClick(START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_JS:
        // <input onclick=doClick('START HERE
        // No parser is created for this combination; it is rejected outright.
        throw new JSilverAutoEscapingException(
            "Attempting to start HTML parser in unsupported mode: " + mode, resourceName);

      case ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS:
        // <input onclick=doClick(START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, false, null);

      case ESCAPE_AUTO_ATTR_CSS:
        // <input style="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_CSS:
        // <input style=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, false, null);

      default:
        throw new JSilverAutoEscapingException(
            "Attempting to start HTML parser in invalid mode: " + mode, resourceName);
    }
  }

  /**
   * Parse the given data and update internal state accordingly.
   *
   * @param data Input to parse, usually the contents of a template.
   * @throws JSilverAutoEscapingException if the underlying parser rejects the input.
   */
  public void parseData(String data) {
    try {
      htmlParser.parse(data);
    } catch (ParseException e) {
      // ParseException displays the proper position, so do not store line and column
      // number here.
      throw new JSilverAutoEscapingException("Error in HtmlParser: " + e, resourceName);
    }
  }

  /**
   * Lets the AutoEscapeContext know that some input was skipped.
   *
   * This method will usually be called for variables in the input stream. The AutoEscapeContext is
   * told that the input stream contained some additional data but does not get to see the data. It
   * can adjust its internal state accordingly.
   *
   * @throws JSilverAutoEscapingException if the underlying parser reports an error.
   */
  public void insertText() {
    try {
      htmlParser.insertText();
    } catch (ParseException e) {
      throw new JSilverAutoEscapingException("Error during insertText(): " + e, resourceName,
          htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
  }

  /**
   * Determines whether an included template that begins in state {@code start} is allowed to end in
   * state {@code end}. Usually included templates are only allowed to end in the same context they
   * begin in. This lets auto escaping parse the remainder of the parent template without needing to
   * know the ending context of the included template. However, there is one exception where auto
   * escaping will allow a different ending context: if the included template is a URI attribute
   * value, it is allowed to change context from {@code ATTR_URI_START} to {@code ATTR_URI}. This
   * does not cause any issues because the including template will call {@code insertText} when it
   * encounters the include command, and {@code insertText} will cause the HTML parser to switch its
   * internal state in the same way.
   *
   * @param start state the included template began in.
   * @param end state the included template finished in.
   * @return {@code true} if the transition from {@code start} to {@code end} is acceptable.
   */
  public boolean isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end) {
    return start.equals(end)
        || (start.equals(AutoEscapeState.ATTR_URI_START) && end.equals(AutoEscapeState.ATTR_URI))
        || (start.equals(AutoEscapeState.UNQUOTED_ATTR_URI_START)
            && end.equals(AutoEscapeState.UNQUOTED_ATTR_URI));
  }

  /**
   * Determine the correct escaping to apply for a variable.
   *
   * Looks at the current state of the htmlParser, and determines what escaping to apply to a
   * variable in this state.
   *
   * @return Name of escaping function to use in this state.
   */
  public String getEscapingFunctionForCurrentState() {
    return getCurrentState().getFunctionName();
  }

  /**
   * Returns the EscapeMode which will bring AutoEscapeContext into this state.
   *
   * Initializing a new AutoEscapeContext with this EscapeMode will bring it into the state that the
   * current AutoEscapeContext object is in.
   *
   * @return An EscapeMode object.
   */
  public EscapeMode getEscapeModeForCurrentState() {
    return getCurrentState().getEscapeMode();
  }

  /**
   * Calls the HtmlParser API to determine current state.
   *
   * This function is mostly a wrapper around the HtmlParser API. It gathers all the necessary
   * information using that API and returns a single enum representing the current state.
   *
   * @return AutoEscapeState enum representing the current state.
   * @throws JSilverAutoEscapingException if the parser reports a state or attribute type that has
   *         no corresponding {@code AutoEscapeState}.
   */
  public AutoEscapeState getCurrentState() {
    ExternalState state = htmlParser.getState();
    String tag = htmlParser.getTag();

    // Inside a CSS file or a <style> tag. The literal is the receiver so that a null tag cannot
    // raise a NullPointerException.
    // NOTE(review): the previous comment here read "Currently we do not do any escaping inside CSS
    // blocks, so ignore them", yet STYLE maps to the "css" function - confirm which is intended.
    if (state.equals(HtmlParser.STATE_CSS_FILE) || "style".equals(tag)) {
      return AutoEscapeState.STYLE;
    }

    // Handle variables inside <script> tags.
    if (htmlParser.inJavascript() && !state.equals(HtmlParser.STATE_VALUE)) {
      if (htmlParser.isJavascriptQuoted()) {
        // <script> var a = "<?cs var: Blah ?>"; </script>
        return AutoEscapeState.JS;
      } else {
        // <script> var a = <?cs var: Blah ?>; </script>
        // No quotes around the variable, hence it can inject arbitrary javascript.
        // So severely restrict the values it may contain.
        return AutoEscapeState.JS_UNQUOTED;
      }
    }

    if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) {
      // Inside an HTML tag or attribute name
      // TODO: Need a strict validation function for tag and attribute names.
      return AutoEscapeState.ATTR;
    } else if (state.equals(HtmlParser.STATE_VALUE)) {
      // Inside an HTML attribute value
      return getCurrentAttributeState();
    } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) {
      // Default is assumed to be HTML body
      // <b>Hello <?cs var: UserName ?></b> :
      return AutoEscapeState.HTML;
    }

    throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: "
        + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
  }

  /**
   * Resolves the state for a variable that appears inside an HTML attribute value, based on the
   * attribute type and on whether the attribute (and, for JS, the javascript literal) is quoted.
   *
   * @return AutoEscapeState for the attribute value currently being parsed.
   * @throws JSilverAutoEscapingException if the parser reports an unknown attribute type.
   */
  private AutoEscapeState getCurrentAttributeState() {
    HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType();
    boolean attrQuoted = htmlParser.isAttributeQuoted();

    switch (type) {
      case REGULAR:
        // <input value="<?cs var: Blah ?>"> :
        if (attrQuoted) {
          return AutoEscapeState.ATTR;
        } else {
          return AutoEscapeState.UNQUOTED_ATTR;
        }

      case URI:
        if (htmlParser.isUrlStart()) {
          // <a href="<?cs var: X ?>">
          if (attrQuoted) {
            return AutoEscapeState.ATTR_URI_START;
          } else {
            return AutoEscapeState.UNQUOTED_ATTR_URI_START;
          }
        } else {
          // <a href="http://www.google.com/a?x=<?cs var: X ?>">
          if (attrQuoted) {
            // TODO: Html escaping because that is what Clearsilver does right now.
            // May change this to url escaping soon.
            return AutoEscapeState.ATTR_URI;
          } else {
            return AutoEscapeState.UNQUOTED_ATTR_URI;
          }
        }

      case JS:
        if (htmlParser.isJavascriptQuoted()) {
          /*
           * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do
           * an HTML escape around this.
           */
          if (attrQuoted) {
            // <input onclick="alert('<?cs var:Blah ?>');">
            return AutoEscapeState.ATTR_JS;
          } else {
            // <input onclick=alert('<?cs var: Blah ?>');>
            return AutoEscapeState.UNQUOTED_ATTR_JS;
          }
        } else {
          if (attrQuoted) {
            /* <input onclick="alert(<?cs var:Blah ?>);"> */
            return AutoEscapeState.ATTR_UNQUOTED_JS;
          } else {
            /* <input onclick=alert(<?cs var:Blah ?>);> */
            return AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS;
          }
        }

      case STYLE:
        // <input style="border:<?cs var: FancyBorder ?>"> :
        if (attrQuoted) {
          return AutoEscapeState.ATTR_CSS;
        } else {
          return AutoEscapeState.UNQUOTED_ATTR_CSS;
        }

      default:
        throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type,
            resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
  }

  /**
   * Resets the state of the underlying html parser to a state consistent with the {@code
   * contentType} provided. This method should be used when the starting auto escaping context of a
   * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript
   * source file.
   *
   * @param contentType MIME type header representing the content being parsed.
   * @throws JSilverAutoEscapingException if {@code contentType} is not a key of
   *         {@link #CONTENT_TYPE_LIST}.
   * @see #CONTENT_TYPE_LIST
   */
  public void setContentType(String contentType) {
    HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType);
    if (mode == null) {
      throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType,
          resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
    htmlParser.resetMode(mode);
  }

  /**
   * Enum representing states of the data being parsed.
   *
   * This enumeration lists all the states in which autoescaping would have some effect. Each state
   * carries the name of the escaping function to apply and the {@link EscapeMode} that recreates
   * the state in a fresh {@code AutoEscapeContext}.
   */
  public enum AutoEscapeState {
    HTML("html", ESCAPE_AUTO_HTML),
    JS("js", ESCAPE_AUTO_JS),
    STYLE("css", ESCAPE_AUTO_STYLE),
    JS_UNQUOTED("js_check_number", ESCAPE_AUTO_JS_UNQUOTED),
    ATTR("html", ESCAPE_AUTO_ATTR),
    UNQUOTED_ATTR("html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR),
    ATTR_URI("html", ESCAPE_AUTO_ATTR_URI),
    UNQUOTED_ATTR_URI("html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI),
    ATTR_URI_START("url_validate", ESCAPE_AUTO_ATTR_URI_START),
    UNQUOTED_ATTR_URI_START("url_validate_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI_START),
    ATTR_JS("js", ESCAPE_AUTO_ATTR_JS),
    ATTR_UNQUOTED_JS("js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS),
    UNQUOTED_ATTR_JS("js_attr_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_JS),
    UNQUOTED_ATTR_UNQUOTED_JS("js_check_number", ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS),
    ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS),
    UNQUOTED_ATTR_CSS("css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS);

    private final String functionName;
    private final EscapeMode escapeMode;

    AutoEscapeState(String functionName, EscapeMode mode) {
      this.functionName = functionName;
      this.escapeMode = mode;
    }

    /** Returns the name of the escaping function to apply in this state. */
    public String getFunctionName() {
      return functionName;
    }

    /** Returns the EscapeMode that starts a new context in this state. */
    public EscapeMode getEscapeMode() {
      return escapeMode;
    }
  }
}