1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.mail.utils; 17 18 import android.os.Looper; 19 import android.util.Log; 20 21 import com.android.mail.perf.Timer; 22 import com.google.common.collect.ImmutableList; 23 import com.google.common.collect.ImmutableSet; 24 25 import org.owasp.html.AttributePolicy; 26 import org.owasp.html.CssSchema; 27 import org.owasp.html.ElementPolicy; 28 import org.owasp.html.FilterUrlByProtocolAttributePolicy; 29 import org.owasp.html.Handler; 30 import org.owasp.html.HtmlPolicyBuilder; 31 import org.owasp.html.HtmlStreamRenderer; 32 import org.owasp.html.PolicyFactory; 33 34 import java.util.List; 35 36 /** 37 * This sanitizer is meant to strip all scripts and any malicious HTML from untrusted emails. It 38 * uses the <a href="https://www.owasp.org/index.php/OWASP_Java_HTML_Sanitizer_Project">OWASP Java 39 * HTML Sanitizer Project</a> to whitelist the subset of HTML elements and attributes as well as CSS 40 * properties that are considered safe. Any unmatched HTML or CSS is discarded. 41 * 42 * All URLS are scrubbed to ensure they match the blessed form of "http://the.url.here", 43 * "https://the.url.here" or "mailto:address (at) server.com" and cannot resemble "javascript:badness()" 44 * or comparable. 45 */ 46 public final class HtmlSanitizer { 47 48 /** 49 * This version number should be bumped each time a meaningful change is made to this sanitizer 50 * configuration which influences its output. It is compared against a minimum target version 51 * number. If it meets or exceeds the minimum target version, the result of the sanitizer is 52 * free to be shown in a standard webview. If it does not meet the minimum target version then 53 * the sanitized output is deemed untrustworthy and is shown in a sandboxed webview with 54 * javascript execution disabled. 55 */ 56 public static final int VERSION = 1; 57 58 private static final String LOG_TAG = LogTag.getLogTag(); 59 60 /** 61 * The following CSS properties do not appear in the default whitelist from OWASP, but they 62 * improve the fidelity of the HTML display without unacceptable risk. 63 */ 64 private static final CssSchema ADDITIONAL_CSS = CssSchema.withProperties(ImmutableSet.of( 65 "float", 66 "display" 67 )); 68 69 /** 70 * Translates the body tag into the div tag 71 */ 72 private static final ElementPolicy TRANSLATE_BODY_TO_DIV = new ElementPolicy() { 73 public String apply(String elementName, List<String> attrs) { 74 return "div"; 75 } 76 }; 77 78 /** 79 * Translates <div> tags surrounding quoted text into <div class="elided-text"> which allows 80 * quoted text collapsing in ConversationViewFragment. 81 */ 82 private static final ElementPolicy TRANSLATE_DIV_CLASS = new ElementPolicy() { 83 public String apply(String elementName, List<String> attrs) { 84 boolean showHideQuotedText = false; 85 86 // check if the class attribute is listed 87 final int classIndex = attrs.indexOf("class"); 88 if (classIndex >= 0) { 89 // remove the class attribute and its value 90 final String value = attrs.remove(classIndex + 1); 91 attrs.remove(classIndex); 92 93 // gmail and yahoo use a specific div class name to indicate quoted text 94 showHideQuotedText = "gmail_quote".equals(value) || "yahoo_quoted".equals(value); 95 } 96 97 // check if the id attribute is listed 98 final int idIndex = attrs.indexOf("id"); 99 if (idIndex >= 0) { 100 // remove the id attribute and its value 101 final String value = attrs.remove(idIndex + 1); 102 attrs.remove(idIndex); 103 104 // AOL uses a specific id value to indicate quoted text 105 showHideQuotedText = value.startsWith("AOLMsgPart"); 106 } 107 108 // insert a class attribute with a value of "elided-text" to hide/show quoted text 109 if (showHideQuotedText) { 110 attrs.add("class"); 111 attrs.add("elided-text"); 112 } 113 114 return "div"; 115 } 116 }; 117 118 /** 119 * Disallow "cid:" and "mailto:" urls on all tags not <a> or <img>. 120 */ 121 private static final AttributePolicy URL_PROTOCOLS = 122 new FilterUrlByProtocolAttributePolicy(ImmutableList.of("http", "https")); 123 124 /** 125 * Disallow the "cid:" url on links. Do allow "mailto:" urls to support sending mail. 126 */ 127 private static final AttributePolicy A_HREF_PROTOCOLS = 128 new FilterUrlByProtocolAttributePolicy(ImmutableList.of("mailto", "http", "https")); 129 130 /** 131 * Disallow the "mailto:" url on images so that "Show pictures" can't be used to start composing 132 * a bajillion emails. Do allow "cid:" urls to support inline image attachments. 133 */ 134 private static final AttributePolicy IMG_SRC_PROTOCOLS = 135 new FilterUrlByProtocolAttributePolicy(ImmutableList.of("cid", "http", "https")); 136 137 /** 138 * This sanitizer policy removes these elements and the content within: 139 * <ul> 140 * <li>APPLET</li> 141 * <li>FRAMESET</li> 142 * <li>OBJECT</li> 143 * <li>SCRIPT</li> 144 * <li>STYLE</li> 145 * <li>TITLE</li> 146 * </ul> 147 * 148 * This sanitizer policy removes these elements but preserves the content within: 149 * <ul> 150 * <li>BASEFONT</li> 151 * <li>FRAME</li> 152 * <li>HEAD</li> 153 * <li>IFRAME</li> 154 * <li>ISINDEX</li> 155 * <li>LINK</li> 156 * <li>META</li> 157 * <li>NOFRAMES</li> 158 * <li>PARAM</li> 159 * <li>NOSCRIPT</li> 160 * </ul> 161 * 162 * This sanitizer policy removes these attributes from all elements: 163 * <ul> 164 * <li>code</li> 165 * <li>codebase</li> 166 * <li>id</li> 167 * <li>for</li> 168 * <li>headers</li> 169 * <li>onblur</li> 170 * <li>onchange</li> 171 * <li>onclick</li> 172 * <li>ondblclick</li> 173 * <li>onfocus</li> 174 * <li>onkeydown</li> 175 * <li>onkeypress</li> 176 * <li>onkeyup</li> 177 * <li>onload</li> 178 * <li>onmousedown</li> 179 * <li>onmousemove</li> 180 * <li>onmouseout</li> 181 * <li>onmouseover</li> 182 * <li>onmouseup</li> 183 * <li>onreset</li> 184 * <li>onselect</li> 185 * <li>onsubmit</li> 186 * <li>onunload</li> 187 * <li>tabindex</li> 188 * </ul> 189 */ 190 private static final PolicyFactory POLICY_DEFINITION = new HtmlPolicyBuilder() 191 .allowAttributes("dir").matching(true, "ltr", "rtl").globally() 192 .allowUrlProtocols("cid", "http", "https", "mailto") 193 .allowStyling(CssSchema.union(CssSchema.DEFAULT, ADDITIONAL_CSS)) 194 .disallowTextIn("applet", "frameset", "object", "script", "style", "title") 195 .allowElements("a") 196 .allowAttributes("coords", "name", "shape").onElements("a") 197 .allowAttributes("href").matching(A_HREF_PROTOCOLS).onElements("a") 198 .allowElements("abbr").allowAttributes("title").onElements("abbr") 199 .allowElements("acronym").allowAttributes("title").onElements("acronym") 200 .allowElements("address") 201 .allowElements("area") 202 .allowAttributes("alt", "coords", "nohref", "name", "shape").onElements("area") 203 .allowAttributes("href").matching(URL_PROTOCOLS).onElements("area") 204 .allowElements("article") 205 .allowElements("aside") 206 .allowElements("b") 207 .allowElements("base") 208 .allowAttributes("href").matching(URL_PROTOCOLS).onElements("base") 209 .allowElements("bdi").allowAttributes("dir").onElements("bdi") 210 .allowElements("bdo").allowAttributes("dir").onElements("bdo") 211 .allowElements("big") 212 .allowElements("blockquote").allowAttributes("cite").onElements("blockquote") 213 .allowElements(TRANSLATE_BODY_TO_DIV, "body") 214 .allowElements("br").allowAttributes("clear").onElements("br") 215 .allowElements("button") 216 .allowAttributes("autofocus", "disabled", "form", "formaction", "formenctype", 217 "formmethod", "formnovalidate", "formtarget", "name", "type", "value") 218 .onElements("button") 219 .allowElements("canvas").allowAttributes("width", "height").onElements("canvas") 220 .allowElements("caption").allowAttributes("align").onElements("caption") 221 .allowElements("center") 222 .allowElements("cite") 223 .allowElements("code") 224 .allowElements("col") 225 .allowAttributes("align", "bgcolor", "char", "charoff", "span", "valign", "width") 226 .onElements("col") 227 .allowElements("colgroup") 228 .allowAttributes("align", "char", "charoff", "span", "valign", "width") 229 .onElements("colgroup") 230 .allowElements("datalist") 231 .allowElements("dd") 232 .allowElements("del").allowAttributes("cite", "datetime").onElements("del") 233 .allowElements("details") 234 .allowElements("dfn") 235 .allowElements("dir").allowAttributes("compact").onElements("dir") 236 .allowElements(TRANSLATE_DIV_CLASS, "div") 237 .allowAttributes("align", "background", "class", "id") 238 .onElements("div") 239 .allowElements("dl") 240 .allowElements("dt") 241 .allowElements("em") 242 .allowElements("fieldset") 243 .allowAttributes("disabled", "form", "name") 244 .onElements("fieldset") 245 .allowElements("figcaption") 246 .allowElements("figure") 247 .allowElements("font").allowAttributes("color", "face", "size").onElements("font") 248 .allowElements("footer") 249 .allowElements("form") 250 .allowAttributes("accept", "action", "accept-charset", "autocomplete", "enctype", 251 "method", "name", "novalidate", "target") 252 .onElements("form") 253 .allowElements("header") 254 .allowElements("h1").allowAttributes("align").onElements("h1") 255 .allowElements("h2").allowAttributes("align").onElements("h2") 256 .allowElements("h3").allowAttributes("align").onElements("h3") 257 .allowElements("h4").allowAttributes("align").onElements("h4") 258 .allowElements("h5").allowAttributes("align").onElements("h5") 259 .allowElements("h6").allowAttributes("align").onElements("h6") 260 .allowElements("hr") 261 .allowAttributes("align", "noshade", "size", "width") 262 .onElements("hr") 263 .allowElements("i") 264 .allowElements("img") 265 .allowAttributes("src").matching(IMG_SRC_PROTOCOLS).onElements("img") 266 .allowAttributes("longdesc").matching(URL_PROTOCOLS).onElements("img") 267 .allowAttributes("align", "alt", "border", "crossorigin", "height", "hspace", 268 "ismap", "usemap", "vspace", "width") 269 .onElements("img") 270 .allowElements("input") 271 .allowAttributes("src").matching(URL_PROTOCOLS).onElements("input") 272 .allowAttributes("formaction").matching(URL_PROTOCOLS).onElements("input") 273 .allowAttributes("accept", "align", "alt", "autocomplete", "autofocus", "checked", 274 "disabled", "form", "formenctype", "formmethod", "formnovalidate", 275 "formtarget", "height", "list", "max", "maxlength", "min", "multiple", 276 "name", "pattern", "placeholder", "readonly", "required", "size", "step", 277 "type", "value", "width") 278 .onElements("input") 279 .allowElements("ins") 280 .allowAttributes("cite").matching(URL_PROTOCOLS).onElements("ins") 281 .allowAttributes("datetime").onElements("ins") 282 .allowElements("kbd") 283 .allowElements("keygen") 284 .allowAttributes("autofocus", "challenge", "disabled", "form", "keytype", "name") 285 .onElements("keygen") 286 .allowElements("label").allowAttributes("form").onElements("label") 287 .allowElements("legend").allowAttributes("align").onElements("legend") 288 .allowElements("li").allowAttributes("type", "value").onElements("li") 289 .allowElements("main") 290 .allowElements("map").allowAttributes("name").onElements("map") 291 .allowElements("mark") 292 .allowElements("menu").allowAttributes("label", "type").onElements("menu") 293 .allowElements("menuitem") 294 .allowAttributes("icon").matching(URL_PROTOCOLS).onElements("menuitem") 295 .allowAttributes("checked", "command", "default", "disabled", "label", "type", 296 "radiogroup").onElements("menuitem") 297 .allowElements("meter") 298 .allowAttributes("form", "high", "low", "max", "min", "optimum", "value") 299 .onElements("meter") 300 .allowElements("nav") 301 .allowElements("ol") 302 .allowAttributes("compact", "reversed", "start", "type") 303 .onElements("ol") 304 .allowElements("optgroup").allowAttributes("disabled", "label").onElements("optgroup") 305 .allowElements("option") 306 .allowAttributes("disabled", "label", "selected", "value") 307 .onElements("option") 308 .allowElements("output").allowAttributes("form", "name").onElements("output") 309 .allowElements("p").allowAttributes("align").onElements("p") 310 .allowElements("pre").allowAttributes("width").onElements("pre") 311 .allowElements("progress").allowAttributes("max", "value").onElements("progress") 312 .allowElements("q").allowAttributes("cite").matching(URL_PROTOCOLS).onElements("q") 313 .allowElements("rp") 314 .allowElements("rt") 315 .allowElements("ruby") 316 .allowElements("s") 317 .allowElements("samp") 318 .allowElements("section") 319 .allowElements("select") 320 .allowAttributes("autofocus", "disabled", "form", "multiple", "name", "required", 321 "size") 322 .onElements("select") 323 .allowElements("small") 324 .allowElements("span") 325 .allowElements("strike") 326 .allowElements("strong") 327 .allowElements("sub") 328 .allowElements("summary") 329 .allowElements("sup") 330 .allowElements("table") 331 .allowAttributes("align", "bgcolor", "border", "cellpadding", "cellspacing", 332 "frame", "rules", "sortable", "summary", "width") 333 .onElements("table") 334 .allowElements("tbody") 335 .allowAttributes("align", "char", "charoff", "valign").onElements("tbody") 336 .allowElements("td") 337 .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan", 338 "height", "nowrap", "rowspan", "scope", "valign", "width") 339 .onElements("td") 340 .allowElements("textarea") 341 .allowAttributes("autofocus", "cols", "disabled", "form", "maxlength", "name", 342 "placeholder", "readonly", "required", "rows", "wrap") 343 .onElements("textarea") 344 .allowElements("tfoot") 345 .allowAttributes("align", "char", "charoff", "valign").onElements("tfoot") 346 .allowElements("th") 347 .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan", 348 "height", "nowrap", "rowspan", "scope", "sorted", "valign", "width") 349 .onElements("th") 350 .allowElements("thead") 351 .allowAttributes("align", "char", "charoff", "valign").onElements("thead") 352 .allowElements("time").allowAttributes("datetime").onElements("time") 353 .allowElements("tr") 354 .allowAttributes("align", "bgcolor", "char", "charoff", "valign").onElements("tr") 355 .allowElements("tt") 356 .allowElements("u") 357 .allowElements("ul").allowAttributes("compact", "type").onElements("ul") 358 .allowElements("var") 359 .allowElements("wbr") 360 .toFactory(); 361 362 private HtmlSanitizer() {} 363 364 /** 365 * Sanitizing email is treated as an expensive operation; this method should be called from 366 * a background Thread. 367 * 368 * @param rawHtml the unsanitized, suspicious html 369 * @return the sanitized form of the <code>rawHtml</code>; <code>null</code> if 370 * <code>rawHtml</code> was <code>null</code> 371 */ 372 public static String sanitizeHtml(final String rawHtml) { 373 if (Looper.getMainLooper() == Looper.myLooper()) { 374 throw new IllegalStateException("sanitizing email should not occur on the main thread"); 375 } 376 377 if (rawHtml == null) { 378 return null; 379 } 380 381 // create the builder into which the sanitized email will be written 382 final StringBuilder htmlBuilder = new StringBuilder(rawHtml.length()); 383 384 // create the renderer that will write the sanitized HTML to the builder 385 final HtmlStreamRenderer renderer = HtmlStreamRenderer.create( 386 htmlBuilder, 387 Handler.PROPAGATE, 388 // log errors resulting from exceptionally bizarre inputs 389 new Handler<String>() { 390 public void handle(final String x) { 391 Log.wtf(LOG_TAG, "Mangled HTML content cannot be parsed: " + x); 392 throw new AssertionError(x); 393 } 394 } 395 ); 396 397 // create a thread-specific policy 398 final org.owasp.html.HtmlSanitizer.Policy policy = POLICY_DEFINITION.apply(renderer); 399 400 // run the html through the sanitizer 401 Timer.startTiming("sanitizingHTMLEmail"); 402 try { 403 org.owasp.html.HtmlSanitizer.sanitize(rawHtml, policy); 404 } finally { 405 Timer.stopTiming("sanitizingHTMLEmail"); 406 } 407 408 // return the resulting HTML from the builder 409 return htmlBuilder.toString(); 410 } 411 } 412