Home | History | Annotate | Download | only in html
      1 // Copyright (c) 2011, Mike Samuel
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions
      6 // are met:
      7 //
      8 // Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 // Redistributions in binary form must reproduce the above copyright
     11 // notice, this list of conditions and the following disclaimer in the
     12 // documentation and/or other materials provided with the distribution.
     13 // Neither the name of the OWASP nor the names of its contributors may
     14 // be used to endorse or promote products derived from this software
     15 // without specific prior written permission.
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 // POSSIBILITY OF SUCH DAMAGE.
     28 
     29 package org.owasp.html;
     30 
     31 import java.util.List;
     32 import java.util.Map;
     33 import java.util.Set;
     34 import java.util.regex.Pattern;
     35 
     36 import javax.annotation.Nullable;
     37 import javax.annotation.concurrent.NotThreadSafe;
     38 
     39 import com.google.common.base.Predicate;
     40 import com.google.common.collect.ImmutableList;
     41 import com.google.common.collect.ImmutableMap;
     42 import com.google.common.collect.ImmutableSet;
     43 import com.google.common.collect.Maps;
     44 import com.google.common.collect.Sets;
     45 
     46 
     47 /**
     48  * Conveniences for configuring policies for the {@link HtmlSanitizer}.
     49  *
     50  * <h3>Usage</h3>
     51  * <p>
     52  * To create a policy, first construct an instance of this class; then call
     53  * <code>allow&hellip;</code> methods to turn on tags, attributes, and other
     54  * processing modes; and finally call <code>build(renderer)</code> or
     55  * <code>toFactory()</code>.
     56  * </p>
     57  * <pre class="prettyprint lang-java">
     58  * // Define the policy.
     59  * Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
     60  *     = new HtmlPolicyBuilder()
     61  *         .allowElements("a", "p")
     62  *         .allowAttributes("href").onElements("a")
     63  *         .toFactory();
     64  *
     65  * // Sanitize your output.
     66  * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
     67  * </pre>
     68  *
     69  * <h3>Embedded Content</h3>
     70  * <p>
     71  * Embedded URLs are filtered by
     72  * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
     73  * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
     74  * so you can easily white-list widely used policies that don't violate the
     75  * current page's origin.  See "Customization" below for ways to do further
     76  * filtering.  If you allow links it might be worthwhile to
     77  * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
     78  * {@code rel=nofollow}.
     79  * </p>
     80  * <p>
     81  * This class simply throws out all embedded JS.
     82  * Use a custom element or attribute policy to allow through
     83  * signed or otherwise known-safe code.
     84  * Check out the Caja project if you need a way to contain third-party JS.
     85  * </p>
     86  * <p>
     87  * This class does not attempt to faithfully parse and sanitize CSS.
     88  * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
     89  * that allows through a few CSS properties that allow textual styling, but that
     90  * disallow image loading, history stealing, layout breaking, code execution,
     91  * etc.
     92  * </p>
     93  *
     94  * <h3>Customization</h3>
     95  * <p>
     96  * You can easily do custom processing on tags and attributes by supplying your
     97  * own {@link ElementPolicy element policy} or
     98  * {@link AttributePolicy attribute policy} when calling
     99  * <code>allow&hellip;</code>.
    100  * E.g. to convert headers into {@code <div>}s, you could use an element policy
    101  * </p>
    102  * <pre class="prettyprint lang-java">
    103  * new HtmlPolicyBuilder()
    104  *   .allowElements(
    105  *     new ElementPolicy() {
    106  *       public String apply(String elementName, List&lt;String> attributes) {
    107  *         attributes.add("class");
    108  *         attributes.add("header-" + elementName);
    109  *         return "div";
    110  *       }
    111  *     },
    112  *     "h1", "h2", "h3", "h4", "h5", "h6")
    113  *   .build(outputChannel)
    114  * </pre>
    115  *
    116  * <h3>Rules of Thumb</h3>
    117  * <p>
    118  * Throughout this class, several rules hold:
    119  * <ul>
    120  *   <li>Everything is denied by default.  There are
    121  *     <code>disallow&hellip;</code> methods, but those reverse
    122  *     allows instead of rolling back overly permissive defaults.
    123  *   <li>The order of allows and disallows does not matter.
    124  *     Disallows trump allows whether they occur before or after them.
    125  *     The only method that needs to be called in a particular place is
    126  *     {@link HtmlPolicyBuilder#build}.
    127  *     Allows or disallows after {@code build} is called have no
    128  *     effect on the already built policy.
    129  *   <li>Element and attribute policies are applied in the following order:
    130  *     element specific attribute policy, global attribute policy, element
    131  *     policy.
    132  *     Element policies come last so they can observe all the post-processed
    133  *     attributes, and so they can add attributes that are exempt from
    134  *     attribute policies.
    135  *     Element specific policies go first, so they can normalize content to
    136  *     a form that might be acceptable to a more simplistic global policy.
    137  * </ul>
    138  *
    139  * <h3>Thread safety and efficiency</h3>
    140  * <p>
    141  * This class is not thread-safe.  The resulting policy will not violate its
    142  * security guarantees as a result of race conditions, but is not thread safe
    143  * because it maintains state to track whether text inside disallowed elements
    144  * should be suppressed.
    145  * <p>
    146  * The resulting policy can be reused, but if you use the
    147  * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
    148  * binding policies to output channels is cheap so there's no need.
    149  * </p>
    150  *
    151  * @author Mike Samuel <mikesamuel (at) gmail.com>
    152  */
    153 @TCB
    154 @NotThreadSafe
    155 public class HtmlPolicyBuilder {
    156   /**
    157    * The default set of elements that are removed if they have no attributes.
    158    * Since {@code <img>} is in this set, by default, a policy will remove
    159    * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
    160    * and it has no other attributes that would warrant it appearing in the
    161    * output.
    162    */
    163   public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
    164       = ImmutableSet.of("a", "font", "img", "input", "span");
    165 
    166   private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
    167   private final Map<String, Map<String, AttributePolicy>> attrPolicies
    168       = Maps.newLinkedHashMap();
    169   private final Map<String, AttributePolicy> globalAttrPolicies
    170       = Maps.newLinkedHashMap();
    171   private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
    172   private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
    173       DEFAULT_SKIP_IF_EMPTY);
    174   private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
    175   private boolean requireRelNofollowOnLinks;
    176 
    177   /**
    178    * Allows the named elements.
    179    */
    180   public HtmlPolicyBuilder allowElements(String... elementNames) {
    181     return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
    182   }
    183 
    184   /**
    185    * Disallows the named elements.  Elements are disallowed by default, so
    186    * there is no need to disallow elements, unless you are making an exception
    187    * based on an earlier allow.
    188    */
    189   public HtmlPolicyBuilder disallowElements(String... elementNames) {
    190     return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
    191   }
    192 
    193   /**
    194    * Allow the given elements with the given policy.
    195    *
    196    * @param policy May remove or add attributes, change the element name, or
    197    *    deny the element.
    198    */
    199   public HtmlPolicyBuilder allowElements(
    200       ElementPolicy policy, String... elementNames) {
    201     invalidateCompiledState();
    202     for (String elementName : elementNames) {
    203       elementName = HtmlLexer.canonicalName(elementName);
    204       ElementPolicy newPolicy = ElementPolicy.Util.join(
    205           elPolicies.get(elementName), policy);
    206       // Don't remove if newPolicy is the always reject policy since we want
    207       // that to infect later allowElement calls for this particular element
    208       // name.  rejects should have higher priority than allows.
    209       elPolicies.put(elementName, newPolicy);
    210       if (!textContainers.containsKey(elementName)
    211           && TagBalancingHtmlStreamEventReceiver
    212               .allowsPlainTextualContent(elementName)) {
    213         textContainers.put(elementName, true);
    214       }
    215     }
    216     return this;
    217   }
    218 
    219   /**
    220    * A canned policy that allows a number of common formatting elements.
    221    */
    222   public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
    223     return allowElements(
    224         "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
    225         "strike", "tt", "code", "big", "small", "br", "span");
    226   }
    227 
    228   /**
    229    * A canned policy that allows a number of common block elements.
    230    */
    231   public HtmlPolicyBuilder allowCommonBlockElements() {
    232     return allowElements(
    233         "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
    234         "blockquote");
    235   }
    236 
    237   /**
    238    * Allows text content in the named elements.
    239    * By default, text content is allowed in any
    240    * {@link #allowElements allowed elements} that can contain character data per
    241    * the HTML5 spec, but text content is not allowed by default in elements that
    242    * contain content of other kinds (like JavaScript in {@code <script>}
    243    * elements.
    244    * <p>
    245    * To write a policy that whitelists {@code <script>} or {@code <style>}
    246    * elements, first {@code allowTextIn("script")}.
    247    */
    248   public HtmlPolicyBuilder allowTextIn(String... elementNames) {
    249     invalidateCompiledState();
    250     for (String elementName : elementNames) {
    251       elementName = HtmlLexer.canonicalName(elementName);
    252       textContainers.put(elementName, true);
    253     }
    254     return this;
    255   }
    256 
    257   public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
    258     invalidateCompiledState();
    259     for (String elementName : elementNames) {
    260       elementName = HtmlLexer.canonicalName(elementName);
    261       textContainers.put(elementName, false);
    262     }
    263     return this;
    264   }
    265 
    266   /**
    267    * Assuming the given elements are allowed, allows them to appear without
    268    * attributes.
    269    *
    270    * @see #DEFAULT_SKIP_IF_EMPTY
    271    * @see #disallowWithoutAttributes
    272    */
    273   public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
    274     invalidateCompiledState();
    275     for (String elementName : elementNames) {
    276       elementName = HtmlLexer.canonicalName(elementName);
    277       skipIfEmpty.remove(elementName);
    278     }
    279     return this;
    280   }
    281 
    282   /**
    283    * Disallows the given elements from appearing without attributes.
    284    *
    285    * @see #DEFAULT_SKIP_IF_EMPTY
    286    * @see #allowWithoutAttributes
    287    */
    288   public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
    289     invalidateCompiledState();
    290     for (String elementName : elementNames) {
    291       elementName = HtmlLexer.canonicalName(elementName);
    292       skipIfEmpty.add(elementName);
    293     }
    294     return this;
    295   }
    296 
    297   /**
    298    * Returns an object that lets you associate policies with the given
    299    * attributes, and allow them globally or on specific elements.
    300    */
    301   public AttributeBuilder allowAttributes(String... attributeNames) {
    302     ImmutableList.Builder<String> b = ImmutableList.builder();
    303     for (String attributeName : attributeNames) {
    304       b.add(HtmlLexer.canonicalName(attributeName));
    305     }
    306     return new AttributeBuilder(b.build());
    307   }
    308 
    309   /**
    310    * Reverse an earlier attribute {@link #allowAttributes allow}.
    311    * <p>
    312    * For this to have an effect you must call at least one of
    313    * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
    314    * <p>
    315    * Attributes are disallowed by default, so there is no need to call this
    316    * with a laundry list of attribute/element pairs.
    317    */
    318   public AttributeBuilder disallowAttributes(String... attributeNames) {
    319     return this.allowAttributes(attributeNames)
    320         .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
    321   }
    322 
    323 
    324   private HtmlPolicyBuilder allowAttributesGlobally(
    325       AttributePolicy policy, List<String> attributeNames) {
    326     invalidateCompiledState();
    327     for (String attributeName : attributeNames) {
    328       // We reinterpret the identity policy later via policy joining since its
    329       // the default passed from the policy-less method, but we don't do
    330       // anything here since we don't know until build() is called whether the
    331       // policy author wants to allow certain URL protocols or wants to deal
    332       // with styles.
    333       AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
    334       globalAttrPolicies.put(
    335           attributeName, AttributePolicy.Util.join(oldPolicy, policy));
    336     }
    337     return this;
    338   }
    339 
    340   private HtmlPolicyBuilder allowAttributesOnElements(
    341       AttributePolicy policy, List<String> attributeNames,
    342       List<String> elementNames) {
    343     invalidateCompiledState();
    344     for (String elementName : elementNames) {
    345       Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
    346       if (policies == null) {
    347         policies = Maps.newLinkedHashMap();
    348         attrPolicies.put(elementName, policies);
    349       }
    350       for (String attributeName : attributeNames) {
    351         AttributePolicy oldPolicy = policies.get(attributeName);
    352         policies.put(
    353             attributeName,
    354             AttributePolicy.Util.join(oldPolicy, policy));
    355       }
    356     }
    357     return this;
    358   }
    359 
    360   /**
    361    * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
    362    * to links.
    363    */
    364   public HtmlPolicyBuilder requireRelNofollowOnLinks() {
    365     invalidateCompiledState();
    366     this.requireRelNofollowOnLinks = true;
    367     return this;
    368   }
    369 
    370   /**
    371    * Adds to the set of protocols that are allowed in URL attributes.
    372    * For each URL attribute that is allowed, we further constrain it by
    373    * only allowing the value through if it specifies no protocol, or if it
    374    * specifies one in the allowedProtocols white-list.
    375    * This is done regardless of whether any protocols have been allowed, so
    376    * allowing the attribute "href" globally with the identity policy but
    377    * not white-listing any protocols, effectively disallows the "href"
    378    * attribute globally.
    379    * <p>
    380    * Do not allow any <code>*script</code> such as <code>javascript</code>
    381    * protocols if you might use this policy with untrusted code.
    382    */
    383   public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
    384     invalidateCompiledState();
    385     // If there is at least one allowed protocol, then allow URLs and
    386     // add a filter that checks href and src values.
    387 
    388     // Do not allow href and srcs through otherwise, and only allow on images
    389     // and links.
    390     for (String protocol : protocols) {
    391       protocol = Strings.toLowerCase(protocol);
    392       allowedProtocols.add(protocol);
    393     }
    394     return this;
    395   }
    396 
    397   /**
    398    * Reverses a decision made by {@link #allowUrlProtocols}.
    399    */
    400   public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
    401     invalidateCompiledState();
    402     for (String protocol : protocols) {
    403       protocol = Strings.toLowerCase(protocol);
    404       allowedProtocols.remove(protocol);
    405     }
    406     return this;
    407   }
    408 
    409   /**
    410    * A canned URL protocol policy that allows <code>http</code>,
    411    * <code>https</code>, and <code>mailto</code>.
    412    */
    413   public HtmlPolicyBuilder allowStandardUrlProtocols() {
    414     return allowUrlProtocols("http", "https", "mailto");
    415   }
    416 
    417   /**
    418    * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
    419    * color, font-size, type-face, and other styling using the default schema;
    420    * but which does not allow content to escape its clipping context.
    421    */
    422   public HtmlPolicyBuilder allowStyling() {
    423     allowStyling(CssSchema.DEFAULT);
    424     return this;
    425   }
    426 
    427   /**
    428    * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
    429    * color, font-size, type-face, and other styling using the given schema.
    430    */
    431   public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
    432     invalidateCompiledState();
    433     allowAttributesGlobally(
    434         new StylingPolicy(whitelist), ImmutableList.of("style"));
    435     return this;
    436   }
    437 
    438   /**
    439    * Names of attributes from HTML 4 whose values are URLs.
    440    * Other attributes, e.g. <code>style</code> may contain URLs even though
    441    * there values are not URLs.
    442    */
    443   private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
    444       "action", "archive", "background", "cite", "classid", "codebase", "data",
    445       "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
    446       "profile", "src", "srcset", "usemap");
    447 
    448   /**
    449    * Produces a policy based on the allow and disallow calls previously made.
    450    *
    451    * @param out receives calls to open only tags allowed by
    452    *      previous calls to this object.
    453    *      Typically a {@link HtmlStreamRenderer}.
    454    */
    455   public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
    456     return toFactory().apply(out);
    457   }
    458 
    459   /**
    460    * Produces a policy based on the allow and disallow calls previously made.
    461    *
    462    * @param out receives calls to open only tags allowed by
    463    *      previous calls to this object.
    464    *      Typically a {@link HtmlStreamRenderer}.
    465    * @param listener is notified of dropped tags and attributes so that
    466    *      intrusion detection systems can be alerted to questionable HTML.
    467    *      If {@code null} then no notifications are sent.
    468    * @param context if {@code (listener != null)} then the context value passed
    469    *      with alerts.  This can be used to let the listener know from which
    470    *      connection or request the questionable HTML was received.
    471    */
    472   public <CTX> HtmlSanitizer.Policy build(
    473       HtmlStreamEventReceiver out,
    474       @Nullable HtmlChangeListener<? super CTX> listener,
    475       @Nullable CTX context) {
    476     return toFactory().apply(out, listener, context);
    477   }
    478 
    479   /**
    480    * Like {@link #build} but can be reused to create many different policies
    481    * each backed by a different output channel.
    482    */
    483   public PolicyFactory toFactory() {
    484     ImmutableSet.Builder<String> textContainers = ImmutableSet.builder();
    485     for (Map.Entry<String, Boolean> textContainer
    486          : this.textContainers.entrySet()) {
    487       if (Boolean.TRUE.equals(textContainer.getValue())) {
    488         textContainers.add(textContainer.getKey());
    489       }
    490     }
    491     return new PolicyFactory(compilePolicies(), textContainers.build(),
    492                              ImmutableMap.copyOf(globalAttrPolicies));
    493   }
    494 
    495   // Speed up subsequent builds by caching the compiled policies.
    496   private transient ImmutableMap<String, ElementAndAttributePolicies>
    497       compiledPolicies;
    498 
    499   /** Called by mutators to signal that any compiled policy is out-of-date. */
    500   private void invalidateCompiledState() {
    501     compiledPolicies = null;
    502   }
    503 
    504   private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
    505     if (compiledPolicies != null) { return compiledPolicies; }
    506 
    507     // Copy maps before normalizing in case builder is reused.
    508     Map<String, ElementPolicy> elPolicies
    509         = Maps.newLinkedHashMap(this.elPolicies);
    510     Map<String, Map<String, AttributePolicy>> attrPolicies
    511         = Maps.newLinkedHashMap(this.attrPolicies);
    512     for (Map.Entry<String, Map<String, AttributePolicy>> e :
    513          attrPolicies.entrySet()) {
    514       e.setValue(Maps.newLinkedHashMap(e.getValue()));
    515     }
    516     Map<String, AttributePolicy> globalAttrPolicies
    517         = Maps.newLinkedHashMap(this.globalAttrPolicies);
    518     Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
    519 
    520     // Implement requireRelNofollowOnLinks
    521     if (requireRelNofollowOnLinks) {
    522       ElementPolicy linkPolicy = elPolicies.get("a");
    523       if (linkPolicy == null) {
    524         linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
    525       }
    526       elPolicies.put(
    527           "a",
    528           ElementPolicy.Util.join(
    529               linkPolicy,
    530               new ElementPolicy() {
    531                 public String apply(String elementName, List<String> attrs) {
    532                   for (int i = 0, n = attrs.size(); i < n; i += 2) {
    533                     if ("href".equals(attrs.get(i))) {
    534                       attrs.add("rel");
    535                       attrs.add("nofollow");
    536                       break;
    537                     }
    538                   }
    539                   return elementName;
    540                 }
    541               }));
    542     }
    543 
    544     // Implement protocol policies.
    545     // For each URL attribute that is allowed, we further constrain it by
    546     // only allowing the value through if it specifies no protocol, or if it
    547     // specifies one in the allowedProtocols white-list.
    548     // This is done regardless of whether any protocols have been allowed, so
    549     // allowing the attribute "href" globally with the identity policy but
    550     // not white-listing any protocols, effectively disallows the "href"
    551     // attribute globally.
    552     {
    553       AttributePolicy urlAttributePolicy;
    554       if (allowedProtocols.size() == 3
    555           && allowedProtocols.contains("mailto")
    556           && allowedProtocols.contains("http")
    557           && allowedProtocols.contains("https")) {
    558         urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
    559       } else {
    560         urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
    561             allowedProtocols);
    562       }
    563       Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
    564       for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
    565         if (globalAttrPolicies.containsKey(urlAttributeName)) {
    566           toGuard.remove(urlAttributeName);
    567           globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
    568               urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
    569         }
    570       }
    571       // Implement guards not implemented on global policies in the per-element
    572       // policy maps.
    573       for (Map.Entry<String, Map<String, AttributePolicy>> e
    574            : attrPolicies.entrySet()) {
    575         Map<String, AttributePolicy> policies = e.getValue();
    576         for (String urlAttributeName : toGuard) {
    577           if (policies.containsKey(urlAttributeName)) {
    578             policies.put(urlAttributeName, AttributePolicy.Util.join(
    579                 urlAttributePolicy, policies.get(urlAttributeName)));
    580           }
    581         }
    582       }
    583     }
    584 
    585     ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
    586         = ImmutableMap.builder();
    587     for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
    588       String elementName = e.getKey();
    589       ElementPolicy elPolicy = e.getValue();
    590       if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
    591         continue;
    592       }
    593 
    594       Map<String, AttributePolicy> elAttrPolicies
    595           = attrPolicies.get(elementName);
    596       if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
    597       ImmutableMap.Builder<String, AttributePolicy> attrs
    598           = ImmutableMap.builder();
    599       for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
    600         String attributeName = ape.getKey();
    601         // Handle below so we don't end up putting the same key into the map
    602         // twice.  ImmutableMap.Builder hates that.
    603         if (globalAttrPolicies.containsKey(attributeName)) { continue; }
    604         AttributePolicy policy = ape.getValue();
    605         if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
    606           attrs.put(attributeName, policy);
    607         }
    608       }
    609       for (Map.Entry<String, AttributePolicy> ape
    610            : globalAttrPolicies.entrySet()) {
    611         String attributeName = ape.getKey();
    612         AttributePolicy policy = AttributePolicy.Util.join(
    613             elAttrPolicies.get(attributeName), ape.getValue());
    614         if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
    615           attrs.put(attributeName, policy);
    616         }
    617       }
    618 
    619       policiesBuilder.put(
    620           elementName,
    621           new ElementAndAttributePolicies(
    622               elementName,
    623               elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
    624     }
    625     return compiledPolicies = policiesBuilder.build();
    626   }
    627 
    628   /**
    629    * Builds the relationship between attributes, the values that they may have,
    630    * and the elements on which they may appear.
    631    *
    632    * @author Mike Samuel
    633    */
    634   public final class AttributeBuilder {
    635     private final List<String> attributeNames;
    636     private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
    637 
    638     AttributeBuilder(List<? extends String> attributeNames) {
    639       this.attributeNames = ImmutableList.copyOf(attributeNames);
    640     }
    641 
    642     /**
    643      * Filters and/or transforms the attribute values
    644      * allowed by later {@code allow*} calls.
    645      * Multiple calls to {@code matching} are combined so that the policies
    646      * receive the value in order, each seeing the value after any
    647      * transformation by a previous policy.
    648      */
    649     public AttributeBuilder matching(AttributePolicy policy) {
    650       this.policy = AttributePolicy.Util.join(this.policy, policy);
    651       return this;
    652     }
    653 
    654     /**
    655      * Restrict the values allowed by later {@code allow*} calls to those
    656      * matching the pattern.
    657      * Multiple calls to {@code matching} are combined to restrict to the
    658      * intersection of possible matched values.
    659      */
    660     public AttributeBuilder matching(final Pattern pattern) {
    661       return matching(new AttributePolicy() {
    662         public @Nullable String apply(
    663             String elementName, String attributeName, String value) {
    664           return pattern.matcher(value).matches() ? value : null;
    665         }
    666       });
    667     }
    668 
    669     /**
    670      * Restrict the values allowed by later {@code allow*} calls to those
    671      * matching the given predicate.
    672      * Multiple calls to {@code matching} are combined to restrict to the
    673      * intersection of possible matched values.
    674      */
    675     public AttributeBuilder matching(
    676         final Predicate<? super String> filter) {
    677       return matching(new AttributePolicy() {
    678         public @Nullable String apply(
    679             String elementName, String attributeName, String value) {
    680           return filter.apply(value) ? value : null;
    681         }
    682       });
    683     }
    684 
    685     /**
    686      * Restrict the values allowed by later {@code allow*} calls to those
    687      * supplied.
    688      * Multiple calls to {@code matching} are combined to restrict to the
    689      * intersection of possible matched values.
    690      */
    691     public AttributeBuilder matching(
    692         boolean ignoreCase, String... allowedValues) {
    693       return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
    694     }
    695 
    696     /**
    697      * Restrict the values allowed by later {@code allow*} calls to those
    698      * supplied.
    699      * Multiple calls to {@code matching} are combined to restrict to the
    700      * intersection of possible matched values.
    701      */
    702     public AttributeBuilder matching(
    703         final boolean ignoreCase, Set<? extends String> allowedValues) {
    704       final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
    705       return matching(new AttributePolicy() {
    706         public @Nullable String apply(
    707             String elementName, String attributeName, String value) {
    708           if (ignoreCase) { value = Strings.toLowerCase(value); }
    709           return allowed.contains(value) ? value : null;
    710         }
    711       });
    712     }
    713 
    714     /**
    715      * Allows the given attributes on any elements but filters the
    716      * attributes' values based on previous calls to {@code matching(...)}.
    717      * Global attribute policies are applied after element specific policies.
    718      * Be careful of using this with attributes like <code>type</code> which
    719      * have different meanings on different attributes.
    720      * Also be careful of allowing globally attributes like <code>href</code>
    721      * which can have more far-reaching effects on tags like
    722      * <code>&lt;base&gt;</code> and <code>&lt;link&gt;</code> than on
    723      * <code>&lt;a&gt;</code> because in the former, they have an effect without
    724      * user interaction and can change the behavior of the current page.
    725      */
    726     public HtmlPolicyBuilder globally() {
    727       return HtmlPolicyBuilder.this.allowAttributesGlobally(
    728           policy, attributeNames);
    729     }
    730 
    731     /**
    732      * Allows the named attributes on the given elements but filters the
    733      * attributes' values based on previous calls to {@code matching(...)}.
    734      */
    735     public HtmlPolicyBuilder onElements(String... elementNames) {
    736       ImmutableList.Builder<String> b = ImmutableList.builder();
    737       for (String elementName : elementNames) {
    738         b.add(HtmlLexer.canonicalName(elementName));
    739       }
    740       return HtmlPolicyBuilder.this.allowAttributesOnElements(
    741           policy, attributeNames, b.build());
    742     }
    743   }
    744 }
    745