Home | History | Annotate | Download | only in html
      1 // Copyright (c) 2011, Mike Samuel
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions
      6 // are met:
      7 //
      8 // Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 // Redistributions in binary form must reproduce the above copyright
     11 // notice, this list of conditions and the following disclaimer in the
     12 // documentation and/or other materials provided with the distribution.
     13 // Neither the name of the OWASP nor the names of its contributors may
     14 // be used to endorse or promote products derived from this software
     15 // without specific prior written permission.
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 // POSSIBILITY OF SUCH DAMAGE.
     28 
     29 package org.owasp.html;
     30 
     31 import javax.annotation.Nullable;
     32 
     33 import com.google.common.collect.ImmutableSet;
     34 
     35 /**
     36  * An attribute policy for attributes whose values are URLs that requires that
     37  * the value have no protocol or have an allowed protocol.
     38  *
     39  * <p>
     40  * URLs with protocols must match the protocol set passed to the constructor.
     41  * URLs without protocols but which specify an origin different from the
     42  * containing page (e.g. {@code //example.org}) are only allowed if the
     43  * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}
     44  * allows both {@code http} and {@code https} which are normally used to serve
     45  * HTML.
     46  * Same-origin URLs, URLs without any protocol or authority part are always
     47  * allowed.
     48  * </p>
     49  *
     50  * <p>
     51  * This class assumes that URLs are either hierarchical, or are opaque, but
     52  * do not look like they contain an authority portion.
     53  * </p>
     54  *
     55  * @author Mike Samuel <mikesamuel (at) gmail.com>
     56  */
     57 @TCB
     58 public class FilterUrlByProtocolAttributePolicy implements AttributePolicy {
     59   private final ImmutableSet<String> protocols;
     60 
     61   public FilterUrlByProtocolAttributePolicy(
     62       Iterable<? extends String> protocols) {
     63     this.protocols = ImmutableSet.copyOf(protocols);
     64   }
     65 
     66   public @Nullable String apply(
     67       String elementName, String attributeName, String s) {
     68     protocol_loop:
     69     for (int i = 0, n = s.length(); i < n; ++i) {
     70       switch (s.charAt(i)) {
     71         case '/': case '#': case '?':  // No protocol.
     72           // Check for domain relative URLs like //www.evil.org/
     73           if (s.startsWith("//")
     74               // or the protocols by which HTML is normally served are OK.
     75               && !allowProtocolRelativeUrls()) {
     76             return null;
     77           }
     78           break protocol_loop;
     79         case ':':
     80           String protocol = Strings.toLowerCase(s.substring(0, i));
     81           if (!protocols.contains(protocol)) { return null; }
     82           break protocol_loop;
     83       }
     84     }
     85     return normalizeUri(s);
     86   }
     87 
     88   protected boolean allowProtocolRelativeUrls() {
     89     return protocols.contains("http") && protocols.contains("https");
     90   }
     91 
     92   /** Percent encodes anything that looks like a colon, or a parenthesis. */
     93   static String normalizeUri(String s) {
     94     int n = s.length();
     95     boolean colonsIrrelevant = false;
     96     for (int i = 0; i < n; ++i) {
     97       char ch = s.charAt(i);
     98       switch (ch) {
     99         case '/': case '#': case '?': case ':':
    100           colonsIrrelevant = true;
    101           break;
    102         case '(': case ')': case '\uff1a':
    103           StringBuilder sb = new StringBuilder(n + 16);
    104           int pos = 0;
    105           for (; i < n; ++i) {
    106             ch = s.charAt(i);
    107             switch (ch) {
    108               case '(':
    109                 sb.append(s, pos, i).append("%28");
    110                 pos = i + 1;
    111                 break;
    112               case ')':
    113                 sb.append(s, pos, i).append("%29");
    114                 pos = i + 1;
    115                 break;
    116               default:
    117                 if (ch > 0x100 && !colonsIrrelevant) {
    118                   // Other colon like characters.
    119                   // TODO: do we need to encode non-colon characters if we're
    120                   // not dealing with URLs that haven't been copy/pasted into
    121                   // the URL bar?
    122                   // Is it safe to assume UTF-8 here?
    123                   switch (ch) {
    124                     case '\u0589':
    125                       sb.append(s, pos, i).append("%d6%89");
    126                       pos = i + 1;
    127                       break;
    128                     case '\u05c3':
    129                       sb.append(s, pos, i).append("%d7%83");
    130                       pos = i + 1;
    131                       break;
    132                     case '\u2236':
    133                       sb.append(s, pos, i).append("%e2%88%b6");
    134                       pos = i + 1;
    135                       break;
    136                     case '\uff1a':
    137                       sb.append(s, pos, i).append("%ef%bc%9a");
    138                       pos = i + 1;
    139                       break;
    140                   }
    141                 }
    142                 break;
    143             }
    144           }
    145           return sb.append(s, pos, n).toString();
    146       }
    147     }
    148     return s;
    149   }
    150 
    151   @Override
    152   public boolean equals(Object o) {
    153     return o != null && this.getClass() == o.getClass()
    154         && protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols);
    155   }
    156 
    157   @Override
    158   public int hashCode() {
    159     return protocols.hashCode();
    160   }
    161 
    162 }
    163