1 // Copyright (c) 2011, Mike Samuel 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions 6 // are met: 7 // 8 // Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // Redistributions in binary form must reproduce the above copyright 11 // notice, this list of conditions and the following disclaimer in the 12 // documentation and/or other materials provided with the distribution. 13 // Neither the name of the OWASP nor the names of its contributors may 14 // be used to endorse or promote products derived from this software 15 // without specific prior written permission. 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 29 package org.owasp.html; 30 31 import javax.annotation.Nullable; 32 33 import com.google.common.collect.ImmutableSet; 34 35 /** 36 * An attribute policy for attributes whose values are URLs that requires that 37 * the value have no protocol or have an allowed protocol. 38 * 39 * <p> 40 * URLs with protocols must match the protocol set passed to the constructor. 41 * URLs without protocols but which specify an origin different from the 42 * containing page (e.g. {@code //example.org}) are only allowed if the 43 * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy} 44 * allows both {@code http} and {@code https} which are normally used to serve 45 * HTML. 46 * Same-origin URLs, URLs without any protocol or authority part are always 47 * allowed. 48 * </p> 49 * 50 * <p> 51 * This class assumes that URLs are either hierarchical, or are opaque, but 52 * do not look like they contain an authority portion. 53 * </p> 54 * 55 * @author Mike Samuel <mikesamuel (at) gmail.com> 56 */ 57 @TCB 58 public class FilterUrlByProtocolAttributePolicy implements AttributePolicy { 59 private final ImmutableSet<String> protocols; 60 61 public FilterUrlByProtocolAttributePolicy( 62 Iterable<? extends String> protocols) { 63 this.protocols = ImmutableSet.copyOf(protocols); 64 } 65 66 public @Nullable String apply( 67 String elementName, String attributeName, String s) { 68 protocol_loop: 69 for (int i = 0, n = s.length(); i < n; ++i) { 70 switch (s.charAt(i)) { 71 case '/': case '#': case '?': // No protocol. 72 // Check for domain relative URLs like //www.evil.org/ 73 if (s.startsWith("//") 74 // or the protocols by which HTML is normally served are OK. 75 && !allowProtocolRelativeUrls()) { 76 return null; 77 } 78 break protocol_loop; 79 case ':': 80 String protocol = Strings.toLowerCase(s.substring(0, i)); 81 if (!protocols.contains(protocol)) { return null; } 82 break protocol_loop; 83 } 84 } 85 return normalizeUri(s); 86 } 87 88 protected boolean allowProtocolRelativeUrls() { 89 return protocols.contains("http") && protocols.contains("https"); 90 } 91 92 /** Percent encodes anything that looks like a colon, or a parenthesis. */ 93 static String normalizeUri(String s) { 94 int n = s.length(); 95 boolean colonsIrrelevant = false; 96 for (int i = 0; i < n; ++i) { 97 char ch = s.charAt(i); 98 switch (ch) { 99 case '/': case '#': case '?': case ':': 100 colonsIrrelevant = true; 101 break; 102 case '(': case ')': case '\uff1a': 103 StringBuilder sb = new StringBuilder(n + 16); 104 int pos = 0; 105 for (; i < n; ++i) { 106 ch = s.charAt(i); 107 switch (ch) { 108 case '(': 109 sb.append(s, pos, i).append("%28"); 110 pos = i + 1; 111 break; 112 case ')': 113 sb.append(s, pos, i).append("%29"); 114 pos = i + 1; 115 break; 116 default: 117 if (ch > 0x100 && !colonsIrrelevant) { 118 // Other colon like characters. 119 // TODO: do we need to encode non-colon characters if we're 120 // not dealing with URLs that haven't been copy/pasted into 121 // the URL bar? 122 // Is it safe to assume UTF-8 here? 123 switch (ch) { 124 case '\u0589': 125 sb.append(s, pos, i).append("%d6%89"); 126 pos = i + 1; 127 break; 128 case '\u05c3': 129 sb.append(s, pos, i).append("%d7%83"); 130 pos = i + 1; 131 break; 132 case '\u2236': 133 sb.append(s, pos, i).append("%e2%88%b6"); 134 pos = i + 1; 135 break; 136 case '\uff1a': 137 sb.append(s, pos, i).append("%ef%bc%9a"); 138 pos = i + 1; 139 break; 140 } 141 } 142 break; 143 } 144 } 145 return sb.append(s, pos, n).toString(); 146 } 147 } 148 return s; 149 } 150 151 @Override 152 public boolean equals(Object o) { 153 return o != null && this.getClass() == o.getClass() 154 && protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols); 155 } 156 157 @Override 158 public int hashCode() { 159 return protocols.hashCode(); 160 } 161 162 } 163