1 /* 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "UserContentURLPattern.h" 28 #include "KURL.h" 29 #include <wtf/StdLibExtras.h> 30 31 namespace WebCore { 32 33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist) 34 { 35 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. 36 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. 37 bool matchesWhitelist = !whitelist || whitelist->isEmpty(); 38 if (!matchesWhitelist) { 39 for (unsigned i = 0; i < whitelist->size(); ++i) { 40 UserContentURLPattern contentPattern(whitelist->at(i)); 41 if (contentPattern.matches(url)) { 42 matchesWhitelist = true; 43 break; 44 } 45 } 46 } 47 48 bool matchesBlacklist = false; 49 if (blacklist) { 50 for (unsigned i = 0; i < blacklist->size(); ++i) { 51 UserContentURLPattern contentPattern(blacklist->at(i)); 52 if (contentPattern.matches(url)) { 53 matchesBlacklist = true; 54 break; 55 } 56 } 57 } 58 59 return matchesWhitelist && !matchesBlacklist; 60 } 61 62 bool UserContentURLPattern::parse(const String& pattern) 63 { 64 DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://")); 65 66 size_t schemeEndPos = pattern.find(schemeSeparator); 67 if (schemeEndPos == notFound) 68 return false; 69 70 m_scheme = pattern.left(schemeEndPos); 71 72 unsigned hostStartPos = schemeEndPos + schemeSeparator.length(); 73 if (hostStartPos >= pattern.length()) 74 return false; 75 76 int pathStartPos = 0; 77 78 if (equalIgnoringCase(m_scheme, "file")) 79 pathStartPos = hostStartPos; 80 else { 81 size_t hostEndPos = pattern.find("/", hostStartPos); 82 if (hostEndPos == notFound) 83 return false; 84 85 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); 86 m_matchSubdomains = false; 87 88 if (m_host == "*") { 89 // The pattern can be just '*', which means match all domains. 90 m_host = ""; 91 m_matchSubdomains = true; 92 } else if (m_host.startsWith("*.")) { 93 // The first component can be '*', which means to match all subdomains. 94 m_host = m_host.substring(2); // Length of "*." 95 m_matchSubdomains = true; 96 } 97 98 // No other '*' can occur in the host. 99 if (m_host.find("*") != notFound) 100 return false; 101 102 pathStartPos = hostEndPos; 103 } 104 105 m_path = pattern.right(pattern.length() - pathStartPos); 106 107 return true; 108 } 109 110 bool UserContentURLPattern::matches(const KURL& test) const 111 { 112 if (m_invalid) 113 return false; 114 115 if (!equalIgnoringCase(test.protocol(), m_scheme)) 116 return false; 117 118 if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test)) 119 return false; 120 121 return matchesPath(test); 122 } 123 124 bool UserContentURLPattern::matchesHost(const KURL& test) const 125 { 126 const String& host = test.host(); 127 if (equalIgnoringCase(host, m_host)) 128 return true; 129 130 if (!m_matchSubdomains) 131 return false; 132 133 // If we're matching subdomains, and we have no host, that means the pattern 134 // was <scheme>://*/<whatever>, so we match anything. 135 if (!m_host.length()) 136 return true; 137 138 // Check if the domain is a subdomain of our host. 139 if (!host.endsWith(m_host, false)) 140 return false; 141 142 ASSERT(host.length() > m_host.length()); 143 144 // Check that the character before the suffix is a period. 145 return host[host.length() - m_host.length() - 1] == '.'; 146 } 147 148 struct MatchTester 149 { 150 const String m_pattern; 151 unsigned m_patternIndex; 152 153 const String m_test; 154 unsigned m_testIndex; 155 156 MatchTester(const String& pattern, const String& test) 157 : m_pattern(pattern) 158 , m_patternIndex(0) 159 , m_test(test) 160 , m_testIndex(0) 161 { 162 } 163 164 bool testStringFinished() const { return m_testIndex >= m_test.length(); } 165 bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } 166 167 void eatWildcard() 168 { 169 while (!patternStringFinished()) { 170 if (m_pattern[m_patternIndex] != '*') 171 return; 172 m_patternIndex++; 173 } 174 } 175 176 void eatSameChars() 177 { 178 while (!patternStringFinished() && !testStringFinished()) { 179 if (m_pattern[m_patternIndex] == '*') 180 return; 181 if (m_pattern[m_patternIndex] != m_test[m_testIndex]) 182 return; 183 m_patternIndex++; 184 m_testIndex++; 185 } 186 } 187 188 bool test() 189 { 190 // Eat all the matching chars. 191 eatSameChars(); 192 193 // If the string is finished, then the pattern must be empty too, or contains 194 // only wildcards. 195 if (testStringFinished()) { 196 eatWildcard(); 197 if (patternStringFinished()) 198 return true; 199 return false; 200 } 201 202 // Pattern is empty but not string, this is not a match. 203 if (patternStringFinished()) 204 return false; 205 206 // If we don't encounter a *, then we're hosed. 207 if (m_pattern[m_patternIndex] != '*') 208 return false; 209 210 while (!testStringFinished()) { 211 MatchTester nextMatch(*this); 212 nextMatch.m_patternIndex++; 213 if (nextMatch.test()) 214 return true; 215 m_testIndex++; 216 } 217 218 // We reached the end of the string. Let's see if the pattern contains only 219 // wildcards. 220 eatWildcard(); 221 return patternStringFinished(); 222 } 223 }; 224 225 bool UserContentURLPattern::matchesPath(const KURL& test) const 226 { 227 MatchTester match(m_path, test.path()); 228 return match.test(); 229 } 230 231 } // namespace WebCore 232