1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "UserContentURLPattern.h" 28 #include "KURL.h" 29 #include <wtf/StdLibExtras.h> 30 31 namespace WebCore { 32 33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist) 34 { 35 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. 36 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. 37 bool matchesWhitelist = !whitelist || whitelist->isEmpty(); 38 if (!matchesWhitelist) { 39 for (unsigned i = 0; i < whitelist->size(); ++i) { 40 UserContentURLPattern contentPattern(whitelist->at(i)); 41 if (contentPattern.matches(url)) { 42 matchesWhitelist = true; 43 break; 44 } 45 } 46 } 47 48 bool matchesBlacklist = false; 49 if (blacklist) { 50 for (unsigned i = 0; i < blacklist->size(); ++i) { 51 UserContentURLPattern contentPattern(blacklist->at(i)); 52 if (contentPattern.matches(url)) { 53 matchesBlacklist = true; 54 break; 55 } 56 } 57 } 58 59 return matchesWhitelist && !matchesBlacklist; 60 } 61 62 bool UserContentURLPattern::parse(const String& pattern) 63 { 64 DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://")); 65 66 int schemeEndPos = pattern.find(schemeSeparator); 67 if (schemeEndPos == -1) 68 return false; 69 70 m_scheme = pattern.left(schemeEndPos); 71 72 int hostStartPos = schemeEndPos + schemeSeparator.length(); 73 if (hostStartPos >= static_cast<int>(pattern.length())) 74 return false; 75 76 int pathStartPos = 0; 77 78 if (m_scheme == "file") 79 pathStartPos = hostStartPos; 80 else { 81 int hostEndPos = pattern.find("/", hostStartPos); 82 if (hostEndPos == -1) 83 return false; 84 85 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); 86 87 // The first component can be '*', which means to match all subdomains. 88 Vector<String> hostComponents; 89 m_host.split(".", hostComponents); 90 if (hostComponents[0] == "*") { 91 m_matchSubdomains = true; 92 m_host = ""; 93 for (unsigned i = 1; i < hostComponents.size(); ++i) { 94 m_host = m_host + hostComponents[i]; 95 if (i < hostComponents.size() - 1) 96 m_host = m_host + "."; 97 } 98 } 99 100 // No other '*' can occur in the host. 101 if (m_host.find("*") != -1) 102 return false; 103 104 pathStartPos = hostEndPos; 105 } 106 107 m_path = pattern.right(pattern.length() - pathStartPos); 108 109 return true; 110 } 111 112 bool UserContentURLPattern::matches(const KURL& test) const 113 { 114 if (m_invalid) 115 return false; 116 117 if (test.protocol() != m_scheme) 118 return false; 119 120 if (!matchesHost(test)) 121 return false; 122 123 return matchesPath(test); 124 } 125 126 bool UserContentURLPattern::matchesHost(const KURL& test) const 127 { 128 if (test.host() == m_host) 129 return true; 130 131 if (!m_matchSubdomains) 132 return false; 133 134 // If we're matching subdomains, and we have no host, that means the pattern 135 // was <scheme>://*/<whatever>, so we match anything. 136 if (!m_host.length()) 137 return true; 138 139 // Check if the test host is a subdomain of our host. 140 return test.host().endsWith(m_host, false); 141 } 142 143 struct MatchTester 144 { 145 const String m_pattern; 146 unsigned m_patternIndex; 147 148 const String m_test; 149 unsigned m_testIndex; 150 151 MatchTester(const String& pattern, const String& test) 152 : m_pattern(pattern) 153 , m_patternIndex(0) 154 , m_test(test) 155 , m_testIndex(0) 156 { 157 } 158 159 bool testStringFinished() const { return m_testIndex >= m_test.length(); } 160 bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } 161 162 void eatWildcard() 163 { 164 while (!patternStringFinished()) { 165 if (m_pattern[m_patternIndex] != '*') 166 return; 167 m_patternIndex++; 168 } 169 } 170 171 void eatSameChars() 172 { 173 while (!patternStringFinished() && !testStringFinished()) { 174 if (m_pattern[m_patternIndex] == '*') 175 return; 176 if (m_pattern[m_patternIndex] != m_test[m_testIndex]) 177 return; 178 m_patternIndex++; 179 m_testIndex++; 180 } 181 } 182 183 bool test() 184 { 185 // Eat all the matching chars. 186 eatSameChars(); 187 188 // If the string is finished, then the pattern must be empty too, or contains 189 // only wildcards. 190 if (testStringFinished()) { 191 eatWildcard(); 192 if (patternStringFinished()) 193 return true; 194 return false; 195 } 196 197 // Pattern is empty but not string, this is not a match. 198 if (patternStringFinished()) 199 return false; 200 201 // If we don't encounter a *, then we're hosed. 202 if (m_pattern[m_patternIndex] != '*') 203 return false; 204 205 while (!testStringFinished()) { 206 MatchTester nextMatch(*this); 207 nextMatch.m_patternIndex++; 208 if (nextMatch.test()) 209 return true; 210 m_testIndex++; 211 } 212 213 // We reached the end of the string. Let's see if the pattern contains only 214 // wildcards. 215 eatWildcard(); 216 return patternStringFinished(); 217 } 218 }; 219 220 bool UserContentURLPattern::matchesPath(const KURL& test) const 221 { 222 MatchTester match(m_path, test.path()); 223 return match.test(); 224 } 225 226 } // namespace WebCore 227