Home | History | Annotate | Download | only in page
      1 /*
      2  * Copyright (C) 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "UserContentURLPattern.h"
     28 #include "KURL.h"
     29 #include <wtf/StdLibExtras.h>
     30 
     31 namespace WebCore {
     32 
     33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
     34 {
     35     // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
     36     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
     37     bool matchesWhitelist = !whitelist || whitelist->isEmpty();
     38     if (!matchesWhitelist) {
     39         for (unsigned i = 0; i < whitelist->size(); ++i) {
     40             UserContentURLPattern contentPattern(whitelist->at(i));
     41             if (contentPattern.matches(url)) {
     42                 matchesWhitelist = true;
     43                 break;
     44             }
     45         }
     46     }
     47 
     48     bool matchesBlacklist = false;
     49     if (blacklist) {
     50         for (unsigned i = 0; i < blacklist->size(); ++i) {
     51             UserContentURLPattern contentPattern(blacklist->at(i));
     52             if (contentPattern.matches(url)) {
     53                 matchesBlacklist = true;
     54                 break;
     55             }
     56         }
     57     }
     58 
     59     return matchesWhitelist && !matchesBlacklist;
     60 }
     61 
     62 bool UserContentURLPattern::parse(const String& pattern)
     63 {
     64     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
     65 
     66     int schemeEndPos = pattern.find(schemeSeparator);
     67     if (schemeEndPos == -1)
     68         return false;
     69 
     70     m_scheme = pattern.left(schemeEndPos);
     71 
     72     int hostStartPos = schemeEndPos + schemeSeparator.length();
     73     if (hostStartPos >= static_cast<int>(pattern.length()))
     74         return false;
     75 
     76     int pathStartPos = 0;
     77 
     78     if (m_scheme == "file")
     79         pathStartPos = hostStartPos;
     80     else {
     81         int hostEndPos = pattern.find("/", hostStartPos);
     82         if (hostEndPos == -1)
     83             return false;
     84 
     85         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
     86 
     87         // The first component can be '*', which means to match all subdomains.
     88         Vector<String> hostComponents;
     89         m_host.split(".", hostComponents);
     90         if (hostComponents[0] == "*") {
     91             m_matchSubdomains = true;
     92             m_host = "";
     93             for (unsigned i = 1; i < hostComponents.size(); ++i) {
     94                 m_host = m_host + hostComponents[i];
     95                 if (i < hostComponents.size() - 1)
     96                     m_host = m_host + ".";
     97             }
     98         }
     99 
    100         // No other '*' can occur in the host.
    101         if (m_host.find("*") != -1)
    102             return false;
    103 
    104         pathStartPos = hostEndPos;
    105     }
    106 
    107     m_path = pattern.right(pattern.length() - pathStartPos);
    108 
    109     return true;
    110 }
    111 
    112 bool UserContentURLPattern::matches(const KURL& test) const
    113 {
    114     if (m_invalid)
    115         return false;
    116 
    117     if (test.protocol() != m_scheme)
    118         return false;
    119 
    120     if (!matchesHost(test))
    121         return false;
    122 
    123     return matchesPath(test);
    124 }
    125 
    126 bool UserContentURLPattern::matchesHost(const KURL& test) const
    127 {
    128     if (test.host() == m_host)
    129         return true;
    130 
    131     if (!m_matchSubdomains)
    132         return false;
    133 
    134     // If we're matching subdomains, and we have no host, that means the pattern
    135     // was <scheme>://*/<whatever>, so we match anything.
    136     if (!m_host.length())
    137         return true;
    138 
    139     // Check if the test host is a subdomain of our host.
    140     return test.host().endsWith(m_host, false);
    141 }
    142 
    143 struct MatchTester
    144 {
    145     const String m_pattern;
    146     unsigned m_patternIndex;
    147 
    148     const String m_test;
    149     unsigned m_testIndex;
    150 
    151     MatchTester(const String& pattern, const String& test)
    152     : m_pattern(pattern)
    153     , m_patternIndex(0)
    154     , m_test(test)
    155     , m_testIndex(0)
    156     {
    157     }
    158 
    159     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    160     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
    161 
    162     void eatWildcard()
    163     {
    164         while (!patternStringFinished()) {
    165             if (m_pattern[m_patternIndex] != '*')
    166                 return;
    167             m_patternIndex++;
    168         }
    169     }
    170 
    171     void eatSameChars()
    172     {
    173         while (!patternStringFinished() && !testStringFinished()) {
    174             if (m_pattern[m_patternIndex] == '*')
    175                 return;
    176             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
    177                 return;
    178             m_patternIndex++;
    179             m_testIndex++;
    180         }
    181     }
    182 
    183     bool test()
    184     {
    185         // Eat all the matching chars.
    186         eatSameChars();
    187 
    188         // If the string is finished, then the pattern must be empty too, or contains
    189         // only wildcards.
    190         if (testStringFinished()) {
    191             eatWildcard();
    192             if (patternStringFinished())
    193                 return true;
    194             return false;
    195         }
    196 
    197         // Pattern is empty but not string, this is not a match.
    198         if (patternStringFinished())
    199             return false;
    200 
    201         // If we don't encounter a *, then we're hosed.
    202         if (m_pattern[m_patternIndex] != '*')
    203             return false;
    204 
    205         while (!testStringFinished()) {
    206             MatchTester nextMatch(*this);
    207             nextMatch.m_patternIndex++;
    208             if (nextMatch.test())
    209                 return true;
    210             m_testIndex++;
    211         }
    212 
    213         // We reached the end of the string.  Let's see if the pattern contains only
    214         // wildcards.
    215         eatWildcard();
    216         return patternStringFinished();
    217     }
    218 };
    219 
    220 bool UserContentURLPattern::matchesPath(const KURL& test) const
    221 {
    222     MatchTester match(m_path, test.path());
    223     return match.test();
    224 }
    225 
    226 } // namespace WebCore
    227