Home | History | Annotate | Download | only in page
      1 /*
      2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "UserContentURLPattern.h"
     28 #include "KURL.h"
     29 #include <wtf/StdLibExtras.h>
     30 
     31 namespace WebCore {
     32 
     33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
     34 {
     35     // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
     36     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
     37     bool matchesWhitelist = !whitelist || whitelist->isEmpty();
     38     if (!matchesWhitelist) {
     39         for (unsigned i = 0; i < whitelist->size(); ++i) {
     40             UserContentURLPattern contentPattern(whitelist->at(i));
     41             if (contentPattern.matches(url)) {
     42                 matchesWhitelist = true;
     43                 break;
     44             }
     45         }
     46     }
     47 
     48     bool matchesBlacklist = false;
     49     if (blacklist) {
     50         for (unsigned i = 0; i < blacklist->size(); ++i) {
     51             UserContentURLPattern contentPattern(blacklist->at(i));
     52             if (contentPattern.matches(url)) {
     53                 matchesBlacklist = true;
     54                 break;
     55             }
     56         }
     57     }
     58 
     59     return matchesWhitelist && !matchesBlacklist;
     60 }
     61 
     62 bool UserContentURLPattern::parse(const String& pattern)
     63 {
     64     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
     65 
     66     size_t schemeEndPos = pattern.find(schemeSeparator);
     67     if (schemeEndPos == notFound)
     68         return false;
     69 
     70     m_scheme = pattern.left(schemeEndPos);
     71 
     72     unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
     73     if (hostStartPos >= pattern.length())
     74         return false;
     75 
     76     int pathStartPos = 0;
     77 
     78     if (equalIgnoringCase(m_scheme, "file"))
     79         pathStartPos = hostStartPos;
     80     else {
     81         size_t hostEndPos = pattern.find("/", hostStartPos);
     82         if (hostEndPos == notFound)
     83             return false;
     84 
     85         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
     86         m_matchSubdomains = false;
     87 
     88         if (m_host == "*") {
     89             // The pattern can be just '*', which means match all domains.
     90             m_host = "";
     91             m_matchSubdomains = true;
     92         } else if (m_host.startsWith("*.")) {
     93             // The first component can be '*', which means to match all subdomains.
     94             m_host = m_host.substring(2); // Length of "*."
     95             m_matchSubdomains = true;
     96         }
     97 
     98         // No other '*' can occur in the host.
     99         if (m_host.find("*") != notFound)
    100             return false;
    101 
    102         pathStartPos = hostEndPos;
    103     }
    104 
    105     m_path = pattern.right(pattern.length() - pathStartPos);
    106 
    107     return true;
    108 }
    109 
    110 bool UserContentURLPattern::matches(const KURL& test) const
    111 {
    112     if (m_invalid)
    113         return false;
    114 
    115     if (!equalIgnoringCase(test.protocol(), m_scheme))
    116         return false;
    117 
    118     if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
    119         return false;
    120 
    121     return matchesPath(test);
    122 }
    123 
    124 bool UserContentURLPattern::matchesHost(const KURL& test) const
    125 {
    126     const String& host = test.host();
    127     if (equalIgnoringCase(host, m_host))
    128         return true;
    129 
    130     if (!m_matchSubdomains)
    131         return false;
    132 
    133     // If we're matching subdomains, and we have no host, that means the pattern
    134     // was <scheme>://*/<whatever>, so we match anything.
    135     if (!m_host.length())
    136         return true;
    137 
    138     // Check if the domain is a subdomain of our host.
    139     if (!host.endsWith(m_host, false))
    140         return false;
    141 
    142     ASSERT(host.length() > m_host.length());
    143 
    144     // Check that the character before the suffix is a period.
    145     return host[host.length() - m_host.length() - 1] == '.';
    146 }
    147 
    148 struct MatchTester
    149 {
    150     const String m_pattern;
    151     unsigned m_patternIndex;
    152 
    153     const String m_test;
    154     unsigned m_testIndex;
    155 
    156     MatchTester(const String& pattern, const String& test)
    157     : m_pattern(pattern)
    158     , m_patternIndex(0)
    159     , m_test(test)
    160     , m_testIndex(0)
    161     {
    162     }
    163 
    164     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    165     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
    166 
    167     void eatWildcard()
    168     {
    169         while (!patternStringFinished()) {
    170             if (m_pattern[m_patternIndex] != '*')
    171                 return;
    172             m_patternIndex++;
    173         }
    174     }
    175 
    176     void eatSameChars()
    177     {
    178         while (!patternStringFinished() && !testStringFinished()) {
    179             if (m_pattern[m_patternIndex] == '*')
    180                 return;
    181             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
    182                 return;
    183             m_patternIndex++;
    184             m_testIndex++;
    185         }
    186     }
    187 
    188     bool test()
    189     {
    190         // Eat all the matching chars.
    191         eatSameChars();
    192 
    193         // If the string is finished, then the pattern must be empty too, or contains
    194         // only wildcards.
    195         if (testStringFinished()) {
    196             eatWildcard();
    197             if (patternStringFinished())
    198                 return true;
    199             return false;
    200         }
    201 
    202         // Pattern is empty but not string, this is not a match.
    203         if (patternStringFinished())
    204             return false;
    205 
    206         // If we don't encounter a *, then we're hosed.
    207         if (m_pattern[m_patternIndex] != '*')
    208             return false;
    209 
    210         while (!testStringFinished()) {
    211             MatchTester nextMatch(*this);
    212             nextMatch.m_patternIndex++;
    213             if (nextMatch.test())
    214                 return true;
    215             m_testIndex++;
    216         }
    217 
    218         // We reached the end of the string.  Let's see if the pattern contains only
    219         // wildcards.
    220         eatWildcard();
    221         return patternStringFinished();
    222     }
    223 };
    224 
    225 bool UserContentURLPattern::matchesPath(const KURL& test) const
    226 {
    227     MatchTester match(m_path, test.path());
    228     return match.test();
    229 }
    230 
    231 } // namespace WebCore
    232