Home | History | Annotate | Download | only in page
      1 /*
      2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/page/UserContentURLPattern.h"
     28 
     29 #include "weborigin/KURL.h"
     30 #include "wtf/StdLibExtras.h"
     31 
     32 namespace WebCore {
     33 
     34 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>& whitelist, const Vector<String>& blacklist)
     35 {
     36     // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
     37     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
     38     bool matchesWhitelist = whitelist.isEmpty();
     39     if (!matchesWhitelist) {
     40         size_t whitelistSize = whitelist.size();
     41         for (size_t i = 0; i < whitelistSize; ++i) {
     42             UserContentURLPattern contentPattern(whitelist[i]);
     43             if (contentPattern.matches(url)) {
     44                 matchesWhitelist = true;
     45                 break;
     46             }
     47         }
     48     }
     49 
     50     bool matchesBlacklist = false;
     51     if (!blacklist.isEmpty()) {
     52         size_t blacklistSize = blacklist.size();
     53         for (size_t i = 0; i < blacklistSize; ++i) {
     54             UserContentURLPattern contentPattern(blacklist[i]);
     55             if (contentPattern.matches(url)) {
     56                 matchesBlacklist = true;
     57                 break;
     58             }
     59         }
     60     }
     61 
     62     return matchesWhitelist && !matchesBlacklist;
     63 }
     64 
     65 bool UserContentURLPattern::parse(const String& pattern)
     66 {
     67     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
     68 
     69     size_t schemeEndPos = pattern.find(schemeSeparator);
     70     if (schemeEndPos == notFound)
     71         return false;
     72 
     73     m_scheme = pattern.left(schemeEndPos);
     74 
     75     unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
     76     if (hostStartPos >= pattern.length())
     77         return false;
     78 
     79     int pathStartPos = 0;
     80 
     81     if (equalIgnoringCase(m_scheme, "file"))
     82         pathStartPos = hostStartPos;
     83     else {
     84         size_t hostEndPos = pattern.find("/", hostStartPos);
     85         if (hostEndPos == notFound)
     86             return false;
     87 
     88         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
     89         m_matchSubdomains = false;
     90 
     91         if (m_host == "*") {
     92             // The pattern can be just '*', which means match all domains.
     93             m_host = "";
     94             m_matchSubdomains = true;
     95         } else if (m_host.startsWith("*.")) {
     96             // The first component can be '*', which means to match all subdomains.
     97             m_host = m_host.substring(2); // Length of "*."
     98             m_matchSubdomains = true;
     99         }
    100 
    101         // No other '*' can occur in the host.
    102         if (m_host.find("*") != notFound)
    103             return false;
    104 
    105         pathStartPos = hostEndPos;
    106     }
    107 
    108     m_path = pattern.right(pattern.length() - pathStartPos);
    109 
    110     return true;
    111 }
    112 
    113 bool UserContentURLPattern::matches(const KURL& test) const
    114 {
    115     if (m_invalid)
    116         return false;
    117 
    118     if (!equalIgnoringCase(test.protocol(), m_scheme))
    119         return false;
    120 
    121     if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
    122         return false;
    123 
    124     return matchesPath(test);
    125 }
    126 
    127 bool UserContentURLPattern::matchesHost(const KURL& test) const
    128 {
    129     const String& host = test.host();
    130     if (equalIgnoringCase(host, m_host))
    131         return true;
    132 
    133     if (!m_matchSubdomains)
    134         return false;
    135 
    136     // If we're matching subdomains, and we have no host, that means the pattern
    137     // was <scheme>://*/<whatever>, so we match anything.
    138     if (!m_host.length())
    139         return true;
    140 
    141     // Check if the domain is a subdomain of our host.
    142     if (!host.endsWith(m_host, false))
    143         return false;
    144 
    145     ASSERT(host.length() > m_host.length());
    146 
    147     // Check that the character before the suffix is a period.
    148     return host[host.length() - m_host.length() - 1] == '.';
    149 }
    150 
    151 struct MatchTester
    152 {
    153     const String m_pattern;
    154     unsigned m_patternIndex;
    155 
    156     const String m_test;
    157     unsigned m_testIndex;
    158 
    159     MatchTester(const String& pattern, const String& test)
    160     : m_pattern(pattern)
    161     , m_patternIndex(0)
    162     , m_test(test)
    163     , m_testIndex(0)
    164     {
    165     }
    166 
    167     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    168     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
    169 
    170     void eatWildcard()
    171     {
    172         while (!patternStringFinished()) {
    173             if (m_pattern[m_patternIndex] != '*')
    174                 return;
    175             m_patternIndex++;
    176         }
    177     }
    178 
    179     void eatSameChars()
    180     {
    181         while (!patternStringFinished() && !testStringFinished()) {
    182             if (m_pattern[m_patternIndex] == '*')
    183                 return;
    184             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
    185                 return;
    186             m_patternIndex++;
    187             m_testIndex++;
    188         }
    189     }
    190 
    191     bool test()
    192     {
    193         // Eat all the matching chars.
    194         eatSameChars();
    195 
    196         // If the string is finished, then the pattern must be empty too, or contains
    197         // only wildcards.
    198         if (testStringFinished()) {
    199             eatWildcard();
    200             if (patternStringFinished())
    201                 return true;
    202             return false;
    203         }
    204 
    205         // Pattern is empty but not string, this is not a match.
    206         if (patternStringFinished())
    207             return false;
    208 
    209         // If we don't encounter a *, then we're hosed.
    210         if (m_pattern[m_patternIndex] != '*')
    211             return false;
    212 
    213         while (!testStringFinished()) {
    214             MatchTester nextMatch(*this);
    215             nextMatch.m_patternIndex++;
    216             if (nextMatch.test())
    217                 return true;
    218             m_testIndex++;
    219         }
    220 
    221         // We reached the end of the string.  Let's see if the pattern contains only
    222         // wildcards.
    223         eatWildcard();
    224         return patternStringFinished();
    225     }
    226 };
    227 
    228 bool UserContentURLPattern::matchesPath(const KURL& test) const
    229 {
    230     MatchTester match(m_path, test.path());
    231     return match.test();
    232 }
    233 
    234 } // namespace WebCore
    235