Home | History | Annotate | Download | only in platform
      1 /*
      2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "platform/URLPatternMatcher.h"
     28 
     29 #include "platform/weborigin/KURL.h"
     30 #include "wtf/StdLibExtras.h"
     31 
     32 namespace WebCore {
     33 
     34 bool URLPatternMatcher::matchesPatterns(const KURL& url, const Vector<String>& whitelist)
     35 {
     36     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
     37     if (whitelist.isEmpty())
     38         return true;
     39 
     40     for (size_t i = 0; i < whitelist.size(); ++i) {
     41         URLPatternMatcher contentPattern(whitelist[i]);
     42         if (contentPattern.matches(url))
     43             return true;
     44     }
     45 
     46     return false;
     47 }
     48 
     49 bool URLPatternMatcher::parse(const String& pattern)
     50 {
     51     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
     52 
     53     size_t schemeEndPos = pattern.find(schemeSeparator);
     54     if (schemeEndPos == kNotFound)
     55         return false;
     56 
     57     m_scheme = pattern.left(schemeEndPos);
     58 
     59     unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
     60     if (hostStartPos >= pattern.length())
     61         return false;
     62 
     63     int pathStartPos = 0;
     64 
     65     if (equalIgnoringCase(m_scheme, "file")) {
     66         pathStartPos = hostStartPos;
     67     } else {
     68         size_t hostEndPos = pattern.find("/", hostStartPos);
     69         if (hostEndPos == kNotFound)
     70             return false;
     71 
     72         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
     73         m_matchSubdomains = false;
     74 
     75         if (m_host == "*") {
     76             // The pattern can be just '*', which means match all domains.
     77             m_host = "";
     78             m_matchSubdomains = true;
     79         } else if (m_host.startsWith("*.")) {
     80             // The first component can be '*', which means to match all subdomains.
     81             m_host = m_host.substring(2); // Length of "*."
     82             m_matchSubdomains = true;
     83         }
     84 
     85         // No other '*' can occur in the host.
     86         if (m_host.find("*") != kNotFound)
     87             return false;
     88 
     89         pathStartPos = hostEndPos;
     90     }
     91 
     92     m_path = pattern.right(pattern.length() - pathStartPos);
     93 
     94     return true;
     95 }
     96 
     97 bool URLPatternMatcher::matches(const KURL& test) const
     98 {
     99     if (m_invalid)
    100         return false;
    101 
    102     if (!equalIgnoringCase(test.protocol(), m_scheme))
    103         return false;
    104 
    105     if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
    106         return false;
    107 
    108     return matchesPath(test);
    109 }
    110 
    111 bool URLPatternMatcher::matchesHost(const KURL& test) const
    112 {
    113     const String& host = test.host();
    114     if (equalIgnoringCase(host, m_host))
    115         return true;
    116 
    117     if (!m_matchSubdomains)
    118         return false;
    119 
    120     // If we're matching subdomains, and we have no host, that means the pattern
    121     // was <scheme>://*/<whatever>, so we match anything.
    122     if (!m_host.length())
    123         return true;
    124 
    125     // Check if the domain is a subdomain of our host.
    126     if (!host.endsWith(m_host, false))
    127         return false;
    128 
    129     ASSERT(host.length() > m_host.length());
    130 
    131     // Check that the character before the suffix is a period.
    132     return host[host.length() - m_host.length() - 1] == '.';
    133 }
    134 
    135 struct MatchTester {
    136     const String m_pattern;
    137     unsigned m_patternIndex;
    138 
    139     const String m_test;
    140     unsigned m_testIndex;
    141 
    142     MatchTester(const String& pattern, const String& test)
    143     : m_pattern(pattern)
    144     , m_patternIndex(0)
    145     , m_test(test)
    146     , m_testIndex(0)
    147     {
    148     }
    149 
    150     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    151     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
    152 
    153     void eatWildcard()
    154     {
    155         while (!patternStringFinished()) {
    156             if (m_pattern[m_patternIndex] != '*')
    157                 return;
    158             m_patternIndex++;
    159         }
    160     }
    161 
    162     void eatSameChars()
    163     {
    164         while (!patternStringFinished() && !testStringFinished()) {
    165             if (m_pattern[m_patternIndex] == '*')
    166                 return;
    167             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
    168                 return;
    169             m_patternIndex++;
    170             m_testIndex++;
    171         }
    172     }
    173 
    174     bool test()
    175     {
    176         // Eat all the matching chars.
    177         eatSameChars();
    178 
    179         // If the string is finished, then the pattern must be empty too, or contains
    180         // only wildcards.
    181         if (testStringFinished()) {
    182             eatWildcard();
    183             if (patternStringFinished())
    184                 return true;
    185             return false;
    186         }
    187 
    188         // Pattern is empty but not string, this is not a match.
    189         if (patternStringFinished())
    190             return false;
    191 
    192         // If we don't encounter a *, then we're hosed.
    193         if (m_pattern[m_patternIndex] != '*')
    194             return false;
    195 
    196         while (!testStringFinished()) {
    197             MatchTester nextMatch(*this);
    198             nextMatch.m_patternIndex++;
    199             if (nextMatch.test())
    200                 return true;
    201             m_testIndex++;
    202         }
    203 
    204         // We reached the end of the string. Let's see if the pattern contains only wildcards.
    205         eatWildcard();
    206         return patternStringFinished();
    207     }
    208 };
    209 
    210 bool URLPatternMatcher::matchesPath(const KURL& test) const
    211 {
    212     MatchTester match(m_path, test.path());
    213     return match.test();
    214 }
    215 
    216 } // namespace WebCore
    217