Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
      3  * Copyright (C) 2008 Collabora Ltd.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "RegularExpression.h"
     29 
     30 #include "Logging.h"
     31 #include <pcre/pcre.h>
     32 
     33 namespace WebCore {
     34 
     35 class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
     36 public:
     37     static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity);
     38     ~Private();
     39 
     40     JSRegExp* regexp() const { return m_regexp; }
     41     int lastMatchLength;
     42 
     43 private:
     44     Private(const String& pattern, TextCaseSensitivity);
     45     static JSRegExp* compile(const String& pattern, TextCaseSensitivity);
     46 
     47     JSRegExp* m_regexp;
     48 };
     49 
     50 inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity)
     51 {
     52     const char* errorMessage;
     53     JSRegExp* regexp = jsRegExpCompile(pattern.characters(), pattern.length(),
     54         caseSensitivity == TextCaseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
     55         0, &errorMessage);
     56     if (!regexp)
     57         LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
     58     return regexp;
     59 }
     60 
     61 inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity)
     62     : lastMatchLength(-1)
     63     , m_regexp(compile(pattern, caseSensitivity))
     64 {
     65 }
     66 
     67 inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity)
     68 {
     69     return adoptRef(new Private(pattern, caseSensitivity));
     70 }
     71 
     72 RegularExpression::Private::~Private()
     73 {
     74     jsRegExpFree(m_regexp);
     75 }
     76 
     77 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
     78     : d(Private::create(pattern, caseSensitivity))
     79 {
     80 }
     81 
     82 RegularExpression::RegularExpression(const RegularExpression& re)
     83     : d(re.d)
     84 {
     85 }
     86 
     87 RegularExpression::~RegularExpression()
     88 {
     89 }
     90 
     91 RegularExpression& RegularExpression::operator=(const RegularExpression& re)
     92 {
     93     d = re.d;
     94     return *this;
     95 }
     96 
     97 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
     98 {
     99     if (!d->regexp())
    100         return -1;
    101 
    102     if (str.isNull())
    103         return -1;
    104 
    105     // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre
    106     static const size_t maxOffsets = 3;
    107     int offsets[maxOffsets];
    108     int result = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, offsets, maxOffsets);
    109     if (result < 0) {
    110         if (result != JSRegExpErrorNoMatch)
    111             LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", result);
    112         d->lastMatchLength = -1;
    113         return -1;
    114     }
    115 
    116     // 1 means 1 match; 0 means more than one match. First match is recorded in offsets.
    117     d->lastMatchLength = offsets[1] - offsets[0];
    118     if (matchLength)
    119         *matchLength = d->lastMatchLength;
    120     return offsets[0];
    121 }
    122 
    123 int RegularExpression::searchRev(const String& str) const
    124 {
    125     // FIXME: This could be faster if it actually searched backwards.
    126     // Instead, it just searches forwards, multiple times until it finds the last match.
    127 
    128     int start = 0;
    129     int pos;
    130     int lastPos = -1;
    131     int lastMatchLength = -1;
    132     do {
    133         int matchLength;
    134         pos = match(str, start, &matchLength);
    135         if (pos >= 0) {
    136             if (pos + matchLength > lastPos + lastMatchLength) {
    137                 // replace last match if this one is later and not a subset of the last match
    138                 lastPos = pos;
    139                 lastMatchLength = matchLength;
    140             }
    141             start = pos + 1;
    142         }
    143     } while (pos != -1);
    144     d->lastMatchLength = lastMatchLength;
    145     return lastPos;
    146 }
    147 
    148 int RegularExpression::matchedLength() const
    149 {
    150     return d->lastMatchLength;
    151 }
    152 
    153 void replace(String& string, const RegularExpression& target, const String& replacement)
    154 {
    155     int index = 0;
    156     while (index < static_cast<int>(string.length())) {
    157         int matchLength;
    158         index = target.match(string, index, &matchLength);
    159         if (index < 0)
    160             break;
    161         string.replace(index, matchLength, replacement);
    162         index += replacement.length();
    163         if (!matchLength)
    164             break;  // Avoid infinite loop on 0-length matches, e.g. [a-z]*
    165     }
    166 }
    167 
    168 } // namespace WebCore
    169