Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
      3  * Copyright (C) 2008 Collabora Ltd.
      4  * Copyright (C) 2011 Peter Varga (pvarga (at) webkit.org), University of Szeged
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "RegularExpression.h"
     30 
     31 #include <wtf/BumpPointerAllocator.h>
     32 #include <yarr/Yarr.h>
     33 #include "Logging.h"
     34 
     35 namespace WebCore {
     36 
     37 class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
     38 public:
     39     static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity)
     40     {
     41         return adoptRef(new Private(pattern, caseSensitivity));
     42     }
     43 
     44     int lastMatchLength;
     45 
     46     unsigned m_numSubpatterns;
     47     OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
     48 
     49 private:
     50     Private(const String& pattern, TextCaseSensitivity caseSensitivity)
     51         : lastMatchLength(-1)
     52         , m_regExpByteCode(compile(pattern, caseSensitivity))
     53         , m_constructionError(0)
     54     {
     55     }
     56 
     57     PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity)
     58     {
     59         JSC::Yarr::YarrPattern pattern(JSC::UString(patternString.impl()), (caseSensitivity == TextCaseInsensitive), false, &m_constructionError);
     60         if (m_constructionError) {
     61             LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
     62             return PassOwnPtr<JSC::Yarr::BytecodePattern>();
     63         }
     64 
     65         m_numSubpatterns = pattern.m_numSubpatterns;
     66 
     67         return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
     68     }
     69 
     70     BumpPointerAllocator m_regexAllocator;
     71     const char* m_constructionError;
     72 };
     73 
     74 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
     75     : d(Private::create(pattern, caseSensitivity))
     76 {
     77 }
     78 
     79 RegularExpression::RegularExpression(const RegularExpression& re)
     80     : d(re.d)
     81 {
     82 }
     83 
     84 RegularExpression::~RegularExpression()
     85 {
     86 }
     87 
     88 RegularExpression& RegularExpression::operator=(const RegularExpression& re)
     89 {
     90     d = re.d;
     91     return *this;
     92 }
     93 
     94 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
     95 {
     96     if (!d->m_regExpByteCode)
     97         return -1;
     98 
     99     if (str.isNull())
    100         return -1;
    101 
    102     int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
    103     int* offsetVector;
    104     Vector<int, 32> nonReturnedOvector;
    105 
    106     nonReturnedOvector.resize(offsetVectorSize);
    107     offsetVector = nonReturnedOvector.data();
    108 
    109     ASSERT(offsetVector);
    110     for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
    111         offsetVector[j] = -1;
    112 
    113     int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str.characters(), startFrom, str.length(), offsetVector);
    114     ASSERT(result >= -1);
    115 
    116     if (result < 0) {
    117         d->lastMatchLength = -1;
    118         return -1;
    119     }
    120 
    121     // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
    122     d->lastMatchLength = offsetVector[1] - offsetVector[0];
    123     if (matchLength)
    124         *matchLength = d->lastMatchLength;
    125     return offsetVector[0];
    126 }
    127 
    128 int RegularExpression::searchRev(const String& str) const
    129 {
    130     // FIXME: This could be faster if it actually searched backwards.
    131     // Instead, it just searches forwards, multiple times until it finds the last match.
    132 
    133     int start = 0;
    134     int pos;
    135     int lastPos = -1;
    136     int lastMatchLength = -1;
    137     do {
    138         int matchLength;
    139         pos = match(str, start, &matchLength);
    140         if (pos >= 0) {
    141             if (pos + matchLength > lastPos + lastMatchLength) {
    142                 // replace last match if this one is later and not a subset of the last match
    143                 lastPos = pos;
    144                 lastMatchLength = matchLength;
    145             }
    146             start = pos + 1;
    147         }
    148     } while (pos != -1);
    149     d->lastMatchLength = lastMatchLength;
    150     return lastPos;
    151 }
    152 
    153 int RegularExpression::matchedLength() const
    154 {
    155     return d->lastMatchLength;
    156 }
    157 
    158 void replace(String& string, const RegularExpression& target, const String& replacement)
    159 {
    160     int index = 0;
    161     while (index < static_cast<int>(string.length())) {
    162         int matchLength;
    163         index = target.match(string, index, &matchLength);
    164         if (index < 0)
    165             break;
    166         string.replace(index, matchLength, replacement);
    167         index += replacement.length();
    168         if (!matchLength)
    169             break;  // Avoid infinite loop on 0-length matches, e.g. [a-z]*
    170     }
    171 }
    172 
    173 } // namespace WebCore
    174