1 /* 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. 3 * Copyright (C) 2008 Collabora Ltd. 4 * Copyright (C) 2011 Peter Varga (pvarga (at) webkit.org), University of Szeged 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "RegularExpression.h" 30 31 #include <wtf/BumpPointerAllocator.h> 32 #include <yarr/Yarr.h> 33 #include "Logging.h" 34 35 namespace WebCore { 36 37 class RegularExpression::Private : public RefCounted<RegularExpression::Private> { 38 public: 39 static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity) 40 { 41 return adoptRef(new Private(pattern, caseSensitivity)); 42 } 43 44 int lastMatchLength; 45 46 unsigned m_numSubpatterns; 47 OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode; 48 49 private: 50 Private(const String& pattern, TextCaseSensitivity caseSensitivity) 51 : lastMatchLength(-1) 52 , m_regExpByteCode(compile(pattern, caseSensitivity)) 53 , m_constructionError(0) 54 { 55 } 56 57 PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity) 58 { 59 JSC::Yarr::YarrPattern pattern(JSC::UString(patternString.impl()), (caseSensitivity == TextCaseInsensitive), false, &m_constructionError); 60 if (m_constructionError) { 61 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError); 62 return PassOwnPtr<JSC::Yarr::BytecodePattern>(); 63 } 64 65 m_numSubpatterns = pattern.m_numSubpatterns; 66 67 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator); 68 } 69 70 BumpPointerAllocator m_regexAllocator; 71 const char* m_constructionError; 72 }; 73 74 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity) 75 : d(Private::create(pattern, caseSensitivity)) 76 { 77 } 78 79 RegularExpression::RegularExpression(const RegularExpression& re) 80 : d(re.d) 81 { 82 } 83 84 RegularExpression::~RegularExpression() 85 { 86 } 87 88 RegularExpression& RegularExpression::operator=(const RegularExpression& re) 89 { 90 d = re.d; 91 return *this; 92 } 93 94 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const 95 { 96 if (!d->m_regExpByteCode) 97 return -1; 98 99 if (str.isNull()) 100 return -1; 101 102 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2; 103 int* offsetVector; 104 Vector<int, 32> nonReturnedOvector; 105 106 nonReturnedOvector.resize(offsetVectorSize); 107 offsetVector = nonReturnedOvector.data(); 108 109 ASSERT(offsetVector); 110 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++) 111 offsetVector[j] = -1; 112 113 int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str.characters(), startFrom, str.length(), offsetVector); 114 ASSERT(result >= -1); 115 116 if (result < 0) { 117 d->lastMatchLength = -1; 118 return -1; 119 } 120 121 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector. 122 d->lastMatchLength = offsetVector[1] - offsetVector[0]; 123 if (matchLength) 124 *matchLength = d->lastMatchLength; 125 return offsetVector[0]; 126 } 127 128 int RegularExpression::searchRev(const String& str) const 129 { 130 // FIXME: This could be faster if it actually searched backwards. 131 // Instead, it just searches forwards, multiple times until it finds the last match. 132 133 int start = 0; 134 int pos; 135 int lastPos = -1; 136 int lastMatchLength = -1; 137 do { 138 int matchLength; 139 pos = match(str, start, &matchLength); 140 if (pos >= 0) { 141 if (pos + matchLength > lastPos + lastMatchLength) { 142 // replace last match if this one is later and not a subset of the last match 143 lastPos = pos; 144 lastMatchLength = matchLength; 145 } 146 start = pos + 1; 147 } 148 } while (pos != -1); 149 d->lastMatchLength = lastMatchLength; 150 return lastPos; 151 } 152 153 int RegularExpression::matchedLength() const 154 { 155 return d->lastMatchLength; 156 } 157 158 void replace(String& string, const RegularExpression& target, const String& replacement) 159 { 160 int index = 0; 161 while (index < static_cast<int>(string.length())) { 162 int matchLength; 163 index = target.match(string, index, &matchLength); 164 if (index < 0) 165 break; 166 string.replace(index, matchLength, replacement); 167 index += replacement.length(); 168 if (!matchLength) 169 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* 170 } 171 } 172 173 } // namespace WebCore 174