1 /* 2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. 3 * Copyright (C) 2008 Collabora Ltd. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "RegularExpression.h" 29 30 #include "Logging.h" 31 #include <pcre/pcre.h> 32 33 namespace WebCore { 34 35 class RegularExpression::Private : public RefCounted<RegularExpression::Private> { 36 public: 37 static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity); 38 ~Private(); 39 40 JSRegExp* regexp() const { return m_regexp; } 41 int lastMatchLength; 42 43 private: 44 Private(const String& pattern, TextCaseSensitivity); 45 static JSRegExp* compile(const String& pattern, TextCaseSensitivity); 46 47 JSRegExp* m_regexp; 48 }; 49 50 inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity) 51 { 52 const char* errorMessage; 53 JSRegExp* regexp = jsRegExpCompile(pattern.characters(), pattern.length(), 54 caseSensitivity == TextCaseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine, 55 0, &errorMessage); 56 if (!regexp) 57 LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage); 58 return regexp; 59 } 60 61 inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity) 62 : lastMatchLength(-1) 63 , m_regexp(compile(pattern, caseSensitivity)) 64 { 65 } 66 67 inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity) 68 { 69 return adoptRef(new Private(pattern, caseSensitivity)); 70 } 71 72 RegularExpression::Private::~Private() 73 { 74 jsRegExpFree(m_regexp); 75 } 76 77 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity) 78 : d(Private::create(pattern, caseSensitivity)) 79 { 80 } 81 82 RegularExpression::RegularExpression(const RegularExpression& re) 83 : d(re.d) 84 { 85 } 86 87 RegularExpression::~RegularExpression() 88 { 89 } 90 91 RegularExpression& RegularExpression::operator=(const RegularExpression& re) 92 { 93 d = re.d; 94 return *this; 95 } 96 97 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const 98 { 99 if (!d->regexp()) 100 return -1; 101 102 if (str.isNull()) 103 return -1; 104 105 // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre 106 static const size_t maxOffsets = 3; 107 int offsets[maxOffsets]; 108 int result = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, offsets, maxOffsets); 109 if (result < 0) { 110 if (result != JSRegExpErrorNoMatch) 111 LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", result); 112 d->lastMatchLength = -1; 113 return -1; 114 } 115 116 // 1 means 1 match; 0 means more than one match. First match is recorded in offsets. 117 d->lastMatchLength = offsets[1] - offsets[0]; 118 if (matchLength) 119 *matchLength = d->lastMatchLength; 120 return offsets[0]; 121 } 122 123 int RegularExpression::searchRev(const String& str) const 124 { 125 // FIXME: This could be faster if it actually searched backwards. 126 // Instead, it just searches forwards, multiple times until it finds the last match. 127 128 int start = 0; 129 int pos; 130 int lastPos = -1; 131 int lastMatchLength = -1; 132 do { 133 int matchLength; 134 pos = match(str, start, &matchLength); 135 if (pos >= 0) { 136 if (pos + matchLength > lastPos + lastMatchLength) { 137 // replace last match if this one is later and not a subset of the last match 138 lastPos = pos; 139 lastMatchLength = matchLength; 140 } 141 start = pos + 1; 142 } 143 } while (pos != -1); 144 d->lastMatchLength = lastMatchLength; 145 return lastPos; 146 } 147 148 int RegularExpression::matchedLength() const 149 { 150 return d->lastMatchLength; 151 } 152 153 void replace(String& string, const RegularExpression& target, const String& replacement) 154 { 155 int index = 0; 156 while (index < static_cast<int>(string.length())) { 157 int matchLength; 158 index = target.match(string, index, &matchLength); 159 if (index < 0) 160 break; 161 string.replace(index, matchLength, replacement); 162 index += replacement.length(); 163 if (!matchLength) 164 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* 165 } 166 } 167 168 } // namespace WebCore 169