1 /* 2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten (at) kde.org) 3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Torch Mobile, Inc. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 */ 21 22 #include "config.h" 23 #include "RegExp.h" 24 #include "Lexer.h" 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <wtf/Assertions.h> 29 #include <wtf/OwnArrayPtr.h> 30 31 32 #if ENABLE(YARR) 33 34 #include "yarr/RegexCompiler.h" 35 #if ENABLE(YARR_JIT) 36 #include "yarr/RegexJIT.h" 37 #else 38 #include "yarr/RegexInterpreter.h" 39 #endif 40 41 #else 42 43 #if ENABLE(WREC) 44 #include "JIT.h" 45 #include "WRECGenerator.h" 46 #endif 47 #include <pcre/pcre.h> 48 49 #endif 50 51 namespace JSC { 52 53 #if ENABLE(WREC) 54 using namespace WREC; 55 #endif 56 57 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern) 58 : m_pattern(pattern) 59 , m_flagBits(0) 60 , m_constructionError(0) 61 , m_numSubpatterns(0) 62 { 63 compile(globalData); 64 } 65 66 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags) 67 : m_pattern(pattern) 68 , m_flagBits(0) 69 , m_constructionError(0) 70 , m_numSubpatterns(0) 71 { 72 // NOTE: The global flag is handled on a case-by-case basis by functions like 73 // String::match and RegExpObject::match. 74 if (flags.find('g') != -1) 75 m_flagBits |= Global; 76 if (flags.find('i') != -1) 77 m_flagBits |= IgnoreCase; 78 if (flags.find('m') != -1) 79 m_flagBits |= Multiline; 80 81 compile(globalData); 82 } 83 84 #if !ENABLE(YARR) 85 RegExp::~RegExp() 86 { 87 jsRegExpFree(m_regExp); 88 } 89 #endif 90 91 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern) 92 { 93 return adoptRef(new RegExp(globalData, pattern)); 94 } 95 96 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags) 97 { 98 return adoptRef(new RegExp(globalData, pattern, flags)); 99 } 100 101 #if ENABLE(YARR) 102 103 void RegExp::compile(JSGlobalData* globalData) 104 { 105 #if ENABLE(YARR_JIT) 106 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()); 107 #else 108 UNUSED_PARAM(globalData); 109 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline())); 110 #endif 111 } 112 113 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) 114 { 115 if (startOffset < 0) 116 startOffset = 0; 117 if (ovector) 118 ovector->clear(); 119 120 if (startOffset > s.size() || s.isNull()) 121 return -1; 122 123 #if ENABLE(YARR_JIT) 124 if (!!m_regExpJITCode) { 125 #else 126 if (m_regExpBytecode) { 127 #endif 128 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre. 129 int* offsetVector; 130 Vector<int, 32> nonReturnedOvector; 131 if (ovector) { 132 ovector->resize(offsetVectorSize); 133 offsetVector = ovector->data(); 134 } else { 135 nonReturnedOvector.resize(offsetVectorSize); 136 offsetVector = nonReturnedOvector.data(); 137 } 138 139 ASSERT(offsetVector); 140 for (int j = 0; j < offsetVectorSize; ++j) 141 offsetVector[j] = -1; 142 143 144 #if ENABLE(YARR_JIT) 145 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize); 146 #else 147 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector); 148 #endif 149 150 if (result < 0) { 151 #ifndef NDEBUG 152 // TODO: define up a symbol, rather than magic -1 153 if (result != -1) 154 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result); 155 #endif 156 if (ovector) 157 ovector->clear(); 158 } 159 return result; 160 } 161 162 return -1; 163 } 164 165 #else 166 167 void RegExp::compile(JSGlobalData* globalData) 168 { 169 m_regExp = 0; 170 #if ENABLE(WREC) 171 m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline()); 172 if (m_wrecFunction || m_constructionError) 173 return; 174 // Fall through to non-WREC case. 175 #else 176 UNUSED_PARAM(globalData); 177 #endif 178 179 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; 180 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine; 181 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError); 182 } 183 184 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) 185 { 186 if (startOffset < 0) 187 startOffset = 0; 188 if (ovector) 189 ovector->clear(); 190 191 if (startOffset > s.size() || s.isNull()) 192 return -1; 193 194 #if ENABLE(WREC) 195 if (m_wrecFunction) { 196 int offsetVectorSize = (m_numSubpatterns + 1) * 2; 197 int* offsetVector; 198 Vector<int, 32> nonReturnedOvector; 199 if (ovector) { 200 ovector->resize(offsetVectorSize); 201 offsetVector = ovector->data(); 202 } else { 203 nonReturnedOvector.resize(offsetVectorSize); 204 offsetVector = nonReturnedOvector.data(); 205 } 206 ASSERT(offsetVector); 207 for (int j = 0; j < offsetVectorSize; ++j) 208 offsetVector[j] = -1; 209 210 int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector); 211 212 if (result < 0) { 213 #ifndef NDEBUG 214 // TODO: define up a symbol, rather than magic -1 215 if (result != -1) 216 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result); 217 #endif 218 if (ovector) 219 ovector->clear(); 220 } 221 return result; 222 } else 223 #endif 224 if (m_regExp) { 225 // Set up the offset vector for the result. 226 // First 2/3 used for result, the last third used by PCRE. 227 int* offsetVector; 228 int offsetVectorSize; 229 int fixedSizeOffsetVector[3]; 230 if (!ovector) { 231 offsetVectorSize = 3; 232 offsetVector = fixedSizeOffsetVector; 233 } else { 234 offsetVectorSize = (m_numSubpatterns + 1) * 3; 235 ovector->resize(offsetVectorSize); 236 offsetVector = ovector->data(); 237 } 238 239 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize); 240 241 if (numMatches < 0) { 242 #ifndef NDEBUG 243 if (numMatches != JSRegExpErrorNoMatch) 244 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches); 245 #endif 246 if (ovector) 247 ovector->clear(); 248 return -1; 249 } 250 251 return offsetVector[0]; 252 } 253 254 return -1; 255 } 256 257 #endif 258 259 } // namespace JSC 260