1 /* 2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten (at) kde.org) 3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Torch Mobile, Inc. 5 * Copyright (C) 2010 Peter Varga (pvarga (at) inf.u-szeged.hu), University of Szeged 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this library; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 * 21 */ 22 23 #include "config.h" 24 #include "RegExp.h" 25 26 #include "Lexer.h" 27 #include "yarr/Yarr.h" 28 #include "yarr/YarrJIT.h" 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <wtf/Assertions.h> 33 #include <wtf/OwnArrayPtr.h> 34 35 namespace JSC { 36 37 RegExpFlags regExpFlags(const UString& string) 38 { 39 RegExpFlags flags = NoFlags; 40 41 for (unsigned i = 0; i < string.length(); ++i) { 42 switch (string.characters()[i]) { 43 case 'g': 44 if (flags & FlagGlobal) 45 return InvalidFlags; 46 flags = static_cast<RegExpFlags>(flags | FlagGlobal); 47 break; 48 49 case 'i': 50 if (flags & FlagIgnoreCase) 51 return InvalidFlags; 52 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase); 53 break; 54 55 case 'm': 56 if (flags & FlagMultiline) 57 return InvalidFlags; 58 flags = static_cast<RegExpFlags>(flags | FlagMultiline); 59 break; 60 61 default: 62 return InvalidFlags; 63 } 64 } 65 66 return flags; 67 } 68 69 struct RegExpRepresentation { 70 #if ENABLE(YARR_JIT) 71 Yarr::YarrCodeBlock m_regExpJITCode; 72 #endif 73 OwnPtr<Yarr::BytecodePattern> m_regExpBytecode; 74 }; 75 76 inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) 77 : m_patternString(patternString) 78 , m_flags(flags) 79 , m_constructionError(0) 80 , m_numSubpatterns(0) 81 #if ENABLE(REGEXP_TRACING) 82 , m_rtMatchCallCount(0) 83 , m_rtMatchFoundCount(0) 84 #endif 85 , m_representation(adoptPtr(new RegExpRepresentation)) 86 { 87 m_state = compile(globalData); 88 } 89 90 RegExp::~RegExp() 91 { 92 } 93 94 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags) 95 { 96 RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags)); 97 #if ENABLE(REGEXP_TRACING) 98 globalData->addRegExpToTrace(res); 99 #endif 100 return res.release(); 101 } 102 103 RegExp::RegExpState RegExp::compile(JSGlobalData* globalData) 104 { 105 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); 106 if (m_constructionError) 107 return ParseError; 108 109 m_numSubpatterns = pattern.m_numSubpatterns; 110 111 RegExpState res = ByteCode; 112 113 #if ENABLE(YARR_JIT) 114 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) { 115 Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode); 116 #if ENABLE(YARR_JIT_DEBUG) 117 if (!m_representation->m_regExpJITCode.isFallBack()) 118 res = JITCode; 119 else 120 res = ByteCode; 121 #else 122 if (!m_representation->m_regExpJITCode.isFallBack()) 123 return JITCode; 124 #endif 125 } 126 #endif 127 128 m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator); 129 130 return res; 131 } 132 133 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) 134 { 135 if (startOffset < 0) 136 startOffset = 0; 137 138 #if ENABLE(REGEXP_TRACING) 139 m_rtMatchCallCount++; 140 #endif 141 142 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull()) 143 return -1; 144 145 if (m_state != ParseError) { 146 int offsetVectorSize = (m_numSubpatterns + 1) * 2; 147 int* offsetVector; 148 Vector<int, 32> nonReturnedOvector; 149 if (ovector) { 150 ovector->resize(offsetVectorSize); 151 offsetVector = ovector->data(); 152 } else { 153 nonReturnedOvector.resize(offsetVectorSize); 154 offsetVector = nonReturnedOvector.data(); 155 } 156 157 ASSERT(offsetVector); 158 // Initialize offsetVector with the return value (index 0) and the 159 // first subpattern start indicies (even index values) set to -1. 160 // No need to init the subpattern end indicies. 161 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) 162 offsetVector[j] = -1; 163 164 int result; 165 #if ENABLE(YARR_JIT) 166 if (m_state == JITCode) { 167 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector); 168 #if ENABLE(YARR_JIT_DEBUG) 169 matchCompareWithInterpreter(s, startOffset, offsetVector, result); 170 #endif 171 } else 172 #endif 173 result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector); 174 ASSERT(result >= -1); 175 176 #if ENABLE(REGEXP_TRACING) 177 if (result != -1) 178 m_rtMatchFoundCount++; 179 #endif 180 181 return result; 182 } 183 184 return -1; 185 } 186 187 188 #if ENABLE(YARR_JIT_DEBUG) 189 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult) 190 { 191 int offsetVectorSize = (m_numSubpatterns + 1) * 2; 192 Vector<int, 32> interpreterOvector; 193 interpreterOvector.resize(offsetVectorSize); 194 int* interpreterOffsetVector = interpreterOvector.data(); 195 int interpreterResult = 0; 196 int differences = 0; 197 198 // Initialize interpreterOffsetVector with the return value (index 0) and the 199 // first subpattern start indicies (even index values) set to -1. 200 // No need to init the subpattern end indicies. 201 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) 202 interpreterOffsetVector[j] = -1; 203 204 interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector); 205 206 if (jitResult != interpreterResult) 207 differences++; 208 209 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) 210 if ((offsetVector[j] != interpreterOffsetVector[j]) 211 || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))) 212 differences++; 213 214 if (differences) { 215 fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); 216 unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset); 217 218 fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); 219 220 if (jitResult != interpreterResult) { 221 fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); 222 differences--; 223 } else { 224 fprintf(stderr, " Correct result = %d\n", jitResult); 225 } 226 227 if (differences) { 228 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { 229 if (offsetVector[j] != interpreterOffsetVector[j]) 230 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); 231 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) 232 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); 233 } 234 } 235 } 236 } 237 #endif 238 239 #if ENABLE(REGEXP_TRACING) 240 void RegExp::printTraceData() 241 { 242 char formattedPattern[41]; 243 char rawPattern[41]; 244 245 strncpy(rawPattern, pattern().utf8().data(), 40); 246 rawPattern[40]= '\0'; 247 248 int pattLen = strlen(rawPattern); 249 250 snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); 251 252 #if ENABLE(YARR_JIT) 253 Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode; 254 255 const size_t jitAddrSize = 20; 256 char jitAddr[jitAddrSize]; 257 if (m_state == JITCode) 258 snprintf(jitAddr, jitAddrSize, "fallback"); 259 else 260 snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr())); 261 #else 262 const char* jitAddr = "JIT Off"; 263 #endif 264 265 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); 266 } 267 #endif 268 269 } // namespace JSC 270