Home | History | Annotate | Download | only in runtime
      1 /*
      2  *  Copyright (C) 1999-2001, 2004 Harri Porten (porten (at) kde.org)
      3  *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
      4  *  Copyright (C) 2009 Torch Mobile, Inc.
      5  *  Copyright (C) 2010 Peter Varga (pvarga (at) inf.u-szeged.hu), University of Szeged
      6  *
      7  *  This library is free software; you can redistribute it and/or
      8  *  modify it under the terms of the GNU Lesser General Public
      9  *  License as published by the Free Software Foundation; either
     10  *  version 2 of the License, or (at your option) any later version.
     11  *
     12  *  This library is distributed in the hope that it will be useful,
     13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  *  Lesser General Public License for more details.
     16  *
     17  *  You should have received a copy of the GNU Lesser General Public
     18  *  License along with this library; if not, write to the Free Software
     19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "RegExp.h"
     25 
     26 #include "Lexer.h"
     27 #include "yarr/Yarr.h"
     28 #include "yarr/YarrJIT.h"
     29 #include <stdio.h>
     30 #include <stdlib.h>
     31 #include <string.h>
     32 #include <wtf/Assertions.h>
     33 #include <wtf/OwnArrayPtr.h>
     34 
     35 namespace JSC {
     36 
     37 RegExpFlags regExpFlags(const UString& string)
     38 {
     39     RegExpFlags flags = NoFlags;
     40 
     41     for (unsigned i = 0; i < string.length(); ++i) {
     42         switch (string.characters()[i]) {
     43         case 'g':
     44             if (flags & FlagGlobal)
     45                 return InvalidFlags;
     46             flags = static_cast<RegExpFlags>(flags | FlagGlobal);
     47             break;
     48 
     49         case 'i':
     50             if (flags & FlagIgnoreCase)
     51                 return InvalidFlags;
     52             flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
     53             break;
     54 
     55         case 'm':
     56             if (flags & FlagMultiline)
     57                 return InvalidFlags;
     58             flags = static_cast<RegExpFlags>(flags | FlagMultiline);
     59             break;
     60 
     61         default:
     62             return InvalidFlags;
     63         }
     64     }
     65 
     66     return flags;
     67 }
     68 
     69 struct RegExpRepresentation {
     70 #if ENABLE(YARR_JIT)
     71     Yarr::YarrCodeBlock m_regExpJITCode;
     72 #endif
     73     OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
     74 };
     75 
     76 inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
     77     : m_patternString(patternString)
     78     , m_flags(flags)
     79     , m_constructionError(0)
     80     , m_numSubpatterns(0)
     81 #if ENABLE(REGEXP_TRACING)
     82     , m_rtMatchCallCount(0)
     83     , m_rtMatchFoundCount(0)
     84 #endif
     85     , m_representation(adoptPtr(new RegExpRepresentation))
     86 {
     87     m_state = compile(globalData);
     88 }
     89 
     90 RegExp::~RegExp()
     91 {
     92 }
     93 
     94 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
     95 {
     96     RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags));
     97 #if ENABLE(REGEXP_TRACING)
     98     globalData->addRegExpToTrace(res);
     99 #endif
    100     return res.release();
    101 }
    102 
    103 RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
    104 {
    105     Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
    106     if (m_constructionError)
    107         return ParseError;
    108 
    109     m_numSubpatterns = pattern.m_numSubpatterns;
    110 
    111     RegExpState res = ByteCode;
    112 
    113 #if ENABLE(YARR_JIT)
    114     if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
    115         Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
    116 #if ENABLE(YARR_JIT_DEBUG)
    117         if (!m_representation->m_regExpJITCode.isFallBack())
    118             res = JITCode;
    119         else
    120             res = ByteCode;
    121 #else
    122         if (!m_representation->m_regExpJITCode.isFallBack())
    123             return JITCode;
    124 #endif
    125     }
    126 #endif
    127 
    128     m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
    129 
    130     return res;
    131 }
    132 
    133 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
    134 {
    135     if (startOffset < 0)
    136         startOffset = 0;
    137 
    138 #if ENABLE(REGEXP_TRACING)
    139     m_rtMatchCallCount++;
    140 #endif
    141 
    142     if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
    143         return -1;
    144 
    145     if (m_state != ParseError) {
    146         int offsetVectorSize = (m_numSubpatterns + 1) * 2;
    147         int* offsetVector;
    148         Vector<int, 32> nonReturnedOvector;
    149         if (ovector) {
    150             ovector->resize(offsetVectorSize);
    151             offsetVector = ovector->data();
    152         } else {
    153             nonReturnedOvector.resize(offsetVectorSize);
    154             offsetVector = nonReturnedOvector.data();
    155         }
    156 
    157         ASSERT(offsetVector);
    158         // Initialize offsetVector with the return value (index 0) and the
    159         // first subpattern start indicies (even index values) set to -1.
    160         // No need to init the subpattern end indicies.
    161         for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
    162             offsetVector[j] = -1;
    163 
    164         int result;
    165 #if ENABLE(YARR_JIT)
    166         if (m_state == JITCode) {
    167             result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
    168 #if ENABLE(YARR_JIT_DEBUG)
    169             matchCompareWithInterpreter(s, startOffset, offsetVector, result);
    170 #endif
    171         } else
    172 #endif
    173             result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
    174         ASSERT(result >= -1);
    175 
    176 #if ENABLE(REGEXP_TRACING)
    177         if (result != -1)
    178             m_rtMatchFoundCount++;
    179 #endif
    180 
    181         return result;
    182     }
    183 
    184     return -1;
    185 }
    186 
    187 
    188 #if ENABLE(YARR_JIT_DEBUG)
    189 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
    190 {
    191     int offsetVectorSize = (m_numSubpatterns + 1) * 2;
    192     Vector<int, 32> interpreterOvector;
    193     interpreterOvector.resize(offsetVectorSize);
    194     int* interpreterOffsetVector = interpreterOvector.data();
    195     int interpreterResult = 0;
    196     int differences = 0;
    197 
    198     // Initialize interpreterOffsetVector with the return value (index 0) and the
    199     // first subpattern start indicies (even index values) set to -1.
    200     // No need to init the subpattern end indicies.
    201     for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
    202         interpreterOffsetVector[j] = -1;
    203 
    204     interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
    205 
    206     if (jitResult != interpreterResult)
    207         differences++;
    208 
    209     for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
    210         if ((offsetVector[j] != interpreterOffsetVector[j])
    211             || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
    212             differences++;
    213 
    214     if (differences) {
    215         fprintf(stderr, "RegExp Discrepency for /%s/\n    string input ", pattern().utf8().data());
    216         unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
    217 
    218         fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
    219 
    220         if (jitResult != interpreterResult) {
    221             fprintf(stderr, "    JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
    222             differences--;
    223         } else {
    224             fprintf(stderr, "    Correct result = %d\n", jitResult);
    225         }
    226 
    227         if (differences) {
    228             for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
    229                 if (offsetVector[j] != interpreterOffsetVector[j])
    230                     fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
    231                 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
    232                     fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
    233             }
    234         }
    235     }
    236 }
    237 #endif
    238 
    239 #if ENABLE(REGEXP_TRACING)
    240     void RegExp::printTraceData()
    241     {
    242         char formattedPattern[41];
    243         char rawPattern[41];
    244 
    245         strncpy(rawPattern, pattern().utf8().data(), 40);
    246         rawPattern[40]= '\0';
    247 
    248         int pattLen = strlen(rawPattern);
    249 
    250         snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
    251 
    252 #if ENABLE(YARR_JIT)
    253         Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
    254 
    255         const size_t jitAddrSize = 20;
    256         char jitAddr[jitAddrSize];
    257         if (m_state == JITCode)
    258             snprintf(jitAddr, jitAddrSize, "fallback");
    259         else
    260             snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
    261 #else
    262         const char* jitAddr = "JIT Off";
    263 #endif
    264 
    265         printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
    266     }
    267 #endif
    268 
    269 } // namespace JSC
    270