Home | History | Annotate | Download | only in runtime
      1 /*
      2  *  Copyright (C) 1999-2001, 2004 Harri Porten (porten (at) kde.org)
      3  *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
      4  *  Copyright (C) 2009 Torch Mobile, Inc.
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Lesser General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Lesser General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Lesser General Public
     17  *  License along with this library; if not, write to the Free Software
     18  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
     19  *
     20  */
     21 
     22 #include "config.h"
     23 #include "RegExp.h"
     24 #include "Lexer.h"
     25 #include <stdio.h>
     26 #include <stdlib.h>
     27 #include <string.h>
     28 #include <wtf/Assertions.h>
     29 #include <wtf/OwnArrayPtr.h>
     30 
     31 
     32 #if ENABLE(YARR)
     33 
     34 #include "yarr/RegexCompiler.h"
     35 #if ENABLE(YARR_JIT)
     36 #include "yarr/RegexJIT.h"
     37 #else
     38 #include "yarr/RegexInterpreter.h"
     39 #endif
     40 
     41 #else
     42 
     43 #if ENABLE(WREC)
     44 #include "JIT.h"
     45 #include "WRECGenerator.h"
     46 #endif
     47 #include <pcre/pcre.h>
     48 
     49 #endif
     50 
     51 namespace JSC {
     52 
     53 #if ENABLE(WREC)
     54 using namespace WREC;
     55 #endif
     56 
     57 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
     58     : m_pattern(pattern)
     59     , m_flagBits(0)
     60     , m_constructionError(0)
     61     , m_numSubpatterns(0)
     62 {
     63     compile(globalData);
     64 }
     65 
     66 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
     67     : m_pattern(pattern)
     68     , m_flagBits(0)
     69     , m_constructionError(0)
     70     , m_numSubpatterns(0)
     71 {
     72     // NOTE: The global flag is handled on a case-by-case basis by functions like
     73     // String::match and RegExpObject::match.
     74     if (flags.find('g') != -1)
     75         m_flagBits |= Global;
     76     if (flags.find('i') != -1)
     77         m_flagBits |= IgnoreCase;
     78     if (flags.find('m') != -1)
     79         m_flagBits |= Multiline;
     80 
     81     compile(globalData);
     82 }
     83 
     84 #if !ENABLE(YARR)
     85 RegExp::~RegExp()
     86 {
     87     jsRegExpFree(m_regExp);
     88 }
     89 #endif
     90 
     91 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
     92 {
     93     return adoptRef(new RegExp(globalData, pattern));
     94 }
     95 
     96 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
     97 {
     98     return adoptRef(new RegExp(globalData, pattern, flags));
     99 }
    100 
    101 #if ENABLE(YARR)
    102 
    103 void RegExp::compile(JSGlobalData* globalData)
    104 {
    105 #if ENABLE(YARR_JIT)
    106     Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
    107 #else
    108     UNUSED_PARAM(globalData);
    109     m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
    110 #endif
    111 }
    112 
    113 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
    114 {
    115     if (startOffset < 0)
    116         startOffset = 0;
    117     if (ovector)
    118         ovector->clear();
    119 
    120     if (startOffset > s.size() || s.isNull())
    121         return -1;
    122 
    123 #if ENABLE(YARR_JIT)
    124     if (!!m_regExpJITCode) {
    125 #else
    126     if (m_regExpBytecode) {
    127 #endif
    128         int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
    129         int* offsetVector;
    130         Vector<int, 32> nonReturnedOvector;
    131         if (ovector) {
    132             ovector->resize(offsetVectorSize);
    133             offsetVector = ovector->data();
    134         } else {
    135             nonReturnedOvector.resize(offsetVectorSize);
    136             offsetVector = nonReturnedOvector.data();
    137         }
    138 
    139         ASSERT(offsetVector);
    140         for (int j = 0; j < offsetVectorSize; ++j)
    141             offsetVector[j] = -1;
    142 
    143 
    144 #if ENABLE(YARR_JIT)
    145         int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
    146 #else
    147         int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
    148 #endif
    149 
    150         if (result < 0) {
    151 #ifndef NDEBUG
    152             // TODO: define up a symbol, rather than magic -1
    153             if (result != -1)
    154                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
    155 #endif
    156             if (ovector)
    157                 ovector->clear();
    158         }
    159         return result;
    160     }
    161 
    162     return -1;
    163 }
    164 
    165 #else
    166 
    167 void RegExp::compile(JSGlobalData* globalData)
    168 {
    169     m_regExp = 0;
    170 #if ENABLE(WREC)
    171     m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
    172     if (m_wrecFunction || m_constructionError)
    173         return;
    174     // Fall through to non-WREC case.
    175 #else
    176     UNUSED_PARAM(globalData);
    177 #endif
    178 
    179     JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
    180     JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
    181     m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
    182 }
    183 
    184 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
    185 {
    186     if (startOffset < 0)
    187         startOffset = 0;
    188     if (ovector)
    189         ovector->clear();
    190 
    191     if (startOffset > s.size() || s.isNull())
    192         return -1;
    193 
    194 #if ENABLE(WREC)
    195     if (m_wrecFunction) {
    196         int offsetVectorSize = (m_numSubpatterns + 1) * 2;
    197         int* offsetVector;
    198         Vector<int, 32> nonReturnedOvector;
    199         if (ovector) {
    200             ovector->resize(offsetVectorSize);
    201             offsetVector = ovector->data();
    202         } else {
    203             nonReturnedOvector.resize(offsetVectorSize);
    204             offsetVector = nonReturnedOvector.data();
    205         }
    206         ASSERT(offsetVector);
    207         for (int j = 0; j < offsetVectorSize; ++j)
    208             offsetVector[j] = -1;
    209 
    210         int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
    211 
    212         if (result < 0) {
    213 #ifndef NDEBUG
    214             // TODO: define up a symbol, rather than magic -1
    215             if (result != -1)
    216                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
    217 #endif
    218             if (ovector)
    219                 ovector->clear();
    220         }
    221         return result;
    222     } else
    223 #endif
    224     if (m_regExp) {
    225         // Set up the offset vector for the result.
    226         // First 2/3 used for result, the last third used by PCRE.
    227         int* offsetVector;
    228         int offsetVectorSize;
    229         int fixedSizeOffsetVector[3];
    230         if (!ovector) {
    231             offsetVectorSize = 3;
    232             offsetVector = fixedSizeOffsetVector;
    233         } else {
    234             offsetVectorSize = (m_numSubpatterns + 1) * 3;
    235             ovector->resize(offsetVectorSize);
    236             offsetVector = ovector->data();
    237         }
    238 
    239         int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
    240 
    241         if (numMatches < 0) {
    242 #ifndef NDEBUG
    243             if (numMatches != JSRegExpErrorNoMatch)
    244                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
    245 #endif
    246             if (ovector)
    247                 ovector->clear();
    248             return -1;
    249         }
    250 
    251         return offsetVector[0];
    252     }
    253 
    254     return -1;
    255 }
    256 
    257 #endif
    258 
    259 } // namespace JSC
    260