Home | History | Annotate | Download | only in src
      1 // Copyright 2008 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 #include "ast.h"
     30 #include "assembler.h"
     31 #include "regexp-stack.h"
     32 #include "regexp-macro-assembler.h"
     33 #include "simulator.h"
     34 
     35 namespace v8 {
     36 namespace internal {
     37 
     38 RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
     39 }
     40 
     41 
     42 RegExpMacroAssembler::~RegExpMacroAssembler() {
     43 }
     44 
     45 
     46 bool RegExpMacroAssembler::CanReadUnaligned() {
     47 #ifdef V8_HOST_CAN_READ_UNALIGNED
     48   return true;
     49 #else
     50   return false;
     51 #endif
     52 }
     53 
     54 
     55 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     56 
     57 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
     58     : RegExpMacroAssembler() {
     59 }
     60 
     61 
     62 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     63 }
     64 
     65 
     66 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     67 #ifdef V8_TARGET_CAN_READ_UNALIGNED
     68   return !slow_safe();
     69 #else
     70   return false;
     71 #endif
     72 }
     73 
     74 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     75     String* subject,
     76     int start_index) {
     77   // Not just flat, but ultra flat.
     78   ASSERT(subject->IsExternalString() || subject->IsSeqString());
     79   ASSERT(start_index >= 0);
     80   ASSERT(start_index <= subject->length());
     81   if (subject->IsAsciiRepresentation()) {
     82     const byte* address;
     83     if (StringShape(subject).IsExternal()) {
     84       const char* data = ExternalAsciiString::cast(subject)->GetChars();
     85       address = reinterpret_cast<const byte*>(data);
     86     } else {
     87       ASSERT(subject->IsSeqAsciiString());
     88       char* data = SeqAsciiString::cast(subject)->GetChars();
     89       address = reinterpret_cast<const byte*>(data);
     90     }
     91     return address + start_index;
     92   }
     93   const uc16* data;
     94   if (StringShape(subject).IsExternal()) {
     95     data = ExternalTwoByteString::cast(subject)->GetChars();
     96   } else {
     97     ASSERT(subject->IsSeqTwoByteString());
     98     data = SeqTwoByteString::cast(subject)->GetChars();
     99   }
    100   return reinterpret_cast<const byte*>(data + start_index);
    101 }
    102 
    103 
    104 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
    105     Handle<Code> regexp_code,
    106     Handle<String> subject,
    107     int* offsets_vector,
    108     int offsets_vector_length,
    109     int previous_index,
    110     Isolate* isolate) {
    111 
    112   ASSERT(subject->IsFlat());
    113   ASSERT(previous_index >= 0);
    114   ASSERT(previous_index <= subject->length());
    115 
    116   // No allocations before calling the regexp, but we can't use
    117   // AssertNoAllocation, since regexps might be preempted, and another thread
    118   // might do allocation anyway.
    119 
    120   String* subject_ptr = *subject;
    121   // Character offsets into string.
    122   int start_offset = previous_index;
    123   int char_length = subject_ptr->length() - start_offset;
    124   int slice_offset = 0;
    125 
    126   // The string has been flattened, so if it is a cons string it contains the
    127   // full string in the first part.
    128   if (StringShape(subject_ptr).IsCons()) {
    129     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
    130     subject_ptr = ConsString::cast(subject_ptr)->first();
    131   } else if (StringShape(subject_ptr).IsSliced()) {
    132     SlicedString* slice = SlicedString::cast(subject_ptr);
    133     subject_ptr = slice->parent();
    134     slice_offset = slice->offset();
    135   }
    136   // Ensure that an underlying string has the same ASCII-ness.
    137   bool is_ascii = subject_ptr->IsAsciiRepresentation();
    138   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    139   // String is now either Sequential or External
    140   int char_size_shift = is_ascii ? 0 : 1;
    141 
    142   const byte* input_start =
    143       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
    144   int byte_length = char_length << char_size_shift;
    145   const byte* input_end = input_start + byte_length;
    146   Result res = Execute(*regexp_code,
    147                        *subject,
    148                        start_offset,
    149                        input_start,
    150                        input_end,
    151                        offsets_vector,
    152                        isolate);
    153   return res;
    154 }
    155 
    156 
    157 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    158     Code* code,
    159     String* input,  // This needs to be the unpacked (sliced, cons) string.
    160     int start_offset,
    161     const byte* input_start,
    162     const byte* input_end,
    163     int* output,
    164     Isolate* isolate) {
    165   ASSERT(isolate == Isolate::Current());
    166   // Ensure that the minimum stack has been allocated.
    167   RegExpStackScope stack_scope(isolate);
    168   Address stack_base = stack_scope.stack()->stack_base();
    169 
    170   int direct_call = 0;
    171   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
    172                                           input,
    173                                           start_offset,
    174                                           input_start,
    175                                           input_end,
    176                                           output,
    177                                           stack_base,
    178                                           direct_call,
    179                                           isolate);
    180   ASSERT(result <= SUCCESS);
    181   ASSERT(result >= RETRY);
    182 
    183   if (result == EXCEPTION && !isolate->has_pending_exception()) {
    184     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    185     // but haven't created the exception yet.
    186     isolate->StackOverflow();
    187   }
    188   return static_cast<Result>(result);
    189 }
    190 
    191 
    192 const byte NativeRegExpMacroAssembler::word_character_map[] = {
    193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    196     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    197 
    198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    200     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    201     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
    202 
    203     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    204     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    205     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    206     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
    207 
    208     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    209     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    210     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    211     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
    212 };
    213 
    214 
    215 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    216     Address byte_offset1,
    217     Address byte_offset2,
    218     size_t byte_length,
    219     Isolate* isolate) {
    220   ASSERT(isolate == Isolate::Current());
    221   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    222       isolate->regexp_macro_assembler_canonicalize();
    223   // This function is not allowed to cause a garbage collection.
    224   // A GC might move the calling generated code and invalidate the
    225   // return address on the stack.
    226   ASSERT(byte_length % 2 == 0);
    227   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    228   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    229   size_t length = byte_length >> 1;
    230 
    231   for (size_t i = 0; i < length; i++) {
    232     unibrow::uchar c1 = substring1[i];
    233     unibrow::uchar c2 = substring2[i];
    234     if (c1 != c2) {
    235       unibrow::uchar s1[1] = { c1 };
    236       canonicalize->get(c1, '\0', s1);
    237       if (s1[0] != c2) {
    238         unibrow::uchar s2[1] = { c2 };
    239         canonicalize->get(c2, '\0', s2);
    240         if (s1[0] != s2[0]) {
    241           return 0;
    242         }
    243       }
    244     }
    245   }
    246   return 1;
    247 }
    248 
    249 
    250 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    251                                               Address* stack_base,
    252                                               Isolate* isolate) {
    253   ASSERT(isolate == Isolate::Current());
    254   RegExpStack* regexp_stack = isolate->regexp_stack();
    255   size_t size = regexp_stack->stack_capacity();
    256   Address old_stack_base = regexp_stack->stack_base();
    257   ASSERT(old_stack_base == *stack_base);
    258   ASSERT(stack_pointer <= old_stack_base);
    259   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    260   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
    261   if (new_stack_base == NULL) {
    262     return NULL;
    263   }
    264   *stack_base = new_stack_base;
    265   intptr_t stack_content_size = old_stack_base - stack_pointer;
    266   return new_stack_base - stack_content_size;
    267 }
    268 
    269 #endif  // V8_INTERPRETED_REGEXP
    270 
    271 } }  // namespace v8::internal
    272