Home | History | Annotate | Download | only in src
      1 // Copyright 2008 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 #include "ast.h"
     30 #include "assembler.h"
     31 #include "regexp-stack.h"
     32 #include "regexp-macro-assembler.h"
     33 #include "simulator.h"
     34 
     35 namespace v8 {
     36 namespace internal {
     37 
     38 RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
     39 }
     40 
     41 
     42 RegExpMacroAssembler::~RegExpMacroAssembler() {
     43 }
     44 
     45 
     46 bool RegExpMacroAssembler::CanReadUnaligned() {
     47 #ifdef V8_HOST_CAN_READ_UNALIGNED
     48   return true;
     49 #else
     50   return false;
     51 #endif
     52 }
     53 
     54 
     55 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     56 
     57 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
     58     : RegExpMacroAssembler() {
     59 }
     60 
     61 
     62 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     63 }
     64 
     65 
     66 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     67 #ifdef V8_TARGET_CAN_READ_UNALIGNED
     68   return !slow_safe();
     69 #else
     70   return false;
     71 #endif
     72 }
     73 
     74 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     75     String* subject,
     76     int start_index) {
     77   // Not just flat, but ultra flat.
     78   ASSERT(subject->IsExternalString() || subject->IsSeqString());
     79   ASSERT(start_index >= 0);
     80   ASSERT(start_index <= subject->length());
     81   if (subject->IsAsciiRepresentation()) {
     82     const byte* address;
     83     if (StringShape(subject).IsExternal()) {
     84       const char* data = ExternalAsciiString::cast(subject)->resource()->data();
     85       address = reinterpret_cast<const byte*>(data);
     86     } else {
     87       ASSERT(subject->IsSeqAsciiString());
     88       char* data = SeqAsciiString::cast(subject)->GetChars();
     89       address = reinterpret_cast<const byte*>(data);
     90     }
     91     return address + start_index;
     92   }
     93   const uc16* data;
     94   if (StringShape(subject).IsExternal()) {
     95     data = ExternalTwoByteString::cast(subject)->resource()->data();
     96   } else {
     97     ASSERT(subject->IsSeqTwoByteString());
     98     data = SeqTwoByteString::cast(subject)->GetChars();
     99   }
    100   return reinterpret_cast<const byte*>(data + start_index);
    101 }
    102 
    103 
    104 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
    105     Handle<Code> regexp_code,
    106     Handle<String> subject,
    107     int* offsets_vector,
    108     int offsets_vector_length,
    109     int previous_index,
    110     Isolate* isolate) {
    111 
    112   ASSERT(subject->IsFlat());
    113   ASSERT(previous_index >= 0);
    114   ASSERT(previous_index <= subject->length());
    115 
    116   // No allocations before calling the regexp, but we can't use
    117   // AssertNoAllocation, since regexps might be preempted, and another thread
    118   // might do allocation anyway.
    119 
    120   String* subject_ptr = *subject;
    121   // Character offsets into string.
    122   int start_offset = previous_index;
    123   int end_offset = subject_ptr->length();
    124 
    125   // The string has been flattened, so it it is a cons string it contains the
    126   // full string in the first part.
    127   if (StringShape(subject_ptr).IsCons()) {
    128     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
    129     subject_ptr = ConsString::cast(subject_ptr)->first();
    130   }
    131   // Ensure that an underlying string has the same ascii-ness.
    132   bool is_ascii = subject_ptr->IsAsciiRepresentation();
    133   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    134   // String is now either Sequential or External
    135   int char_size_shift = is_ascii ? 0 : 1;
    136   int char_length = end_offset - start_offset;
    137 
    138   const byte* input_start =
    139       StringCharacterPosition(subject_ptr, start_offset);
    140   int byte_length = char_length << char_size_shift;
    141   const byte* input_end = input_start + byte_length;
    142   Result res = Execute(*regexp_code,
    143                        subject_ptr,
    144                        start_offset,
    145                        input_start,
    146                        input_end,
    147                        offsets_vector,
    148                        isolate);
    149   return res;
    150 }
    151 
    152 
    153 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    154     Code* code,
    155     String* input,
    156     int start_offset,
    157     const byte* input_start,
    158     const byte* input_end,
    159     int* output,
    160     Isolate* isolate) {
    161   ASSERT(isolate == Isolate::Current());
    162   // Ensure that the minimum stack has been allocated.
    163   RegExpStackScope stack_scope(isolate);
    164   Address stack_base = stack_scope.stack()->stack_base();
    165 
    166   int direct_call = 0;
    167   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
    168                                           input,
    169                                           start_offset,
    170                                           input_start,
    171                                           input_end,
    172                                           output,
    173                                           stack_base,
    174                                           direct_call,
    175                                           isolate);
    176   ASSERT(result <= SUCCESS);
    177   ASSERT(result >= RETRY);
    178 
    179   if (result == EXCEPTION && !isolate->has_pending_exception()) {
    180     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    181     // but haven't created the exception yet.
    182     isolate->StackOverflow();
    183   }
    184   return static_cast<Result>(result);
    185 }
    186 
    187 
// Lookup table with one entry for each of the 128 character codes 0x00-0x7f,
// laid out eight entries per row.  The entries for the digits '0'-'9', the
// letters 'A'-'Z' and 'a'-'z', and '_' are 0xff; every other entry is 0x00.
// NOTE(review): presumably consulted when testing for regexp word
// characters -- confirm against the users of this table.
const byte NativeRegExpMacroAssembler::word_character_map[] = {
    // 0x00 - 0x1f: no word characters.
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,

    // 0x20 - 0x3f: only the digits are set.
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'

    // 0x40 - 0x5f: upper-case letters and '_' (the first entry, '@', is
    // clear).
    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'

    // 0x60 - 0x7f: lower-case letters (the first entry, '`', is clear).
    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
};
    209 
    210 
    211 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    212     Address byte_offset1,
    213     Address byte_offset2,
    214     size_t byte_length,
    215     Isolate* isolate) {
    216   ASSERT(isolate == Isolate::Current());
    217   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    218       isolate->regexp_macro_assembler_canonicalize();
    219   // This function is not allowed to cause a garbage collection.
    220   // A GC might move the calling generated code and invalidate the
    221   // return address on the stack.
    222   ASSERT(byte_length % 2 == 0);
    223   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    224   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    225   size_t length = byte_length >> 1;
    226 
    227   for (size_t i = 0; i < length; i++) {
    228     unibrow::uchar c1 = substring1[i];
    229     unibrow::uchar c2 = substring2[i];
    230     if (c1 != c2) {
    231       unibrow::uchar s1[1] = { c1 };
    232       canonicalize->get(c1, '\0', s1);
    233       if (s1[0] != c2) {
    234         unibrow::uchar s2[1] = { c2 };
    235         canonicalize->get(c2, '\0', s2);
    236         if (s1[0] != s2[0]) {
    237           return 0;
    238         }
    239       }
    240     }
    241   }
    242   return 1;
    243 }
    244 
    245 
    246 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    247                                               Address* stack_base,
    248                                               Isolate* isolate) {
    249   ASSERT(isolate == Isolate::Current());
    250   RegExpStack* regexp_stack = isolate->regexp_stack();
    251   size_t size = regexp_stack->stack_capacity();
    252   Address old_stack_base = regexp_stack->stack_base();
    253   ASSERT(old_stack_base == *stack_base);
    254   ASSERT(stack_pointer <= old_stack_base);
    255   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    256   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
    257   if (new_stack_base == NULL) {
    258     return NULL;
    259   }
    260   *stack_base = new_stack_base;
    261   intptr_t stack_content_size = old_stack_base - stack_pointer;
    262   return new_stack_base - stack_content_size;
    263 }
    264 
    265 #endif  // V8_INTERPRETED_REGEXP
    266 
    267 } }  // namespace v8::internal
    268