Home | History | Annotate | Download | only in src
      1 // Copyright 2008 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 #include "ast.h"
     30 #include "assembler.h"
     31 #include "regexp-stack.h"
     32 #include "regexp-macro-assembler.h"
     33 #include "simulator.h"
     34 
     35 namespace v8 {
     36 namespace internal {
     37 
     38 RegExpMacroAssembler::RegExpMacroAssembler() {
     39 }
     40 
     41 
     42 RegExpMacroAssembler::~RegExpMacroAssembler() {
     43 }
     44 
     45 
     46 bool RegExpMacroAssembler::CanReadUnaligned() {
     47 #ifdef V8_HOST_CAN_READ_UNALIGNED
     48   return true;
     49 #else
     50   return false;
     51 #endif
     52 }
     53 
     54 
     55 #ifdef V8_NATIVE_REGEXP  // Avoid unused code, e.g., on ARM.
     56 
     57 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
     58 }
     59 
     60 
     61 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     62 }
     63 
     64 
     65 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     66 #ifdef V8_TARGET_CAN_READ_UNALIGNED
     67   return true;
     68 #else
     69   return false;
     70 #endif
     71 }
     72 
     73 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     74     String* subject,
     75     int start_index) {
     76   // Not just flat, but ultra flat.
     77   ASSERT(subject->IsExternalString() || subject->IsSeqString());
     78   ASSERT(start_index >= 0);
     79   ASSERT(start_index <= subject->length());
     80   if (subject->IsAsciiRepresentation()) {
     81     const byte* address;
     82     if (StringShape(subject).IsExternal()) {
     83       const char* data = ExternalAsciiString::cast(subject)->resource()->data();
     84       address = reinterpret_cast<const byte*>(data);
     85     } else {
     86       ASSERT(subject->IsSeqAsciiString());
     87       char* data = SeqAsciiString::cast(subject)->GetChars();
     88       address = reinterpret_cast<const byte*>(data);
     89     }
     90     return address + start_index;
     91   }
     92   const uc16* data;
     93   if (StringShape(subject).IsExternal()) {
     94     data = ExternalTwoByteString::cast(subject)->resource()->data();
     95   } else {
     96     ASSERT(subject->IsSeqTwoByteString());
     97     data = SeqTwoByteString::cast(subject)->GetChars();
     98   }
     99   return reinterpret_cast<const byte*>(data + start_index);
    100 }
    101 
    102 
    103 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
    104     Handle<Code> regexp_code,
    105     Handle<String> subject,
    106     int* offsets_vector,
    107     int offsets_vector_length,
    108     int previous_index) {
    109 
    110   ASSERT(subject->IsFlat());
    111   ASSERT(previous_index >= 0);
    112   ASSERT(previous_index <= subject->length());
    113 
    114   // No allocations before calling the regexp, but we can't use
    115   // AssertNoAllocation, since regexps might be preempted, and another thread
    116   // might do allocation anyway.
    117 
    118   String* subject_ptr = *subject;
    119   // Character offsets into string.
    120   int start_offset = previous_index;
    121   int end_offset = subject_ptr->length();
    122 
    123   bool is_ascii = subject->IsAsciiRepresentation();
    124 
    125   // The string has been flattened, so it it is a cons string it contains the
    126   // full string in the first part.
    127   if (StringShape(subject_ptr).IsCons()) {
    128     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
    129     subject_ptr = ConsString::cast(subject_ptr)->first();
    130   }
    131   // Ensure that an underlying string has the same ascii-ness.
    132   ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
    133   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    134   // String is now either Sequential or External
    135   int char_size_shift = is_ascii ? 0 : 1;
    136   int char_length = end_offset - start_offset;
    137 
    138   const byte* input_start =
    139       StringCharacterPosition(subject_ptr, start_offset);
    140   int byte_length = char_length << char_size_shift;
    141   const byte* input_end = input_start + byte_length;
    142   Result res = Execute(*regexp_code,
    143                        subject_ptr,
    144                        start_offset,
    145                        input_start,
    146                        input_end,
    147                        offsets_vector);
    148   return res;
    149 }
    150 
    151 
    152 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    153     Code* code,
    154     String* input,
    155     int start_offset,
    156     const byte* input_start,
    157     const byte* input_end,
    158     int* output) {
    159   typedef int (*matcher)(String*, int, const byte*,
    160                          const byte*, int*, Address, int);
    161   matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
    162 
    163   // Ensure that the minimum stack has been allocated.
    164   RegExpStack stack;
    165   Address stack_base = RegExpStack::stack_base();
    166 
    167   int direct_call = 0;
    168   int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
    169                                           input,
    170                                           start_offset,
    171                                           input_start,
    172                                           input_end,
    173                                           output,
    174                                           stack_base,
    175                                           direct_call);
    176   ASSERT(result <= SUCCESS);
    177   ASSERT(result >= RETRY);
    178 
    179   if (result == EXCEPTION && !Top::has_pending_exception()) {
    180     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    181     // but haven't created the exception yet.
    182     Top::StackOverflow();
    183   }
    184   return static_cast<Result>(result);
    185 }
    186 
    187 
    188 static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
    189 
    190 
    191 byte NativeRegExpMacroAssembler::word_character_map[] = {
    192     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    196 
    197     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    199     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    200     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
    201 
    202     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    203     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    204     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    205     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
    206 
    207     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    208     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    209     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    210     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
    211 };
    212 
    213 
    214 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    215     Address byte_offset1,
    216     Address byte_offset2,
    217     size_t byte_length) {
    218   // This function is not allowed to cause a garbage collection.
    219   // A GC might move the calling generated code and invalidate the
    220   // return address on the stack.
    221   ASSERT(byte_length % 2 == 0);
    222   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    223   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    224   size_t length = byte_length >> 1;
    225 
    226   for (size_t i = 0; i < length; i++) {
    227     unibrow::uchar c1 = substring1[i];
    228     unibrow::uchar c2 = substring2[i];
    229     if (c1 != c2) {
    230       unibrow::uchar s1[1] = { c1 };
    231       canonicalize.get(c1, '\0', s1);
    232       if (s1[0] != c2) {
    233         unibrow::uchar s2[1] = { c2 };
    234         canonicalize.get(c2, '\0', s2);
    235         if (s1[0] != s2[0]) {
    236           return 0;
    237         }
    238       }
    239     }
    240   }
    241   return 1;
    242 }
    243 
    244 
    245 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    246                                               Address* stack_base) {
    247   size_t size = RegExpStack::stack_capacity();
    248   Address old_stack_base = RegExpStack::stack_base();
    249   ASSERT(old_stack_base == *stack_base);
    250   ASSERT(stack_pointer <= old_stack_base);
    251   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    252   Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
    253   if (new_stack_base == NULL) {
    254     return NULL;
    255   }
    256   *stack_base = new_stack_base;
    257   intptr_t stack_content_size = old_stack_base - stack_pointer;
    258   return new_stack_base - stack_content_size;
    259 }
    260 
    261 #endif  // V8_NATIVE_REGEXP
    262 } }  // namespace v8::internal
    263