Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 #include "ast.h"
     30 #include "assembler.h"
     31 #include "regexp-stack.h"
     32 #include "regexp-macro-assembler.h"
     33 #include "simulator.h"
     34 
     35 namespace v8 {
     36 namespace internal {
     37 
     38 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
     39   : slow_safe_compiler_(false),
     40     global_mode_(NOT_GLOBAL),
     41     zone_(zone) {
     42 }
     43 
     44 
     45 RegExpMacroAssembler::~RegExpMacroAssembler() {
     46 }
     47 
     48 
     49 bool RegExpMacroAssembler::CanReadUnaligned() {
     50 #ifdef V8_HOST_CAN_READ_UNALIGNED
     51   return true;
     52 #else
     53   return false;
     54 #endif
     55 }
     56 
     57 
     58 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     59 
     60 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
     61     : RegExpMacroAssembler(zone) {
     62 }
     63 
     64 
     65 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     66 }
     67 
     68 
     69 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     70   return FLAG_enable_unaligned_accesses && !slow_safe();
     71 }
     72 
     73 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     74     String* subject,
     75     int start_index) {
     76   // Not just flat, but ultra flat.
     77   ASSERT(subject->IsExternalString() || subject->IsSeqString());
     78   ASSERT(start_index >= 0);
     79   ASSERT(start_index <= subject->length());
     80   if (subject->IsOneByteRepresentation()) {
     81     const byte* address;
     82     if (StringShape(subject).IsExternal()) {
     83       const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
     84       address = reinterpret_cast<const byte*>(data);
     85     } else {
     86       ASSERT(subject->IsSeqOneByteString());
     87       const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
     88       address = reinterpret_cast<const byte*>(data);
     89     }
     90     return address + start_index;
     91   }
     92   const uc16* data;
     93   if (StringShape(subject).IsExternal()) {
     94     data = ExternalTwoByteString::cast(subject)->GetChars();
     95   } else {
     96     ASSERT(subject->IsSeqTwoByteString());
     97     data = SeqTwoByteString::cast(subject)->GetChars();
     98   }
     99   return reinterpret_cast<const byte*>(data + start_index);
    100 }
    101 
    102 
    103 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
    104     Handle<Code> regexp_code,
    105     Handle<String> subject,
    106     int* offsets_vector,
    107     int offsets_vector_length,
    108     int previous_index,
    109     Isolate* isolate) {
    110 
    111   ASSERT(subject->IsFlat());
    112   ASSERT(previous_index >= 0);
    113   ASSERT(previous_index <= subject->length());
    114 
    115   // No allocations before calling the regexp, but we can't use
    116   // DisallowHeapAllocation, since regexps might be preempted, and another
    117   // thread might do allocation anyway.
    118 
    119   String* subject_ptr = *subject;
    120   // Character offsets into string.
    121   int start_offset = previous_index;
    122   int char_length = subject_ptr->length() - start_offset;
    123   int slice_offset = 0;
    124 
    125   // The string has been flattened, so if it is a cons string it contains the
    126   // full string in the first part.
    127   if (StringShape(subject_ptr).IsCons()) {
    128     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
    129     subject_ptr = ConsString::cast(subject_ptr)->first();
    130   } else if (StringShape(subject_ptr).IsSliced()) {
    131     SlicedString* slice = SlicedString::cast(subject_ptr);
    132     subject_ptr = slice->parent();
    133     slice_offset = slice->offset();
    134   }
    135   // Ensure that an underlying string has the same ASCII-ness.
    136   bool is_ascii = subject_ptr->IsOneByteRepresentation();
    137   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    138   // String is now either Sequential or External
    139   int char_size_shift = is_ascii ? 0 : 1;
    140 
    141   const byte* input_start =
    142       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
    143   int byte_length = char_length << char_size_shift;
    144   const byte* input_end = input_start + byte_length;
    145   Result res = Execute(*regexp_code,
    146                        *subject,
    147                        start_offset,
    148                        input_start,
    149                        input_end,
    150                        offsets_vector,
    151                        offsets_vector_length,
    152                        isolate);
    153   return res;
    154 }
    155 
    156 
    157 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    158     Code* code,
    159     String* input,  // This needs to be the unpacked (sliced, cons) string.
    160     int start_offset,
    161     const byte* input_start,
    162     const byte* input_end,
    163     int* output,
    164     int output_size,
    165     Isolate* isolate) {
    166   ASSERT(isolate == Isolate::Current());
    167   // Ensure that the minimum stack has been allocated.
    168   RegExpStackScope stack_scope(isolate);
    169   Address stack_base = stack_scope.stack()->stack_base();
    170 
    171   int direct_call = 0;
    172   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
    173                                           input,
    174                                           start_offset,
    175                                           input_start,
    176                                           input_end,
    177                                           output,
    178                                           output_size,
    179                                           stack_base,
    180                                           direct_call,
    181                                           isolate);
    182   ASSERT(result >= RETRY);
    183 
    184   if (result == EXCEPTION && !isolate->has_pending_exception()) {
    185     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    186     // but haven't created the exception yet.
    187     isolate->StackOverflow();
    188   }
    189   return static_cast<Result>(result);
    190 }
    191 
    192 
    193 const byte NativeRegExpMacroAssembler::word_character_map[] = {
    194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    196     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    197     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    198 
    199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    200     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    201     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    202     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
    203 
    204     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    205     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    206     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    207     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
    208 
    209     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    210     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    211     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    212     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
    213     // Latin-1 range
    214     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    215     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    216     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    217     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    218 
    219     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    220     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    221     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    222     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    223 
    224     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    225     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    226     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    227     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    228 
    229     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    230     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    231     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    232     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    233 };
    234 
    235 
    236 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    237     Address byte_offset1,
    238     Address byte_offset2,
    239     size_t byte_length,
    240     Isolate* isolate) {
    241   ASSERT(isolate == Isolate::Current());
    242   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    243       isolate->regexp_macro_assembler_canonicalize();
    244   // This function is not allowed to cause a garbage collection.
    245   // A GC might move the calling generated code and invalidate the
    246   // return address on the stack.
    247   ASSERT(byte_length % 2 == 0);
    248   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    249   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    250   size_t length = byte_length >> 1;
    251 
    252   for (size_t i = 0; i < length; i++) {
    253     unibrow::uchar c1 = substring1[i];
    254     unibrow::uchar c2 = substring2[i];
    255     if (c1 != c2) {
    256       unibrow::uchar s1[1] = { c1 };
    257       canonicalize->get(c1, '\0', s1);
    258       if (s1[0] != c2) {
    259         unibrow::uchar s2[1] = { c2 };
    260         canonicalize->get(c2, '\0', s2);
    261         if (s1[0] != s2[0]) {
    262           return 0;
    263         }
    264       }
    265     }
    266   }
    267   return 1;
    268 }
    269 
    270 
    271 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    272                                               Address* stack_base,
    273                                               Isolate* isolate) {
    274   ASSERT(isolate == Isolate::Current());
    275   RegExpStack* regexp_stack = isolate->regexp_stack();
    276   size_t size = regexp_stack->stack_capacity();
    277   Address old_stack_base = regexp_stack->stack_base();
    278   ASSERT(old_stack_base == *stack_base);
    279   ASSERT(stack_pointer <= old_stack_base);
    280   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    281   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
    282   if (new_stack_base == NULL) {
    283     return NULL;
    284   }
    285   *stack_base = new_stack_base;
    286   intptr_t stack_content_size = old_stack_base - stack_pointer;
    287   return new_stack_base - stack_content_size;
    288 }
    289 
    290 #endif  // V8_INTERPRETED_REGEXP
    291 
    292 } }  // namespace v8::internal
    293