Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/v8.h"
      6 
      7 #include "src/assembler.h"
      8 #include "src/ast.h"
      9 #include "src/regexp-macro-assembler.h"
     10 #include "src/regexp-stack.h"
     11 #include "src/simulator.h"
     12 
     13 namespace v8 {
     14 namespace internal {
     15 
     16 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
     17   : slow_safe_compiler_(false),
     18     global_mode_(NOT_GLOBAL),
     19     zone_(zone) {
     20 }
     21 
     22 
     23 RegExpMacroAssembler::~RegExpMacroAssembler() {
     24 }
     25 
     26 
     27 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     28 
     29 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
     30     : RegExpMacroAssembler(zone) {
     31 }
     32 
     33 
     34 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     35 }
     36 
     37 
     38 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     39   return FLAG_enable_unaligned_accesses && !slow_safe();
     40 }
     41 
     42 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     43     String* subject,
     44     int start_index) {
     45   // Not just flat, but ultra flat.
     46   DCHECK(subject->IsExternalString() || subject->IsSeqString());
     47   DCHECK(start_index >= 0);
     48   DCHECK(start_index <= subject->length());
     49   if (subject->IsOneByteRepresentation()) {
     50     const byte* address;
     51     if (StringShape(subject).IsExternal()) {
     52       const uint8_t* data = ExternalOneByteString::cast(subject)->GetChars();
     53       address = reinterpret_cast<const byte*>(data);
     54     } else {
     55       DCHECK(subject->IsSeqOneByteString());
     56       const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
     57       address = reinterpret_cast<const byte*>(data);
     58     }
     59     return address + start_index;
     60   }
     61   const uc16* data;
     62   if (StringShape(subject).IsExternal()) {
     63     data = ExternalTwoByteString::cast(subject)->GetChars();
     64   } else {
     65     DCHECK(subject->IsSeqTwoByteString());
     66     data = SeqTwoByteString::cast(subject)->GetChars();
     67   }
     68   return reinterpret_cast<const byte*>(data + start_index);
     69 }
     70 
     71 
     72 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
     73     Handle<Code> regexp_code,
     74     Handle<String> subject,
     75     int* offsets_vector,
     76     int offsets_vector_length,
     77     int previous_index,
     78     Isolate* isolate) {
     79 
     80   DCHECK(subject->IsFlat());
     81   DCHECK(previous_index >= 0);
     82   DCHECK(previous_index <= subject->length());
     83 
     84   // No allocations before calling the regexp, but we can't use
     85   // DisallowHeapAllocation, since regexps might be preempted, and another
     86   // thread might do allocation anyway.
     87 
     88   String* subject_ptr = *subject;
     89   // Character offsets into string.
     90   int start_offset = previous_index;
     91   int char_length = subject_ptr->length() - start_offset;
     92   int slice_offset = 0;
     93 
     94   // The string has been flattened, so if it is a cons string it contains the
     95   // full string in the first part.
     96   if (StringShape(subject_ptr).IsCons()) {
     97     DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
     98     subject_ptr = ConsString::cast(subject_ptr)->first();
     99   } else if (StringShape(subject_ptr).IsSliced()) {
    100     SlicedString* slice = SlicedString::cast(subject_ptr);
    101     subject_ptr = slice->parent();
    102     slice_offset = slice->offset();
    103   }
    104   // Ensure that an underlying string has the same representation.
    105   bool is_one_byte = subject_ptr->IsOneByteRepresentation();
    106   DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    107   // String is now either Sequential or External
    108   int char_size_shift = is_one_byte ? 0 : 1;
    109 
    110   const byte* input_start =
    111       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
    112   int byte_length = char_length << char_size_shift;
    113   const byte* input_end = input_start + byte_length;
    114   Result res = Execute(*regexp_code,
    115                        *subject,
    116                        start_offset,
    117                        input_start,
    118                        input_end,
    119                        offsets_vector,
    120                        offsets_vector_length,
    121                        isolate);
    122   return res;
    123 }
    124 
    125 
    126 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    127     Code* code,
    128     String* input,  // This needs to be the unpacked (sliced, cons) string.
    129     int start_offset,
    130     const byte* input_start,
    131     const byte* input_end,
    132     int* output,
    133     int output_size,
    134     Isolate* isolate) {
    135   // Ensure that the minimum stack has been allocated.
    136   RegExpStackScope stack_scope(isolate);
    137   Address stack_base = stack_scope.stack()->stack_base();
    138 
    139   int direct_call = 0;
    140   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
    141                                           input,
    142                                           start_offset,
    143                                           input_start,
    144                                           input_end,
    145                                           output,
    146                                           output_size,
    147                                           stack_base,
    148                                           direct_call,
    149                                           isolate);
    150   DCHECK(result >= RETRY);
    151 
    152   if (result == EXCEPTION && !isolate->has_pending_exception()) {
    153     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    154     // but haven't created the exception yet.
    155     isolate->StackOverflow();
    156   }
    157   return static_cast<Result>(result);
    158 }
    159 
    160 
    161 const byte NativeRegExpMacroAssembler::word_character_map[] = {
    162     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    163     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    164     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    165     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    166 
    167     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    168     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    169     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    170     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
    171 
    172     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    173     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    174     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    175     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
    176 
    177     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    178     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    179     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    180     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
    181     // Latin-1 range
    182     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    183     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    184     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    185     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    186 
    187     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    188     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    189     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    190     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    191 
    192     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    196 
    197     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    200     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    201 };
    202 
    203 
    204 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    205     Address byte_offset1,
    206     Address byte_offset2,
    207     size_t byte_length,
    208     Isolate* isolate) {
    209   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    210       isolate->regexp_macro_assembler_canonicalize();
    211   // This function is not allowed to cause a garbage collection.
    212   // A GC might move the calling generated code and invalidate the
    213   // return address on the stack.
    214   DCHECK(byte_length % 2 == 0);
    215   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    216   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    217   size_t length = byte_length >> 1;
    218 
    219   for (size_t i = 0; i < length; i++) {
    220     unibrow::uchar c1 = substring1[i];
    221     unibrow::uchar c2 = substring2[i];
    222     if (c1 != c2) {
    223       unibrow::uchar s1[1] = { c1 };
    224       canonicalize->get(c1, '\0', s1);
    225       if (s1[0] != c2) {
    226         unibrow::uchar s2[1] = { c2 };
    227         canonicalize->get(c2, '\0', s2);
    228         if (s1[0] != s2[0]) {
    229           return 0;
    230         }
    231       }
    232     }
    233   }
    234   return 1;
    235 }
    236 
    237 
    238 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    239                                               Address* stack_base,
    240                                               Isolate* isolate) {
    241   RegExpStack* regexp_stack = isolate->regexp_stack();
    242   size_t size = regexp_stack->stack_capacity();
    243   Address old_stack_base = regexp_stack->stack_base();
    244   DCHECK(old_stack_base == *stack_base);
    245   DCHECK(stack_pointer <= old_stack_base);
    246   DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    247   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
    248   if (new_stack_base == NULL) {
    249     return NULL;
    250   }
    251   *stack_base = new_stack_base;
    252   intptr_t stack_content_size = old_stack_base - stack_pointer;
    253   return new_stack_base - stack_content_size;
    254 }
    255 
    256 #endif  // V8_INTERPRETED_REGEXP
    257 
    258 } }  // namespace v8::internal
    259