Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 #include "ast.h"
     30 #include "assembler.h"
     31 #include "regexp-stack.h"
     32 #include "regexp-macro-assembler.h"
     33 #include "simulator.h"
     34 
     35 namespace v8 {
     36 namespace internal {
     37 
     38 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
     39   : slow_safe_compiler_(false),
     40     global_mode_(NOT_GLOBAL),
     41     zone_(zone) {
     42 }
     43 
     44 
     45 RegExpMacroAssembler::~RegExpMacroAssembler() {
     46 }
     47 
     48 
     49 bool RegExpMacroAssembler::CanReadUnaligned() {
     50 #ifdef V8_HOST_CAN_READ_UNALIGNED
     51   return true;
     52 #else
     53   return false;
     54 #endif
     55 }
     56 
     57 
     58 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
     59 
     60 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
     61     : RegExpMacroAssembler(zone) {
     62 }
     63 
     64 
     65 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
     66 }
     67 
     68 
     69 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
     70   return FLAG_enable_unaligned_accesses && !slow_safe();
     71 }
     72 
     73 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
     74     String* subject,
     75     int start_index) {
     76   // Not just flat, but ultra flat.
     77   ASSERT(subject->IsExternalString() || subject->IsSeqString());
     78   ASSERT(start_index >= 0);
     79   ASSERT(start_index <= subject->length());
     80   if (subject->IsOneByteRepresentation()) {
     81     const byte* address;
     82     if (StringShape(subject).IsExternal()) {
     83       const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
     84       address = reinterpret_cast<const byte*>(data);
     85     } else {
     86       ASSERT(subject->IsSeqOneByteString());
     87       const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
     88       address = reinterpret_cast<const byte*>(data);
     89     }
     90     return address + start_index;
     91   }
     92   const uc16* data;
     93   if (StringShape(subject).IsExternal()) {
     94     data = ExternalTwoByteString::cast(subject)->GetChars();
     95   } else {
     96     ASSERT(subject->IsSeqTwoByteString());
     97     data = SeqTwoByteString::cast(subject)->GetChars();
     98   }
     99   return reinterpret_cast<const byte*>(data + start_index);
    100 }
    101 
    102 
    103 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
    104     Handle<Code> regexp_code,
    105     Handle<String> subject,
    106     int* offsets_vector,
    107     int offsets_vector_length,
    108     int previous_index,
    109     Isolate* isolate) {
    110 
    111   ASSERT(subject->IsFlat());
    112   ASSERT(previous_index >= 0);
    113   ASSERT(previous_index <= subject->length());
    114 
    115   // No allocations before calling the regexp, but we can't use
    116   // DisallowHeapAllocation, since regexps might be preempted, and another
    117   // thread might do allocation anyway.
    118 
    119   String* subject_ptr = *subject;
    120   // Character offsets into string.
    121   int start_offset = previous_index;
    122   int char_length = subject_ptr->length() - start_offset;
    123   int slice_offset = 0;
    124 
    125   // The string has been flattened, so if it is a cons string it contains the
    126   // full string in the first part.
    127   if (StringShape(subject_ptr).IsCons()) {
    128     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
    129     subject_ptr = ConsString::cast(subject_ptr)->first();
    130   } else if (StringShape(subject_ptr).IsSliced()) {
    131     SlicedString* slice = SlicedString::cast(subject_ptr);
    132     subject_ptr = slice->parent();
    133     slice_offset = slice->offset();
    134   }
    135   // Ensure that an underlying string has the same ASCII-ness.
    136   bool is_ascii = subject_ptr->IsOneByteRepresentation();
    137   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
    138   // String is now either Sequential or External
    139   int char_size_shift = is_ascii ? 0 : 1;
    140 
    141   const byte* input_start =
    142       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
    143   int byte_length = char_length << char_size_shift;
    144   const byte* input_end = input_start + byte_length;
    145   Result res = Execute(*regexp_code,
    146                        *subject,
    147                        start_offset,
    148                        input_start,
    149                        input_end,
    150                        offsets_vector,
    151                        offsets_vector_length,
    152                        isolate);
    153   return res;
    154 }
    155 
    156 
    157 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
    158     Code* code,
    159     String* input,  // This needs to be the unpacked (sliced, cons) string.
    160     int start_offset,
    161     const byte* input_start,
    162     const byte* input_end,
    163     int* output,
    164     int output_size,
    165     Isolate* isolate) {
    166   // Ensure that the minimum stack has been allocated.
    167   RegExpStackScope stack_scope(isolate);
    168   Address stack_base = stack_scope.stack()->stack_base();
    169 
    170   int direct_call = 0;
    171   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
    172                                           input,
    173                                           start_offset,
    174                                           input_start,
    175                                           input_end,
    176                                           output,
    177                                           output_size,
    178                                           stack_base,
    179                                           direct_call,
    180                                           isolate);
    181   ASSERT(result >= RETRY);
    182 
    183   if (result == EXCEPTION && !isolate->has_pending_exception()) {
    184     // We detected a stack overflow (on the backtrack stack) in RegExp code,
    185     // but haven't created the exception yet.
    186     isolate->StackOverflow();
    187   }
    188   return static_cast<Result>(result);
    189 }
    190 
    191 
    192 const byte NativeRegExpMacroAssembler::word_character_map[] = {
    193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    196     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    197 
    198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    200     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
    201     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
    202 
    203     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
    204     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
    205     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
    206     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
    207 
    208     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
    209     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
    210     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
    211     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
    212     // Latin-1 range
    213     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    214     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    215     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    216     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    217 
    218     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    219     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    220     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    221     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    222 
    223     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    224     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    225     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    226     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    227 
    228     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    229     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    230     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    231     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
    232 };
    233 
    234 
    235 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    236     Address byte_offset1,
    237     Address byte_offset2,
    238     size_t byte_length,
    239     Isolate* isolate) {
    240   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
    241       isolate->regexp_macro_assembler_canonicalize();
    242   // This function is not allowed to cause a garbage collection.
    243   // A GC might move the calling generated code and invalidate the
    244   // return address on the stack.
    245   ASSERT(byte_length % 2 == 0);
    246   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
    247   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
    248   size_t length = byte_length >> 1;
    249 
    250   for (size_t i = 0; i < length; i++) {
    251     unibrow::uchar c1 = substring1[i];
    252     unibrow::uchar c2 = substring2[i];
    253     if (c1 != c2) {
    254       unibrow::uchar s1[1] = { c1 };
    255       canonicalize->get(c1, '\0', s1);
    256       if (s1[0] != c2) {
    257         unibrow::uchar s2[1] = { c2 };
    258         canonicalize->get(c2, '\0', s2);
    259         if (s1[0] != s2[0]) {
    260           return 0;
    261         }
    262       }
    263     }
    264   }
    265   return 1;
    266 }
    267 
    268 
    269 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
    270                                               Address* stack_base,
    271                                               Isolate* isolate) {
    272   RegExpStack* regexp_stack = isolate->regexp_stack();
    273   size_t size = regexp_stack->stack_capacity();
    274   Address old_stack_base = regexp_stack->stack_base();
    275   ASSERT(old_stack_base == *stack_base);
    276   ASSERT(stack_pointer <= old_stack_base);
    277   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
    278   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
    279   if (new_stack_base == NULL) {
    280     return NULL;
    281   }
    282   *stack_base = new_stack_base;
    283   intptr_t stack_content_size = old_stack_base - stack_pointer;
    284   return new_stack_base - stack_content_size;
    285 }
    286 
    287 #endif  // V8_INTERPRETED_REGEXP
    288 
    289 } }  // namespace v8::internal
    290