// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "v8.h" 29 #include "ast.h" 30 #include "assembler.h" 31 #include "regexp-stack.h" 32 #include "regexp-macro-assembler.h" 33 #include "simulator.h" 34 35 namespace v8 { 36 namespace internal { 37 38 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) 39 : slow_safe_compiler_(false), 40 global_mode_(NOT_GLOBAL), 41 zone_(zone) { 42 } 43 44 45 RegExpMacroAssembler::~RegExpMacroAssembler() { 46 } 47 48 49 bool RegExpMacroAssembler::CanReadUnaligned() { 50 #ifdef V8_HOST_CAN_READ_UNALIGNED 51 return true; 52 #else 53 return false; 54 #endif 55 } 56 57 58 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. 59 60 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone) 61 : RegExpMacroAssembler(zone) { 62 } 63 64 65 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { 66 } 67 68 69 bool NativeRegExpMacroAssembler::CanReadUnaligned() { 70 return FLAG_enable_unaligned_accesses && !slow_safe(); 71 } 72 73 const byte* NativeRegExpMacroAssembler::StringCharacterPosition( 74 String* subject, 75 int start_index) { 76 // Not just flat, but ultra flat. 
77 ASSERT(subject->IsExternalString() || subject->IsSeqString()); 78 ASSERT(start_index >= 0); 79 ASSERT(start_index <= subject->length()); 80 if (subject->IsOneByteRepresentation()) { 81 const byte* address; 82 if (StringShape(subject).IsExternal()) { 83 const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars(); 84 address = reinterpret_cast<const byte*>(data); 85 } else { 86 ASSERT(subject->IsSeqOneByteString()); 87 const uint8_t* data = SeqOneByteString::cast(subject)->GetChars(); 88 address = reinterpret_cast<const byte*>(data); 89 } 90 return address + start_index; 91 } 92 const uc16* data; 93 if (StringShape(subject).IsExternal()) { 94 data = ExternalTwoByteString::cast(subject)->GetChars(); 95 } else { 96 ASSERT(subject->IsSeqTwoByteString()); 97 data = SeqTwoByteString::cast(subject)->GetChars(); 98 } 99 return reinterpret_cast<const byte*>(data + start_index); 100 } 101 102 103 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( 104 Handle<Code> regexp_code, 105 Handle<String> subject, 106 int* offsets_vector, 107 int offsets_vector_length, 108 int previous_index, 109 Isolate* isolate) { 110 111 ASSERT(subject->IsFlat()); 112 ASSERT(previous_index >= 0); 113 ASSERT(previous_index <= subject->length()); 114 115 // No allocations before calling the regexp, but we can't use 116 // DisallowHeapAllocation, since regexps might be preempted, and another 117 // thread might do allocation anyway. 118 119 String* subject_ptr = *subject; 120 // Character offsets into string. 121 int start_offset = previous_index; 122 int char_length = subject_ptr->length() - start_offset; 123 int slice_offset = 0; 124 125 // The string has been flattened, so if it is a cons string it contains the 126 // full string in the first part. 
127 if (StringShape(subject_ptr).IsCons()) { 128 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length()); 129 subject_ptr = ConsString::cast(subject_ptr)->first(); 130 } else if (StringShape(subject_ptr).IsSliced()) { 131 SlicedString* slice = SlicedString::cast(subject_ptr); 132 subject_ptr = slice->parent(); 133 slice_offset = slice->offset(); 134 } 135 // Ensure that an underlying string has the same ASCII-ness. 136 bool is_ascii = subject_ptr->IsOneByteRepresentation(); 137 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); 138 // String is now either Sequential or External 139 int char_size_shift = is_ascii ? 0 : 1; 140 141 const byte* input_start = 142 StringCharacterPosition(subject_ptr, start_offset + slice_offset); 143 int byte_length = char_length << char_size_shift; 144 const byte* input_end = input_start + byte_length; 145 Result res = Execute(*regexp_code, 146 *subject, 147 start_offset, 148 input_start, 149 input_end, 150 offsets_vector, 151 offsets_vector_length, 152 isolate); 153 return res; 154 } 155 156 157 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( 158 Code* code, 159 String* input, // This needs to be the unpacked (sliced, cons) string. 160 int start_offset, 161 const byte* input_start, 162 const byte* input_end, 163 int* output, 164 int output_size, 165 Isolate* isolate) { 166 // Ensure that the minimum stack has been allocated. 167 RegExpStackScope stack_scope(isolate); 168 Address stack_base = stack_scope.stack()->stack_base(); 169 170 int direct_call = 0; 171 int result = CALL_GENERATED_REGEXP_CODE(code->entry(), 172 input, 173 start_offset, 174 input_start, 175 input_end, 176 output, 177 output_size, 178 stack_base, 179 direct_call, 180 isolate); 181 ASSERT(result >= RETRY); 182 183 if (result == EXCEPTION && !isolate->has_pending_exception()) { 184 // We detected a stack overflow (on the backtrack stack) in RegExp code, 185 // but haven't created the exception yet. 
186 isolate->StackOverflow(); 187 } 188 return static_cast<Result>(result); 189 } 190 191 192 const byte NativeRegExpMacroAssembler::word_character_map[] = { 193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 197 198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 200 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7' 201 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' 202 203 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G' 204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O' 205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W' 206 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_' 207 208 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g' 209 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o' 210 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w' 211 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' 212 // Latin-1 range 213 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 214 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 215 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 216 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 217 218 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 219 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 220 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 221 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 222 223 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 224 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 225 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 226 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 
0x00u, 0x00u, 227 228 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 229 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 230 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 231 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 232 }; 233 234 235 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( 236 Address byte_offset1, 237 Address byte_offset2, 238 size_t byte_length, 239 Isolate* isolate) { 240 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = 241 isolate->regexp_macro_assembler_canonicalize(); 242 // This function is not allowed to cause a garbage collection. 243 // A GC might move the calling generated code and invalidate the 244 // return address on the stack. 245 ASSERT(byte_length % 2 == 0); 246 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); 247 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); 248 size_t length = byte_length >> 1; 249 250 for (size_t i = 0; i < length; i++) { 251 unibrow::uchar c1 = substring1[i]; 252 unibrow::uchar c2 = substring2[i]; 253 if (c1 != c2) { 254 unibrow::uchar s1[1] = { c1 }; 255 canonicalize->get(c1, '\0', s1); 256 if (s1[0] != c2) { 257 unibrow::uchar s2[1] = { c2 }; 258 canonicalize->get(c2, '\0', s2); 259 if (s1[0] != s2[0]) { 260 return 0; 261 } 262 } 263 } 264 } 265 return 1; 266 } 267 268 269 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, 270 Address* stack_base, 271 Isolate* isolate) { 272 RegExpStack* regexp_stack = isolate->regexp_stack(); 273 size_t size = regexp_stack->stack_capacity(); 274 Address old_stack_base = regexp_stack->stack_base(); 275 ASSERT(old_stack_base == *stack_base); 276 ASSERT(stack_pointer <= old_stack_base); 277 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size); 278 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); 279 if (new_stack_base == NULL) { 280 return NULL; 281 } 282 *stack_base = new_stack_base; 283 intptr_t stack_content_size = old_stack_base - stack_pointer; 
284 return new_stack_base - stack_content_size; 285 } 286 287 #endif // V8_INTERPRETED_REGEXP 288 289 } } // namespace v8::internal 290