1 // Copyright 2008-2009 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/regexp/regexp-macro-assembler-irregexp.h" 6 7 #include "src/ast/ast.h" 8 #include "src/regexp/bytecodes-irregexp.h" 9 #include "src/regexp/regexp-macro-assembler.h" 10 #include "src/regexp/regexp-macro-assembler-irregexp-inl.h" 11 12 13 namespace v8 { 14 namespace internal { 15 16 #ifdef V8_INTERPRETED_REGEXP 17 18 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Isolate* isolate, 19 Vector<byte> buffer, 20 Zone* zone) 21 : RegExpMacroAssembler(isolate, zone), 22 buffer_(buffer), 23 pc_(0), 24 own_buffer_(false), 25 advance_current_end_(kInvalidPC), 26 isolate_(isolate) {} 27 28 29 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { 30 if (backtrack_.is_linked()) backtrack_.Unuse(); 31 if (own_buffer_) buffer_.Dispose(); 32 } 33 34 35 RegExpMacroAssemblerIrregexp::IrregexpImplementation 36 RegExpMacroAssemblerIrregexp::Implementation() { 37 return kBytecodeImplementation; 38 } 39 40 41 void RegExpMacroAssemblerIrregexp::Bind(Label* l) { 42 advance_current_end_ = kInvalidPC; 43 DCHECK(!l->is_bound()); 44 if (l->is_linked()) { 45 int pos = l->pos(); 46 while (pos != 0) { 47 int fixup = pos; 48 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); 49 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; 50 } 51 } 52 l->bind_to(pc_); 53 } 54 55 56 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { 57 if (l == NULL) l = &backtrack_; 58 if (l->is_bound()) { 59 Emit32(l->pos()); 60 } else { 61 int pos = 0; 62 if (l->is_linked()) { 63 pos = l->pos(); 64 } 65 l->link_to(pc_); 66 Emit32(pos); 67 } 68 } 69 70 71 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { 72 DCHECK(register_index >= 0); 73 DCHECK(register_index <= kMaxRegister); 74 Emit(BC_POP_REGISTER, register_index); 75 } 76 77 78 void RegExpMacroAssemblerIrregexp::PushRegister( 79 int register_index, 80 StackCheckFlag check_stack_limit) { 81 DCHECK(register_index >= 0); 82 DCHECK(register_index <= kMaxRegister); 83 Emit(BC_PUSH_REGISTER, register_index); 84 } 85 86 87 void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( 88 int register_index, int cp_offset) { 89 DCHECK(register_index >= 0); 90 DCHECK(register_index <= kMaxRegister); 91 Emit(BC_SET_REGISTER_TO_CP, register_index); 92 Emit32(cp_offset); // Current position offset. 93 } 94 95 96 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { 97 DCHECK(reg_from <= reg_to); 98 for (int reg = reg_from; reg <= reg_to; reg++) { 99 SetRegister(reg, -1); 100 } 101 } 102 103 104 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( 105 int register_index) { 106 DCHECK(register_index >= 0); 107 DCHECK(register_index <= kMaxRegister); 108 Emit(BC_SET_CP_TO_REGISTER, register_index); 109 } 110 111 112 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( 113 int register_index) { 114 DCHECK(register_index >= 0); 115 DCHECK(register_index <= kMaxRegister); 116 Emit(BC_SET_REGISTER_TO_SP, register_index); 117 } 118 119 120 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( 121 int register_index) { 122 DCHECK(register_index >= 0); 123 DCHECK(register_index <= kMaxRegister); 124 Emit(BC_SET_SP_TO_REGISTER, register_index); 125 } 126 127 128 void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) { 129 DCHECK(is_uint24(by)); 130 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 131 } 132 133 134 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { 135 DCHECK(register_index >= 0); 136 DCHECK(register_index <= kMaxRegister); 137 Emit(BC_SET_REGISTER, register_index); 138 Emit32(to); 139 } 140 141 142 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { 143 DCHECK(register_index >= 0); 144 DCHECK(register_index <= kMaxRegister); 145 Emit(BC_ADVANCE_REGISTER, register_index); 146 Emit32(by); 147 } 148 149 150 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { 151 Emit(BC_POP_CP, 0); 152 } 153 154 155 void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { 156 Emit(BC_PUSH_CP, 0); 157 } 158 159 160 void RegExpMacroAssemblerIrregexp::Backtrack() { 161 Emit(BC_POP_BT, 0); 162 } 163 164 165 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { 166 if (advance_current_end_ == pc_) { 167 // Combine advance current and goto. 168 pc_ = advance_current_start_; 169 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 170 EmitOrLink(l); 171 advance_current_end_ = kInvalidPC; 172 } else { 173 // Regular goto. 174 Emit(BC_GOTO, 0); 175 EmitOrLink(l); 176 } 177 } 178 179 180 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { 181 Emit(BC_PUSH_BT, 0); 182 EmitOrLink(l); 183 } 184 185 186 bool RegExpMacroAssemblerIrregexp::Succeed() { 187 Emit(BC_SUCCEED, 0); 188 return false; // Restart matching for global regexp not supported. 189 } 190 191 192 void RegExpMacroAssemblerIrregexp::Fail() { 193 Emit(BC_FAIL, 0); 194 } 195 196 197 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { 198 DCHECK(by >= kMinCPOffset); 199 DCHECK(by <= kMaxCPOffset); 200 advance_current_start_ = pc_; 201 advance_current_offset_ = by; 202 Emit(BC_ADVANCE_CP, by); 203 advance_current_end_ = pc_; 204 } 205 206 207 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( 208 Label* on_tos_equals_current_position) { 209 Emit(BC_CHECK_GREEDY, 0); 210 EmitOrLink(on_tos_equals_current_position); 211 } 212 213 214 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, 215 Label* on_failure, 216 bool check_bounds, 217 int characters) { 218 DCHECK(cp_offset >= kMinCPOffset); 219 DCHECK(cp_offset <= kMaxCPOffset); 220 int bytecode; 221 if (check_bounds) { 222 if (characters == 4) { 223 bytecode = BC_LOAD_4_CURRENT_CHARS; 224 } else if (characters == 2) { 225 bytecode = BC_LOAD_2_CURRENT_CHARS; 226 } else { 227 DCHECK(characters == 1); 228 bytecode = BC_LOAD_CURRENT_CHAR; 229 } 230 } else { 231 if (characters == 4) { 232 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 233 } else if (characters == 2) { 234 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 235 } else { 236 DCHECK(characters == 1); 237 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 238 } 239 } 240 Emit(bytecode, cp_offset); 241 if (check_bounds) EmitOrLink(on_failure); 242 } 243 244 245 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, 246 Label* on_less) { 247 Emit(BC_CHECK_LT, limit); 248 EmitOrLink(on_less); 249 } 250 251 252 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, 253 Label* on_greater) { 254 Emit(BC_CHECK_GT, limit); 255 EmitOrLink(on_greater); 256 } 257 258 259 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { 260 if (c > MAX_FIRST_ARG) { 261 Emit(BC_CHECK_4_CHARS, 0); 262 Emit32(c); 263 } else { 264 Emit(BC_CHECK_CHAR, c); 265 } 266 EmitOrLink(on_equal); 267 } 268 269 270 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { 271 Emit(BC_CHECK_AT_START, 0); 272 EmitOrLink(on_at_start); 273 } 274 275 276 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(int cp_offset, 277 Label* on_not_at_start) { 278 Emit(BC_CHECK_NOT_AT_START, cp_offset); 279 EmitOrLink(on_not_at_start); 280 } 281 282 283 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, 284 Label* on_not_equal) { 285 if (c > MAX_FIRST_ARG) { 286 Emit(BC_CHECK_NOT_4_CHARS, 0); 287 Emit32(c); 288 } else { 289 Emit(BC_CHECK_NOT_CHAR, c); 290 } 291 EmitOrLink(on_not_equal); 292 } 293 294 295 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( 296 uint32_t c, 297 uint32_t mask, 298 Label* on_equal) { 299 if (c > MAX_FIRST_ARG) { 300 Emit(BC_AND_CHECK_4_CHARS, 0); 301 Emit32(c); 302 } else { 303 Emit(BC_AND_CHECK_CHAR, c); 304 } 305 Emit32(mask); 306 EmitOrLink(on_equal); 307 } 308 309 310 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( 311 uint32_t c, 312 uint32_t mask, 313 Label* on_not_equal) { 314 if (c > MAX_FIRST_ARG) { 315 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 316 Emit32(c); 317 } else { 318 Emit(BC_AND_CHECK_NOT_CHAR, c); 319 } 320 Emit32(mask); 321 EmitOrLink(on_not_equal); 322 } 323 324 325 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( 326 uc16 c, 327 uc16 minus, 328 uc16 mask, 329 Label* on_not_equal) { 330 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 331 Emit16(minus); 332 Emit16(mask); 333 EmitOrLink(on_not_equal); 334 } 335 336 337 void RegExpMacroAssemblerIrregexp::CheckCharacterInRange( 338 uc16 from, 339 uc16 to, 340 Label* on_in_range) { 341 Emit(BC_CHECK_CHAR_IN_RANGE, 0); 342 Emit16(from); 343 Emit16(to); 344 EmitOrLink(on_in_range); 345 } 346 347 348 void RegExpMacroAssemblerIrregexp::CheckCharacterNotInRange( 349 uc16 from, 350 uc16 to, 351 Label* on_not_in_range) { 352 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); 353 Emit16(from); 354 Emit16(to); 355 EmitOrLink(on_not_in_range); 356 } 357 358 359 void RegExpMacroAssemblerIrregexp::CheckBitInTable( 360 Handle<ByteArray> table, Label* on_bit_set) { 361 Emit(BC_CHECK_BIT_IN_TABLE, 0); 362 EmitOrLink(on_bit_set); 363 for (int i = 0; i < kTableSize; i += kBitsPerByte) { 364 int byte = 0; 365 for (int j = 0; j < kBitsPerByte; j++) { 366 if (table->get(i + j) != 0) byte |= 1 << j; 367 } 368 Emit8(byte); 369 } 370 } 371 372 373 void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, 374 bool read_backward, 375 Label* on_not_equal) { 376 DCHECK(start_reg >= 0); 377 DCHECK(start_reg <= kMaxRegister); 378 Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, 379 start_reg); 380 EmitOrLink(on_not_equal); 381 } 382 383 384 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( 385 int start_reg, bool read_backward, Label* on_not_equal) { 386 DCHECK(start_reg >= 0); 387 DCHECK(start_reg <= kMaxRegister); 388 Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD 389 : BC_CHECK_NOT_BACK_REF_NO_CASE, 390 start_reg); 391 EmitOrLink(on_not_equal); 392 } 393 394 395 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, 396 int comparand, 397 Label* on_less_than) { 398 DCHECK(register_index >= 0); 399 DCHECK(register_index <= kMaxRegister); 400 Emit(BC_CHECK_REGISTER_LT, register_index); 401 Emit32(comparand); 402 EmitOrLink(on_less_than); 403 } 404 405 406 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, 407 int comparand, 408 Label* on_greater_or_equal) { 409 DCHECK(register_index >= 0); 410 DCHECK(register_index <= kMaxRegister); 411 Emit(BC_CHECK_REGISTER_GE, register_index); 412 Emit32(comparand); 413 EmitOrLink(on_greater_or_equal); 414 } 415 416 417 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, 418 Label* on_eq) { 419 DCHECK(register_index >= 0); 420 DCHECK(register_index <= kMaxRegister); 421 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 422 EmitOrLink(on_eq); 423 } 424 425 426 Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode( 427 Handle<String> source) { 428 Bind(&backtrack_); 429 Emit(BC_POP_BT, 0); 430 Handle<ByteArray> array = isolate_->factory()->NewByteArray(length()); 431 Copy(array->GetDataStartAddress()); 432 return array; 433 } 434 435 436 int RegExpMacroAssemblerIrregexp::length() { 437 return pc_; 438 } 439 440 441 void RegExpMacroAssemblerIrregexp::Copy(Address a) { 442 MemCopy(a, buffer_.start(), length()); 443 } 444 445 446 void RegExpMacroAssemblerIrregexp::Expand() { 447 bool old_buffer_was_our_own = own_buffer_; 448 Vector<byte> old_buffer = buffer_; 449 buffer_ = Vector<byte>::New(old_buffer.length() * 2); 450 own_buffer_ = true; 451 MemCopy(buffer_.start(), old_buffer.start(), old_buffer.length()); 452 if (old_buffer_was_our_own) { 453 old_buffer.Dispose(); 454 } 455 } 456 457 #endif // V8_INTERPRETED_REGEXP 458 459 } // namespace internal 460 } // namespace v8 461