1 // Copyright 2008-2009 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/v8.h" 6 7 #include "src/ast.h" 8 #include "src/bytecodes-irregexp.h" 9 #include "src/regexp-macro-assembler.h" 10 #include "src/regexp-macro-assembler-irregexp.h" 11 #include "src/regexp-macro-assembler-irregexp-inl.h" 12 13 14 namespace v8 { 15 namespace internal { 16 17 #ifdef V8_INTERPRETED_REGEXP 18 19 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer, 20 Zone* zone) 21 : RegExpMacroAssembler(zone), 22 buffer_(buffer), 23 pc_(0), 24 own_buffer_(false), 25 advance_current_end_(kInvalidPC), 26 isolate_(zone->isolate()) { } 27 28 29 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { 30 if (backtrack_.is_linked()) backtrack_.Unuse(); 31 if (own_buffer_) buffer_.Dispose(); 32 } 33 34 35 RegExpMacroAssemblerIrregexp::IrregexpImplementation 36 RegExpMacroAssemblerIrregexp::Implementation() { 37 return kBytecodeImplementation; 38 } 39 40 41 void RegExpMacroAssemblerIrregexp::Bind(Label* l) { 42 advance_current_end_ = kInvalidPC; 43 DCHECK(!l->is_bound()); 44 if (l->is_linked()) { 45 int pos = l->pos(); 46 while (pos != 0) { 47 int fixup = pos; 48 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); 49 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; 50 } 51 } 52 l->bind_to(pc_); 53 } 54 55 56 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { 57 if (l == NULL) l = &backtrack_; 58 if (l->is_bound()) { 59 Emit32(l->pos()); 60 } else { 61 int pos = 0; 62 if (l->is_linked()) { 63 pos = l->pos(); 64 } 65 l->link_to(pc_); 66 Emit32(pos); 67 } 68 } 69 70 71 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { 72 DCHECK(register_index >= 0); 73 DCHECK(register_index <= kMaxRegister); 74 Emit(BC_POP_REGISTER, register_index); 75 } 76 77 78 void RegExpMacroAssemblerIrregexp::PushRegister( 79 int register_index, 80 StackCheckFlag check_stack_limit) { 81 DCHECK(register_index >= 0); 82 DCHECK(register_index <= kMaxRegister); 83 Emit(BC_PUSH_REGISTER, register_index); 84 } 85 86 87 void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( 88 int register_index, int cp_offset) { 89 DCHECK(register_index >= 0); 90 DCHECK(register_index <= kMaxRegister); 91 Emit(BC_SET_REGISTER_TO_CP, register_index); 92 Emit32(cp_offset); // Current position offset. 93 } 94 95 96 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { 97 DCHECK(reg_from <= reg_to); 98 for (int reg = reg_from; reg <= reg_to; reg++) { 99 SetRegister(reg, -1); 100 } 101 } 102 103 104 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( 105 int register_index) { 106 DCHECK(register_index >= 0); 107 DCHECK(register_index <= kMaxRegister); 108 Emit(BC_SET_CP_TO_REGISTER, register_index); 109 } 110 111 112 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( 113 int register_index) { 114 DCHECK(register_index >= 0); 115 DCHECK(register_index <= kMaxRegister); 116 Emit(BC_SET_REGISTER_TO_SP, register_index); 117 } 118 119 120 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( 121 int register_index) { 122 DCHECK(register_index >= 0); 123 DCHECK(register_index <= kMaxRegister); 124 Emit(BC_SET_SP_TO_REGISTER, register_index); 125 } 126 127 128 void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) { 129 DCHECK(is_uint24(by)); 130 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 131 } 132 133 134 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { 135 DCHECK(register_index >= 0); 136 DCHECK(register_index <= kMaxRegister); 137 Emit(BC_SET_REGISTER, register_index); 138 Emit32(to); 139 } 140 141 142 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { 143 DCHECK(register_index >= 0); 144 DCHECK(register_index <= kMaxRegister); 145 Emit(BC_ADVANCE_REGISTER, register_index); 146 Emit32(by); 147 } 148 149 150 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { 151 Emit(BC_POP_CP, 0); 152 } 153 154 155 void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { 156 Emit(BC_PUSH_CP, 0); 157 } 158 159 160 void RegExpMacroAssemblerIrregexp::Backtrack() { 161 Emit(BC_POP_BT, 0); 162 } 163 164 165 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { 166 if (advance_current_end_ == pc_) { 167 // Combine advance current and goto. 168 pc_ = advance_current_start_; 169 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 170 EmitOrLink(l); 171 advance_current_end_ = kInvalidPC; 172 } else { 173 // Regular goto. 174 Emit(BC_GOTO, 0); 175 EmitOrLink(l); 176 } 177 } 178 179 180 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { 181 Emit(BC_PUSH_BT, 0); 182 EmitOrLink(l); 183 } 184 185 186 bool RegExpMacroAssemblerIrregexp::Succeed() { 187 Emit(BC_SUCCEED, 0); 188 return false; // Restart matching for global regexp not supported. 189 } 190 191 192 void RegExpMacroAssemblerIrregexp::Fail() { 193 Emit(BC_FAIL, 0); 194 } 195 196 197 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { 198 DCHECK(by >= kMinCPOffset); 199 DCHECK(by <= kMaxCPOffset); 200 advance_current_start_ = pc_; 201 advance_current_offset_ = by; 202 Emit(BC_ADVANCE_CP, by); 203 advance_current_end_ = pc_; 204 } 205 206 207 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( 208 Label* on_tos_equals_current_position) { 209 Emit(BC_CHECK_GREEDY, 0); 210 EmitOrLink(on_tos_equals_current_position); 211 } 212 213 214 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, 215 Label* on_failure, 216 bool check_bounds, 217 int characters) { 218 DCHECK(cp_offset >= kMinCPOffset); 219 DCHECK(cp_offset <= kMaxCPOffset); 220 int bytecode; 221 if (check_bounds) { 222 if (characters == 4) { 223 bytecode = BC_LOAD_4_CURRENT_CHARS; 224 } else if (characters == 2) { 225 bytecode = BC_LOAD_2_CURRENT_CHARS; 226 } else { 227 DCHECK(characters == 1); 228 bytecode = BC_LOAD_CURRENT_CHAR; 229 } 230 } else { 231 if (characters == 4) { 232 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 233 } else if (characters == 2) { 234 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 235 } else { 236 DCHECK(characters == 1); 237 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 238 } 239 } 240 Emit(bytecode, cp_offset); 241 if (check_bounds) EmitOrLink(on_failure); 242 } 243 244 245 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, 246 Label* on_less) { 247 Emit(BC_CHECK_LT, limit); 248 EmitOrLink(on_less); 249 } 250 251 252 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, 253 Label* on_greater) { 254 Emit(BC_CHECK_GT, limit); 255 EmitOrLink(on_greater); 256 } 257 258 259 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { 260 if (c > MAX_FIRST_ARG) { 261 Emit(BC_CHECK_4_CHARS, 0); 262 Emit32(c); 263 } else { 264 Emit(BC_CHECK_CHAR, c); 265 } 266 EmitOrLink(on_equal); 267 } 268 269 270 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { 271 Emit(BC_CHECK_AT_START, 0); 272 EmitOrLink(on_at_start); 273 } 274 275 276 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { 277 Emit(BC_CHECK_NOT_AT_START, 0); 278 EmitOrLink(on_not_at_start); 279 } 280 281 282 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, 283 Label* on_not_equal) { 284 if (c > MAX_FIRST_ARG) { 285 Emit(BC_CHECK_NOT_4_CHARS, 0); 286 Emit32(c); 287 } else { 288 Emit(BC_CHECK_NOT_CHAR, c); 289 } 290 EmitOrLink(on_not_equal); 291 } 292 293 294 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( 295 uint32_t c, 296 uint32_t mask, 297 Label* on_equal) { 298 if (c > MAX_FIRST_ARG) { 299 Emit(BC_AND_CHECK_4_CHARS, 0); 300 Emit32(c); 301 } else { 302 Emit(BC_AND_CHECK_CHAR, c); 303 } 304 Emit32(mask); 305 EmitOrLink(on_equal); 306 } 307 308 309 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( 310 uint32_t c, 311 uint32_t mask, 312 Label* on_not_equal) { 313 if (c > MAX_FIRST_ARG) { 314 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 315 Emit32(c); 316 } else { 317 Emit(BC_AND_CHECK_NOT_CHAR, c); 318 } 319 Emit32(mask); 320 EmitOrLink(on_not_equal); 321 } 322 323 324 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( 325 uc16 c, 326 uc16 minus, 327 uc16 mask, 328 Label* on_not_equal) { 329 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 330 Emit16(minus); 331 Emit16(mask); 332 EmitOrLink(on_not_equal); 333 } 334 335 336 void RegExpMacroAssemblerIrregexp::CheckCharacterInRange( 337 uc16 from, 338 uc16 to, 339 Label* on_in_range) { 340 Emit(BC_CHECK_CHAR_IN_RANGE, 0); 341 Emit16(from); 342 Emit16(to); 343 EmitOrLink(on_in_range); 344 } 345 346 347 void RegExpMacroAssemblerIrregexp::CheckCharacterNotInRange( 348 uc16 from, 349 uc16 to, 350 Label* on_not_in_range) { 351 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); 352 Emit16(from); 353 Emit16(to); 354 EmitOrLink(on_not_in_range); 355 } 356 357 358 void RegExpMacroAssemblerIrregexp::CheckBitInTable( 359 Handle<ByteArray> table, Label* on_bit_set) { 360 Emit(BC_CHECK_BIT_IN_TABLE, 0); 361 EmitOrLink(on_bit_set); 362 for (int i = 0; i < kTableSize; i += kBitsPerByte) { 363 int byte = 0; 364 for (int j = 0; j < kBitsPerByte; j++) { 365 if (table->get(i + j) != 0) byte |= 1 << j; 366 } 367 Emit8(byte); 368 } 369 } 370 371 372 void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, 373 Label* on_not_equal) { 374 DCHECK(start_reg >= 0); 375 DCHECK(start_reg <= kMaxRegister); 376 Emit(BC_CHECK_NOT_BACK_REF, start_reg); 377 EmitOrLink(on_not_equal); 378 } 379 380 381 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( 382 int start_reg, 383 Label* on_not_equal) { 384 DCHECK(start_reg >= 0); 385 DCHECK(start_reg <= kMaxRegister); 386 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); 387 EmitOrLink(on_not_equal); 388 } 389 390 391 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, 392 int comparand, 393 Label* on_less_than) { 394 DCHECK(register_index >= 0); 395 DCHECK(register_index <= kMaxRegister); 396 Emit(BC_CHECK_REGISTER_LT, register_index); 397 Emit32(comparand); 398 EmitOrLink(on_less_than); 399 } 400 401 402 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, 403 int comparand, 404 Label* on_greater_or_equal) { 405 DCHECK(register_index >= 0); 406 DCHECK(register_index <= kMaxRegister); 407 Emit(BC_CHECK_REGISTER_GE, register_index); 408 Emit32(comparand); 409 EmitOrLink(on_greater_or_equal); 410 } 411 412 413 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, 414 Label* on_eq) { 415 DCHECK(register_index >= 0); 416 DCHECK(register_index <= kMaxRegister); 417 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 418 EmitOrLink(on_eq); 419 } 420 421 422 Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode( 423 Handle<String> source) { 424 Bind(&backtrack_); 425 Emit(BC_POP_BT, 0); 426 Handle<ByteArray> array = isolate_->factory()->NewByteArray(length()); 427 Copy(array->GetDataStartAddress()); 428 return array; 429 } 430 431 432 int RegExpMacroAssemblerIrregexp::length() { 433 return pc_; 434 } 435 436 437 void RegExpMacroAssemblerIrregexp::Copy(Address a) { 438 MemCopy(a, buffer_.start(), length()); 439 } 440 441 442 void RegExpMacroAssemblerIrregexp::Expand() { 443 bool old_buffer_was_our_own = own_buffer_; 444 Vector<byte> old_buffer = buffer_; 445 buffer_ = Vector<byte>::New(old_buffer.length() * 2); 446 own_buffer_ = true; 447 MemCopy(buffer_.start(), old_buffer.start(), old_buffer.length()); 448 if (old_buffer_was_our_own) { 449 old_buffer.Dispose(); 450 } 451 } 452 453 #endif // V8_INTERPRETED_REGEXP 454 455 } } // namespace v8::internal 456