// Copyright 2008-2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "v8.h" 29 #include "ast.h" 30 #include "bytecodes-irregexp.h" 31 #include "regexp-macro-assembler.h" 32 #include "regexp-macro-assembler-irregexp.h" 33 #include "regexp-macro-assembler-irregexp-inl.h" 34 35 36 namespace v8 { 37 namespace internal { 38 39 #ifdef V8_INTERPRETED_REGEXP 40 41 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer, 42 Zone* zone) 43 : RegExpMacroAssembler(zone), 44 buffer_(buffer), 45 pc_(0), 46 own_buffer_(false), 47 advance_current_end_(kInvalidPC), 48 isolate_(zone->isolate()) { } 49 50 51 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { 52 if (backtrack_.is_linked()) backtrack_.Unuse(); 53 if (own_buffer_) buffer_.Dispose(); 54 } 55 56 57 RegExpMacroAssemblerIrregexp::IrregexpImplementation 58 RegExpMacroAssemblerIrregexp::Implementation() { 59 return kBytecodeImplementation; 60 } 61 62 63 void RegExpMacroAssemblerIrregexp::Bind(Label* l) { 64 advance_current_end_ = kInvalidPC; 65 ASSERT(!l->is_bound()); 66 if (l->is_linked()) { 67 int pos = l->pos(); 68 while (pos != 0) { 69 int fixup = pos; 70 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); 71 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; 72 } 73 } 74 l->bind_to(pc_); 75 } 76 77 78 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { 79 if (l == NULL) l = &backtrack_; 80 if (l->is_bound()) { 81 Emit32(l->pos()); 82 } else { 83 int pos = 0; 84 if (l->is_linked()) { 85 pos = l->pos(); 86 } 87 l->link_to(pc_); 88 Emit32(pos); 89 } 90 } 91 92 93 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { 94 ASSERT(register_index >= 0); 95 ASSERT(register_index <= kMaxRegister); 96 Emit(BC_POP_REGISTER, register_index); 97 } 98 99 100 void RegExpMacroAssemblerIrregexp::PushRegister( 101 int register_index, 102 StackCheckFlag check_stack_limit) { 103 ASSERT(register_index >= 0); 104 ASSERT(register_index <= kMaxRegister); 105 Emit(BC_PUSH_REGISTER, register_index); 106 } 107 108 109 void 
RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( 110 int register_index, int cp_offset) { 111 ASSERT(register_index >= 0); 112 ASSERT(register_index <= kMaxRegister); 113 Emit(BC_SET_REGISTER_TO_CP, register_index); 114 Emit32(cp_offset); // Current position offset. 115 } 116 117 118 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { 119 ASSERT(reg_from <= reg_to); 120 for (int reg = reg_from; reg <= reg_to; reg++) { 121 SetRegister(reg, -1); 122 } 123 } 124 125 126 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( 127 int register_index) { 128 ASSERT(register_index >= 0); 129 ASSERT(register_index <= kMaxRegister); 130 Emit(BC_SET_CP_TO_REGISTER, register_index); 131 } 132 133 134 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( 135 int register_index) { 136 ASSERT(register_index >= 0); 137 ASSERT(register_index <= kMaxRegister); 138 Emit(BC_SET_REGISTER_TO_SP, register_index); 139 } 140 141 142 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( 143 int register_index) { 144 ASSERT(register_index >= 0); 145 ASSERT(register_index <= kMaxRegister); 146 Emit(BC_SET_SP_TO_REGISTER, register_index); 147 } 148 149 150 void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) { 151 ASSERT(is_uint24(by)); 152 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 153 } 154 155 156 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { 157 ASSERT(register_index >= 0); 158 ASSERT(register_index <= kMaxRegister); 159 Emit(BC_SET_REGISTER, register_index); 160 Emit32(to); 161 } 162 163 164 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { 165 ASSERT(register_index >= 0); 166 ASSERT(register_index <= kMaxRegister); 167 Emit(BC_ADVANCE_REGISTER, register_index); 168 Emit32(by); 169 } 170 171 172 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { 173 Emit(BC_POP_CP, 0); 174 } 175 176 177 void 
RegExpMacroAssemblerIrregexp::PushCurrentPosition() { 178 Emit(BC_PUSH_CP, 0); 179 } 180 181 182 void RegExpMacroAssemblerIrregexp::Backtrack() { 183 Emit(BC_POP_BT, 0); 184 } 185 186 187 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { 188 if (advance_current_end_ == pc_) { 189 // Combine advance current and goto. 190 pc_ = advance_current_start_; 191 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 192 EmitOrLink(l); 193 advance_current_end_ = kInvalidPC; 194 } else { 195 // Regular goto. 196 Emit(BC_GOTO, 0); 197 EmitOrLink(l); 198 } 199 } 200 201 202 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { 203 Emit(BC_PUSH_BT, 0); 204 EmitOrLink(l); 205 } 206 207 208 bool RegExpMacroAssemblerIrregexp::Succeed() { 209 Emit(BC_SUCCEED, 0); 210 return false; // Restart matching for global regexp not supported. 211 } 212 213 214 void RegExpMacroAssemblerIrregexp::Fail() { 215 Emit(BC_FAIL, 0); 216 } 217 218 219 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { 220 ASSERT(by >= kMinCPOffset); 221 ASSERT(by <= kMaxCPOffset); 222 advance_current_start_ = pc_; 223 advance_current_offset_ = by; 224 Emit(BC_ADVANCE_CP, by); 225 advance_current_end_ = pc_; 226 } 227 228 229 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( 230 Label* on_tos_equals_current_position) { 231 Emit(BC_CHECK_GREEDY, 0); 232 EmitOrLink(on_tos_equals_current_position); 233 } 234 235 236 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, 237 Label* on_failure, 238 bool check_bounds, 239 int characters) { 240 ASSERT(cp_offset >= kMinCPOffset); 241 ASSERT(cp_offset <= kMaxCPOffset); 242 int bytecode; 243 if (check_bounds) { 244 if (characters == 4) { 245 bytecode = BC_LOAD_4_CURRENT_CHARS; 246 } else if (characters == 2) { 247 bytecode = BC_LOAD_2_CURRENT_CHARS; 248 } else { 249 ASSERT(characters == 1); 250 bytecode = BC_LOAD_CURRENT_CHAR; 251 } 252 } else { 253 if (characters == 4) { 254 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 255 } else if 
(characters == 2) { 256 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 257 } else { 258 ASSERT(characters == 1); 259 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 260 } 261 } 262 Emit(bytecode, cp_offset); 263 if (check_bounds) EmitOrLink(on_failure); 264 } 265 266 267 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, 268 Label* on_less) { 269 Emit(BC_CHECK_LT, limit); 270 EmitOrLink(on_less); 271 } 272 273 274 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, 275 Label* on_greater) { 276 Emit(BC_CHECK_GT, limit); 277 EmitOrLink(on_greater); 278 } 279 280 281 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { 282 if (c > MAX_FIRST_ARG) { 283 Emit(BC_CHECK_4_CHARS, 0); 284 Emit32(c); 285 } else { 286 Emit(BC_CHECK_CHAR, c); 287 } 288 EmitOrLink(on_equal); 289 } 290 291 292 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { 293 Emit(BC_CHECK_AT_START, 0); 294 EmitOrLink(on_at_start); 295 } 296 297 298 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { 299 Emit(BC_CHECK_NOT_AT_START, 0); 300 EmitOrLink(on_not_at_start); 301 } 302 303 304 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, 305 Label* on_not_equal) { 306 if (c > MAX_FIRST_ARG) { 307 Emit(BC_CHECK_NOT_4_CHARS, 0); 308 Emit32(c); 309 } else { 310 Emit(BC_CHECK_NOT_CHAR, c); 311 } 312 EmitOrLink(on_not_equal); 313 } 314 315 316 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( 317 uint32_t c, 318 uint32_t mask, 319 Label* on_equal) { 320 if (c > MAX_FIRST_ARG) { 321 Emit(BC_AND_CHECK_4_CHARS, 0); 322 Emit32(c); 323 } else { 324 Emit(BC_AND_CHECK_CHAR, c); 325 } 326 Emit32(mask); 327 EmitOrLink(on_equal); 328 } 329 330 331 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( 332 uint32_t c, 333 uint32_t mask, 334 Label* on_not_equal) { 335 if (c > MAX_FIRST_ARG) { 336 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 337 Emit32(c); 338 } else { 339 Emit(BC_AND_CHECK_NOT_CHAR, c); 340 } 341 
Emit32(mask); 342 EmitOrLink(on_not_equal); 343 } 344 345 346 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( 347 uc16 c, 348 uc16 minus, 349 uc16 mask, 350 Label* on_not_equal) { 351 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 352 Emit16(minus); 353 Emit16(mask); 354 EmitOrLink(on_not_equal); 355 } 356 357 358 void RegExpMacroAssemblerIrregexp::CheckCharacterInRange( 359 uc16 from, 360 uc16 to, 361 Label* on_in_range) { 362 Emit(BC_CHECK_CHAR_IN_RANGE, 0); 363 Emit16(from); 364 Emit16(to); 365 EmitOrLink(on_in_range); 366 } 367 368 369 void RegExpMacroAssemblerIrregexp::CheckCharacterNotInRange( 370 uc16 from, 371 uc16 to, 372 Label* on_not_in_range) { 373 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); 374 Emit16(from); 375 Emit16(to); 376 EmitOrLink(on_not_in_range); 377 } 378 379 380 void RegExpMacroAssemblerIrregexp::CheckBitInTable( 381 Handle<ByteArray> table, Label* on_bit_set) { 382 Emit(BC_CHECK_BIT_IN_TABLE, 0); 383 EmitOrLink(on_bit_set); 384 for (int i = 0; i < kTableSize; i += kBitsPerByte) { 385 int byte = 0; 386 for (int j = 0; j < kBitsPerByte; j++) { 387 if (table->get(i + j) != 0) byte |= 1 << j; 388 } 389 Emit8(byte); 390 } 391 } 392 393 394 void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, 395 Label* on_not_equal) { 396 ASSERT(start_reg >= 0); 397 ASSERT(start_reg <= kMaxRegister); 398 Emit(BC_CHECK_NOT_BACK_REF, start_reg); 399 EmitOrLink(on_not_equal); 400 } 401 402 403 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( 404 int start_reg, 405 Label* on_not_equal) { 406 ASSERT(start_reg >= 0); 407 ASSERT(start_reg <= kMaxRegister); 408 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); 409 EmitOrLink(on_not_equal); 410 } 411 412 413 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, 414 int comparand, 415 Label* on_less_than) { 416 ASSERT(register_index >= 0); 417 ASSERT(register_index <= kMaxRegister); 418 Emit(BC_CHECK_REGISTER_LT, register_index); 419 Emit32(comparand); 420 
EmitOrLink(on_less_than); 421 } 422 423 424 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, 425 int comparand, 426 Label* on_greater_or_equal) { 427 ASSERT(register_index >= 0); 428 ASSERT(register_index <= kMaxRegister); 429 Emit(BC_CHECK_REGISTER_GE, register_index); 430 Emit32(comparand); 431 EmitOrLink(on_greater_or_equal); 432 } 433 434 435 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, 436 Label* on_eq) { 437 ASSERT(register_index >= 0); 438 ASSERT(register_index <= kMaxRegister); 439 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 440 EmitOrLink(on_eq); 441 } 442 443 444 Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode( 445 Handle<String> source) { 446 Bind(&backtrack_); 447 Emit(BC_POP_BT, 0); 448 Handle<ByteArray> array = isolate_->factory()->NewByteArray(length()); 449 Copy(array->GetDataStartAddress()); 450 return array; 451 } 452 453 454 int RegExpMacroAssemblerIrregexp::length() { 455 return pc_; 456 } 457 458 459 void RegExpMacroAssemblerIrregexp::Copy(Address a) { 460 OS::MemCopy(a, buffer_.start(), length()); 461 } 462 463 464 void RegExpMacroAssemblerIrregexp::Expand() { 465 bool old_buffer_was_our_own = own_buffer_; 466 Vector<byte> old_buffer = buffer_; 467 buffer_ = Vector<byte>::New(old_buffer.length() * 2); 468 own_buffer_ = true; 469 OS::MemCopy(buffer_.start(), old_buffer.start(), old_buffer.length()); 470 if (old_buffer_was_our_own) { 471 old_buffer.Dispose(); 472 } 473 } 474 475 #endif // V8_INTERPRETED_REGEXP 476 477 } } // namespace v8::internal 478