// Copyright 2008-2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "v8.h" 29 #include "ast.h" 30 #include "bytecodes-irregexp.h" 31 #include "regexp-macro-assembler.h" 32 #include "regexp-macro-assembler-irregexp.h" 33 #include "regexp-macro-assembler-irregexp-inl.h" 34 35 36 namespace v8 { 37 namespace internal { 38 39 #ifdef V8_INTERPRETED_REGEXP 40 41 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer) 42 : buffer_(buffer), 43 pc_(0), 44 own_buffer_(false), 45 advance_current_end_(kInvalidPC) { 46 } 47 48 49 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { 50 if (backtrack_.is_linked()) backtrack_.Unuse(); 51 if (own_buffer_) buffer_.Dispose(); 52 } 53 54 55 RegExpMacroAssemblerIrregexp::IrregexpImplementation 56 RegExpMacroAssemblerIrregexp::Implementation() { 57 return kBytecodeImplementation; 58 } 59 60 61 void RegExpMacroAssemblerIrregexp::Bind(Label* l) { 62 advance_current_end_ = kInvalidPC; 63 ASSERT(!l->is_bound()); 64 if (l->is_linked()) { 65 int pos = l->pos(); 66 while (pos != 0) { 67 int fixup = pos; 68 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); 69 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; 70 } 71 } 72 l->bind_to(pc_); 73 } 74 75 76 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { 77 if (l == NULL) l = &backtrack_; 78 if (l->is_bound()) { 79 Emit32(l->pos()); 80 } else { 81 int pos = 0; 82 if (l->is_linked()) { 83 pos = l->pos(); 84 } 85 l->link_to(pc_); 86 Emit32(pos); 87 } 88 } 89 90 91 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { 92 ASSERT(register_index >= 0); 93 ASSERT(register_index <= kMaxRegister); 94 Emit(BC_POP_REGISTER, register_index); 95 } 96 97 98 void RegExpMacroAssemblerIrregexp::PushRegister( 99 int register_index, 100 StackCheckFlag check_stack_limit) { 101 ASSERT(register_index >= 0); 102 ASSERT(register_index <= kMaxRegister); 103 Emit(BC_PUSH_REGISTER, register_index); 104 } 105 106 107 void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( 108 int 
register_index, int cp_offset) { 109 ASSERT(register_index >= 0); 110 ASSERT(register_index <= kMaxRegister); 111 Emit(BC_SET_REGISTER_TO_CP, register_index); 112 Emit32(cp_offset); // Current position offset. 113 } 114 115 116 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { 117 ASSERT(reg_from <= reg_to); 118 for (int reg = reg_from; reg <= reg_to; reg++) { 119 SetRegister(reg, -1); 120 } 121 } 122 123 124 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( 125 int register_index) { 126 ASSERT(register_index >= 0); 127 ASSERT(register_index <= kMaxRegister); 128 Emit(BC_SET_CP_TO_REGISTER, register_index); 129 } 130 131 132 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( 133 int register_index) { 134 ASSERT(register_index >= 0); 135 ASSERT(register_index <= kMaxRegister); 136 Emit(BC_SET_REGISTER_TO_SP, register_index); 137 } 138 139 140 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( 141 int register_index) { 142 ASSERT(register_index >= 0); 143 ASSERT(register_index <= kMaxRegister); 144 Emit(BC_SET_SP_TO_REGISTER, register_index); 145 } 146 147 148 void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) { 149 ASSERT(is_uint24(by)); 150 Emit(BC_SET_CURRENT_POSITION_FROM_END, by); 151 } 152 153 154 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { 155 ASSERT(register_index >= 0); 156 ASSERT(register_index <= kMaxRegister); 157 Emit(BC_SET_REGISTER, register_index); 158 Emit32(to); 159 } 160 161 162 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { 163 ASSERT(register_index >= 0); 164 ASSERT(register_index <= kMaxRegister); 165 Emit(BC_ADVANCE_REGISTER, register_index); 166 Emit32(by); 167 } 168 169 170 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { 171 Emit(BC_POP_CP, 0); 172 } 173 174 175 void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { 176 Emit(BC_PUSH_CP, 0); 177 } 178 179 180 void 
RegExpMacroAssemblerIrregexp::Backtrack() { 181 Emit(BC_POP_BT, 0); 182 } 183 184 185 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { 186 if (advance_current_end_ == pc_) { 187 // Combine advance current and goto. 188 pc_ = advance_current_start_; 189 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 190 EmitOrLink(l); 191 advance_current_end_ = kInvalidPC; 192 } else { 193 // Regular goto. 194 Emit(BC_GOTO, 0); 195 EmitOrLink(l); 196 } 197 } 198 199 200 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { 201 Emit(BC_PUSH_BT, 0); 202 EmitOrLink(l); 203 } 204 205 206 void RegExpMacroAssemblerIrregexp::Succeed() { 207 Emit(BC_SUCCEED, 0); 208 } 209 210 211 void RegExpMacroAssemblerIrregexp::Fail() { 212 Emit(BC_FAIL, 0); 213 } 214 215 216 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { 217 ASSERT(by >= kMinCPOffset); 218 ASSERT(by <= kMaxCPOffset); 219 advance_current_start_ = pc_; 220 advance_current_offset_ = by; 221 Emit(BC_ADVANCE_CP, by); 222 advance_current_end_ = pc_; 223 } 224 225 226 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( 227 Label* on_tos_equals_current_position) { 228 Emit(BC_CHECK_GREEDY, 0); 229 EmitOrLink(on_tos_equals_current_position); 230 } 231 232 233 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, 234 Label* on_failure, 235 bool check_bounds, 236 int characters) { 237 ASSERT(cp_offset >= kMinCPOffset); 238 ASSERT(cp_offset <= kMaxCPOffset); 239 int bytecode; 240 if (check_bounds) { 241 if (characters == 4) { 242 bytecode = BC_LOAD_4_CURRENT_CHARS; 243 } else if (characters == 2) { 244 bytecode = BC_LOAD_2_CURRENT_CHARS; 245 } else { 246 ASSERT(characters == 1); 247 bytecode = BC_LOAD_CURRENT_CHAR; 248 } 249 } else { 250 if (characters == 4) { 251 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 252 } else if (characters == 2) { 253 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 254 } else { 255 ASSERT(characters == 1); 256 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 257 } 258 } 
259 Emit(bytecode, cp_offset); 260 if (check_bounds) EmitOrLink(on_failure); 261 } 262 263 264 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, 265 Label* on_less) { 266 Emit(BC_CHECK_LT, limit); 267 EmitOrLink(on_less); 268 } 269 270 271 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, 272 Label* on_greater) { 273 Emit(BC_CHECK_GT, limit); 274 EmitOrLink(on_greater); 275 } 276 277 278 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { 279 if (c > MAX_FIRST_ARG) { 280 Emit(BC_CHECK_4_CHARS, 0); 281 Emit32(c); 282 } else { 283 Emit(BC_CHECK_CHAR, c); 284 } 285 EmitOrLink(on_equal); 286 } 287 288 289 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { 290 Emit(BC_CHECK_AT_START, 0); 291 EmitOrLink(on_at_start); 292 } 293 294 295 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { 296 Emit(BC_CHECK_NOT_AT_START, 0); 297 EmitOrLink(on_not_at_start); 298 } 299 300 301 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, 302 Label* on_not_equal) { 303 if (c > MAX_FIRST_ARG) { 304 Emit(BC_CHECK_NOT_4_CHARS, 0); 305 Emit32(c); 306 } else { 307 Emit(BC_CHECK_NOT_CHAR, c); 308 } 309 EmitOrLink(on_not_equal); 310 } 311 312 313 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( 314 uint32_t c, 315 uint32_t mask, 316 Label* on_equal) { 317 if (c > MAX_FIRST_ARG) { 318 Emit(BC_AND_CHECK_4_CHARS, 0); 319 Emit32(c); 320 } else { 321 Emit(BC_AND_CHECK_CHAR, c); 322 } 323 Emit32(mask); 324 EmitOrLink(on_equal); 325 } 326 327 328 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( 329 uint32_t c, 330 uint32_t mask, 331 Label* on_not_equal) { 332 if (c > MAX_FIRST_ARG) { 333 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 334 Emit32(c); 335 } else { 336 Emit(BC_AND_CHECK_NOT_CHAR, c); 337 } 338 Emit32(mask); 339 EmitOrLink(on_not_equal); 340 } 341 342 343 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( 344 uc16 c, 345 uc16 minus, 346 uc16 mask, 347 
Label* on_not_equal) { 348 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 349 Emit16(minus); 350 Emit16(mask); 351 EmitOrLink(on_not_equal); 352 } 353 354 355 void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, 356 Label* on_not_equal) { 357 ASSERT(start_reg >= 0); 358 ASSERT(start_reg <= kMaxRegister); 359 Emit(BC_CHECK_NOT_BACK_REF, start_reg); 360 EmitOrLink(on_not_equal); 361 } 362 363 364 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( 365 int start_reg, 366 Label* on_not_equal) { 367 ASSERT(start_reg >= 0); 368 ASSERT(start_reg <= kMaxRegister); 369 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); 370 EmitOrLink(on_not_equal); 371 } 372 373 374 void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1, 375 int reg2, 376 Label* on_not_equal) { 377 ASSERT(reg1 >= 0); 378 ASSERT(reg1 <= kMaxRegister); 379 Emit(BC_CHECK_NOT_REGS_EQUAL, reg1); 380 Emit32(reg2); 381 EmitOrLink(on_not_equal); 382 } 383 384 385 void RegExpMacroAssemblerIrregexp::CheckCharacters( 386 Vector<const uc16> str, 387 int cp_offset, 388 Label* on_failure, 389 bool check_end_of_string) { 390 ASSERT(cp_offset >= kMinCPOffset); 391 ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset); 392 // It is vital that this loop is backwards due to the unchecked character 393 // load below. 
394 for (int i = str.length() - 1; i >= 0; i--) { 395 if (check_end_of_string && i == str.length() - 1) { 396 Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i); 397 EmitOrLink(on_failure); 398 } else { 399 Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i); 400 } 401 Emit(BC_CHECK_NOT_CHAR, str[i]); 402 EmitOrLink(on_failure); 403 } 404 } 405 406 407 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, 408 int comparand, 409 Label* on_less_than) { 410 ASSERT(register_index >= 0); 411 ASSERT(register_index <= kMaxRegister); 412 Emit(BC_CHECK_REGISTER_LT, register_index); 413 Emit32(comparand); 414 EmitOrLink(on_less_than); 415 } 416 417 418 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, 419 int comparand, 420 Label* on_greater_or_equal) { 421 ASSERT(register_index >= 0); 422 ASSERT(register_index <= kMaxRegister); 423 Emit(BC_CHECK_REGISTER_GE, register_index); 424 Emit32(comparand); 425 EmitOrLink(on_greater_or_equal); 426 } 427 428 429 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, 430 Label* on_eq) { 431 ASSERT(register_index >= 0); 432 ASSERT(register_index <= kMaxRegister); 433 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 434 EmitOrLink(on_eq); 435 } 436 437 438 Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode( 439 Handle<String> source) { 440 Bind(&backtrack_); 441 Emit(BC_POP_BT, 0); 442 Handle<ByteArray> array = FACTORY->NewByteArray(length()); 443 Copy(array->GetDataStartAddress()); 444 return array; 445 } 446 447 448 int RegExpMacroAssemblerIrregexp::length() { 449 return pc_; 450 } 451 452 453 void RegExpMacroAssemblerIrregexp::Copy(Address a) { 454 memcpy(a, buffer_.start(), length()); 455 } 456 457 458 void RegExpMacroAssemblerIrregexp::Expand() { 459 bool old_buffer_was_our_own = own_buffer_; 460 Vector<byte> old_buffer = buffer_; 461 buffer_ = Vector<byte>::New(old_buffer.length() * 2); 462 own_buffer_ = true; 463 memcpy(buffer_.start(), old_buffer.start(), old_buffer.length()); 464 if 
(old_buffer_was_our_own) { 465 old_buffer.Dispose(); 466 } 467 } 468 469 #endif // V8_INTERPRETED_REGEXP 470 471 } } // namespace v8::internal 472