// Copyright 2008-2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "v8.h" 29 #include "ast.h" 30 #include "bytecodes-irregexp.h" 31 #include "regexp-macro-assembler.h" 32 #include "regexp-macro-assembler-irregexp.h" 33 #include "regexp-macro-assembler-irregexp-inl.h" 34 35 36 namespace v8 { 37 namespace internal { 38 39 #ifndef V8_NATIVE_REGEXP 40 41 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer) 42 : buffer_(buffer), 43 pc_(0), 44 own_buffer_(false), 45 advance_current_end_(kInvalidPC) { 46 } 47 48 49 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { 50 if (backtrack_.is_linked()) backtrack_.Unuse(); 51 if (own_buffer_) buffer_.Dispose(); 52 } 53 54 55 RegExpMacroAssemblerIrregexp::IrregexpImplementation 56 RegExpMacroAssemblerIrregexp::Implementation() { 57 return kBytecodeImplementation; 58 } 59 60 61 void RegExpMacroAssemblerIrregexp::Bind(Label* l) { 62 advance_current_end_ = kInvalidPC; 63 ASSERT(!l->is_bound()); 64 if (l->is_linked()) { 65 int pos = l->pos(); 66 while (pos != 0) { 67 int fixup = pos; 68 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); 69 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; 70 } 71 } 72 l->bind_to(pc_); 73 } 74 75 76 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { 77 if (l == NULL) l = &backtrack_; 78 if (l->is_bound()) { 79 Emit32(l->pos()); 80 } else { 81 int pos = 0; 82 if (l->is_linked()) { 83 pos = l->pos(); 84 } 85 l->link_to(pc_); 86 Emit32(pos); 87 } 88 } 89 90 91 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { 92 ASSERT(register_index >= 0); 93 ASSERT(register_index <= kMaxRegister); 94 Emit(BC_POP_REGISTER, register_index); 95 } 96 97 98 void RegExpMacroAssemblerIrregexp::PushRegister( 99 int register_index, 100 StackCheckFlag check_stack_limit) { 101 ASSERT(register_index >= 0); 102 ASSERT(register_index <= kMaxRegister); 103 Emit(BC_PUSH_REGISTER, register_index); 104 } 105 106 107 void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( 108 int 
register_index, int cp_offset) { 109 ASSERT(register_index >= 0); 110 ASSERT(register_index <= kMaxRegister); 111 Emit(BC_SET_REGISTER_TO_CP, register_index); 112 Emit32(cp_offset); // Current position offset. 113 } 114 115 116 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { 117 ASSERT(reg_from <= reg_to); 118 for (int reg = reg_from; reg <= reg_to; reg++) { 119 SetRegister(reg, -1); 120 } 121 } 122 123 124 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( 125 int register_index) { 126 ASSERT(register_index >= 0); 127 ASSERT(register_index <= kMaxRegister); 128 Emit(BC_SET_CP_TO_REGISTER, register_index); 129 } 130 131 132 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( 133 int register_index) { 134 ASSERT(register_index >= 0); 135 ASSERT(register_index <= kMaxRegister); 136 Emit(BC_SET_REGISTER_TO_SP, register_index); 137 } 138 139 140 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( 141 int register_index) { 142 ASSERT(register_index >= 0); 143 ASSERT(register_index <= kMaxRegister); 144 Emit(BC_SET_SP_TO_REGISTER, register_index); 145 } 146 147 148 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { 149 ASSERT(register_index >= 0); 150 ASSERT(register_index <= kMaxRegister); 151 Emit(BC_SET_REGISTER, register_index); 152 Emit32(to); 153 } 154 155 156 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { 157 ASSERT(register_index >= 0); 158 ASSERT(register_index <= kMaxRegister); 159 Emit(BC_ADVANCE_REGISTER, register_index); 160 Emit32(by); 161 } 162 163 164 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { 165 Emit(BC_POP_CP, 0); 166 } 167 168 169 void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { 170 Emit(BC_PUSH_CP, 0); 171 } 172 173 174 void RegExpMacroAssemblerIrregexp::Backtrack() { 175 Emit(BC_POP_BT, 0); 176 } 177 178 179 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { 180 if (advance_current_end_ == pc_) 
{ 181 // Combine advance current and goto. 182 pc_ = advance_current_start_; 183 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); 184 EmitOrLink(l); 185 advance_current_end_ = kInvalidPC; 186 } else { 187 // Regular goto. 188 Emit(BC_GOTO, 0); 189 EmitOrLink(l); 190 } 191 } 192 193 194 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { 195 Emit(BC_PUSH_BT, 0); 196 EmitOrLink(l); 197 } 198 199 200 void RegExpMacroAssemblerIrregexp::Succeed() { 201 Emit(BC_SUCCEED, 0); 202 } 203 204 205 void RegExpMacroAssemblerIrregexp::Fail() { 206 Emit(BC_FAIL, 0); 207 } 208 209 210 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { 211 ASSERT(by >= kMinCPOffset); 212 ASSERT(by <= kMaxCPOffset); 213 advance_current_start_ = pc_; 214 advance_current_offset_ = by; 215 Emit(BC_ADVANCE_CP, by); 216 advance_current_end_ = pc_; 217 } 218 219 220 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( 221 Label* on_tos_equals_current_position) { 222 Emit(BC_CHECK_GREEDY, 0); 223 EmitOrLink(on_tos_equals_current_position); 224 } 225 226 227 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, 228 Label* on_failure, 229 bool check_bounds, 230 int characters) { 231 ASSERT(cp_offset >= kMinCPOffset); 232 ASSERT(cp_offset <= kMaxCPOffset); 233 int bytecode; 234 if (check_bounds) { 235 if (characters == 4) { 236 bytecode = BC_LOAD_4_CURRENT_CHARS; 237 } else if (characters == 2) { 238 bytecode = BC_LOAD_2_CURRENT_CHARS; 239 } else { 240 ASSERT(characters == 1); 241 bytecode = BC_LOAD_CURRENT_CHAR; 242 } 243 } else { 244 if (characters == 4) { 245 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; 246 } else if (characters == 2) { 247 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; 248 } else { 249 ASSERT(characters == 1); 250 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; 251 } 252 } 253 Emit(bytecode, cp_offset); 254 if (check_bounds) EmitOrLink(on_failure); 255 } 256 257 258 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, 259 Label* 
on_less) { 260 Emit(BC_CHECK_LT, limit); 261 EmitOrLink(on_less); 262 } 263 264 265 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, 266 Label* on_greater) { 267 Emit(BC_CHECK_GT, limit); 268 EmitOrLink(on_greater); 269 } 270 271 272 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { 273 if (c > MAX_FIRST_ARG) { 274 Emit(BC_CHECK_4_CHARS, 0); 275 Emit32(c); 276 } else { 277 Emit(BC_CHECK_CHAR, c); 278 } 279 EmitOrLink(on_equal); 280 } 281 282 283 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { 284 Emit(BC_CHECK_AT_START, 0); 285 EmitOrLink(on_at_start); 286 } 287 288 289 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { 290 Emit(BC_CHECK_NOT_AT_START, 0); 291 EmitOrLink(on_not_at_start); 292 } 293 294 295 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, 296 Label* on_not_equal) { 297 if (c > MAX_FIRST_ARG) { 298 Emit(BC_CHECK_NOT_4_CHARS, 0); 299 Emit32(c); 300 } else { 301 Emit(BC_CHECK_NOT_CHAR, c); 302 } 303 EmitOrLink(on_not_equal); 304 } 305 306 307 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( 308 uint32_t c, 309 uint32_t mask, 310 Label* on_equal) { 311 if (c > MAX_FIRST_ARG) { 312 Emit(BC_AND_CHECK_4_CHARS, 0); 313 Emit32(c); 314 } else { 315 Emit(BC_AND_CHECK_CHAR, c); 316 } 317 Emit32(mask); 318 EmitOrLink(on_equal); 319 } 320 321 322 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( 323 uint32_t c, 324 uint32_t mask, 325 Label* on_not_equal) { 326 if (c > MAX_FIRST_ARG) { 327 Emit(BC_AND_CHECK_NOT_4_CHARS, 0); 328 Emit32(c); 329 } else { 330 Emit(BC_AND_CHECK_NOT_CHAR, c); 331 } 332 Emit32(mask); 333 EmitOrLink(on_not_equal); 334 } 335 336 337 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( 338 uc16 c, 339 uc16 minus, 340 uc16 mask, 341 Label* on_not_equal) { 342 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); 343 Emit16(minus); 344 Emit16(mask); 345 EmitOrLink(on_not_equal); 346 } 347 348 349 void 
RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, 350 Label* on_not_equal) { 351 ASSERT(start_reg >= 0); 352 ASSERT(start_reg <= kMaxRegister); 353 Emit(BC_CHECK_NOT_BACK_REF, start_reg); 354 EmitOrLink(on_not_equal); 355 } 356 357 358 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( 359 int start_reg, 360 Label* on_not_equal) { 361 ASSERT(start_reg >= 0); 362 ASSERT(start_reg <= kMaxRegister); 363 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); 364 EmitOrLink(on_not_equal); 365 } 366 367 368 void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1, 369 int reg2, 370 Label* on_not_equal) { 371 ASSERT(reg1 >= 0); 372 ASSERT(reg1 <= kMaxRegister); 373 Emit(BC_CHECK_NOT_REGS_EQUAL, reg1); 374 Emit32(reg2); 375 EmitOrLink(on_not_equal); 376 } 377 378 379 void RegExpMacroAssemblerIrregexp::CheckCharacters( 380 Vector<const uc16> str, 381 int cp_offset, 382 Label* on_failure, 383 bool check_end_of_string) { 384 ASSERT(cp_offset >= kMinCPOffset); 385 ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset); 386 // It is vital that this loop is backwards due to the unchecked character 387 // load below. 
388 for (int i = str.length() - 1; i >= 0; i--) { 389 if (check_end_of_string && i == str.length() - 1) { 390 Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i); 391 EmitOrLink(on_failure); 392 } else { 393 Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i); 394 } 395 Emit(BC_CHECK_NOT_CHAR, str[i]); 396 EmitOrLink(on_failure); 397 } 398 } 399 400 401 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, 402 int comparand, 403 Label* on_less_than) { 404 ASSERT(register_index >= 0); 405 ASSERT(register_index <= kMaxRegister); 406 Emit(BC_CHECK_REGISTER_LT, register_index); 407 Emit32(comparand); 408 EmitOrLink(on_less_than); 409 } 410 411 412 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, 413 int comparand, 414 Label* on_greater_or_equal) { 415 ASSERT(register_index >= 0); 416 ASSERT(register_index <= kMaxRegister); 417 Emit(BC_CHECK_REGISTER_GE, register_index); 418 Emit32(comparand); 419 EmitOrLink(on_greater_or_equal); 420 } 421 422 423 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, 424 Label* on_eq) { 425 ASSERT(register_index >= 0); 426 ASSERT(register_index <= kMaxRegister); 427 Emit(BC_CHECK_REGISTER_EQ_POS, register_index); 428 EmitOrLink(on_eq); 429 } 430 431 432 Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) { 433 Bind(&backtrack_); 434 Emit(BC_POP_BT, 0); 435 Handle<ByteArray> array = Factory::NewByteArray(length()); 436 Copy(array->GetDataStartAddress()); 437 return array; 438 } 439 440 441 int RegExpMacroAssemblerIrregexp::length() { 442 return pc_; 443 } 444 445 446 void RegExpMacroAssemblerIrregexp::Copy(Address a) { 447 memcpy(a, buffer_.start(), length()); 448 } 449 450 451 void RegExpMacroAssemblerIrregexp::Expand() { 452 bool old_buffer_was_our_own = own_buffer_; 453 Vector<byte> old_buffer = buffer_; 454 buffer_ = Vector<byte>::New(old_buffer.length() * 2); 455 own_buffer_ = true; 456 memcpy(buffer_.start(), old_buffer.start(), old_buffer.length()); 457 if 
(old_buffer_was_our_own) { 458 old_buffer.Dispose(); 459 } 460 } 461 462 #endif // !V8_NATIVE_REGEXP 463 464 } } // namespace v8::internal 465