Home | History | Annotate | Download | only in src
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
     29 #define V8_REGEXP_MACRO_ASSEMBLER_H_
     30 
     31 #include "ast.h"
     32 
     33 namespace v8 {
     34 namespace internal {
     35 
     36 struct DisjunctDecisionRow {
     37   RegExpCharacterClass cc;
     38   Label* on_match;
     39 };
     40 
     41 
     42 class RegExpMacroAssembler {
     43  public:
     44   // The implementation must be able to handle at least:
     45   static const int kMaxRegister = (1 << 16) - 1;
     46   static const int kMaxCPOffset = (1 << 15) - 1;
     47   static const int kMinCPOffset = -(1 << 15);
     48 
     49   static const int kTableSizeBits = 7;
     50   static const int kTableSize = 1 << kTableSizeBits;
     51   static const int kTableMask = kTableSize - 1;
     52 
     53   enum IrregexpImplementation {
     54     kIA32Implementation,
     55     kARMImplementation,
     56     kMIPSImplementation,
     57     kX64Implementation,
     58     kBytecodeImplementation
     59   };
     60 
     61   enum StackCheckFlag {
     62     kNoStackLimitCheck = false,
     63     kCheckStackLimit = true
     64   };
     65 
     66   explicit RegExpMacroAssembler(Zone* zone);
     67   virtual ~RegExpMacroAssembler();
     68   // The maximal number of pushes between stack checks. Users must supply
     69   // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
     70   // at least once for every stack_limit() pushes that are executed.
     71   virtual int stack_limit_slack() = 0;
     72   virtual bool CanReadUnaligned();
     73   virtual void AdvanceCurrentPosition(int by) = 0;  // Signed cp change.
     74   virtual void AdvanceRegister(int reg, int by) = 0;  // r[reg] += by.
     75   // Continues execution from the position pushed on the top of the backtrack
     76   // stack by an earlier PushBacktrack(Label*).
     77   virtual void Backtrack() = 0;
     78   virtual void Bind(Label* label) = 0;
     79   virtual void CheckAtStart(Label* on_at_start) = 0;
     80   // Dispatch after looking the current character up in a 2-bits-per-entry
     81   // map.  The destinations vector has up to 4 labels.
     82   virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
     83   // Bitwise and the current character with the given constant and then
     84   // check for a match with c.
     85   virtual void CheckCharacterAfterAnd(unsigned c,
     86                                       unsigned and_with,
     87                                       Label* on_equal) = 0;
     88   virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
     89   virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
     90   virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
     91   virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
     92   virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
     93   virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
     94                                                Label* on_no_match) = 0;
     95   // Check the current character for a match with a literal character.  If we
     96   // fail to match then goto the on_failure label.  End of input always
     97   // matches.  If the label is NULL then we should pop a backtrack address off
     98   // the stack and go to that.
     99   virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
    100   virtual void CheckNotCharacterAfterAnd(unsigned c,
    101                                          unsigned and_with,
    102                                          Label* on_not_equal) = 0;
    103   // Subtract a constant from the current character, then and with the given
    104   // constant and then check for a match with c.
    105   virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
    106                                               uc16 minus,
    107                                               uc16 and_with,
    108                                               Label* on_not_equal) = 0;
    109   virtual void CheckCharacterInRange(uc16 from,
    110                                      uc16 to,  // Both inclusive.
    111                                      Label* on_in_range) = 0;
    112   virtual void CheckCharacterNotInRange(uc16 from,
    113                                         uc16 to,  // Both inclusive.
    114                                         Label* on_not_in_range) = 0;
    115 
    116   // The current character (modulus the kTableSize) is looked up in the byte
    117   // array, and if the found byte is non-zero, we jump to the on_bit_set label.
    118   virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
    119 
    120   // Checks whether the given offset from the current position is before
    121   // the end of the string.  May overwrite the current character.
    122   virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
    123     LoadCurrentCharacter(cp_offset, on_outside_input, true);
    124   }
    125   // Check whether a standard/default character class matches the current
    126   // character. Returns false if the type of special character class does
    127   // not have custom support.
    128   // May clobber the current loaded character.
    129   virtual bool CheckSpecialCharacterClass(uc16 type,
    130                                           Label* on_no_match) {
    131     return false;
    132   }
    133   virtual void Fail() = 0;
    134   virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
    135   virtual void GoTo(Label* label) = 0;
    136   // Check whether a register is >= a given constant and go to a label if it
    137   // is.  Backtracks instead if the label is NULL.
    138   virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
    139   // Check whether a register is < a given constant and go to a label if it is.
    140   // Backtracks instead if the label is NULL.
    141   virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
    142   // Check whether a register is == to the current position and go to a
    143   // label if it is.
    144   virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
    145   virtual IrregexpImplementation Implementation() = 0;
    146   virtual void LoadCurrentCharacter(int cp_offset,
    147                                     Label* on_end_of_input,
    148                                     bool check_bounds = true,
    149                                     int characters = 1) = 0;
    150   virtual void PopCurrentPosition() = 0;
    151   virtual void PopRegister(int register_index) = 0;
    152   // Pushes the label on the backtrack stack, so that a following Backtrack
    153   // will go to this label. Always checks the backtrack stack limit.
    154   virtual void PushBacktrack(Label* label) = 0;
    155   virtual void PushCurrentPosition() = 0;
    156   virtual void PushRegister(int register_index,
    157                             StackCheckFlag check_stack_limit) = 0;
    158   virtual void ReadCurrentPositionFromRegister(int reg) = 0;
    159   virtual void ReadStackPointerFromRegister(int reg) = 0;
    160   virtual void SetCurrentPositionFromEnd(int by) = 0;
    161   virtual void SetRegister(int register_index, int to) = 0;
    162   // Return whether the matching (with a global regexp) will be restarted.
    163   virtual bool Succeed() = 0;
    164   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
    165   virtual void ClearRegisters(int reg_from, int reg_to) = 0;
    166   virtual void WriteStackPointerToRegister(int reg) = 0;
    167 
    168   // Controls the generation of large inlined constants in the code.
    169   void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
    170   bool slow_safe() { return slow_safe_compiler_; }
    171 
    172   enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
    173   // Set whether the regular expression has the global flag.  Exiting due to
    174   // a failure in a global regexp may still mean success overall.
    175   inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
    176   inline bool global() { return global_mode_ != NOT_GLOBAL; }
    177   inline bool global_with_zero_length_check() {
    178     return global_mode_ == GLOBAL;
    179   }
    180 
    181   Zone* zone() const { return zone_; }
    182 
    183  private:
    184   bool slow_safe_compiler_;
    185   bool global_mode_;
    186   Zone* zone_;
    187 };
    188 
    189 
    190 #ifndef V8_INTERPRETED_REGEXP  // Avoid compiling unused code.
    191 
    192 class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
    193  public:
    194   // Type of input string to generate code for.
    195   enum Mode { ASCII = 1, UC16 = 2 };
    196 
    197   // Result of calling generated native RegExp code.
    198   // RETRY: Something significant changed during execution, and the matching
    199   //        should be retried from scratch.
    200   // EXCEPTION: Something failed during execution. If no exception has been
    201   //        thrown, it's an internal out-of-memory, and the caller should
    202   //        throw the exception.
    203   // FAILURE: Matching failed.
    204   // SUCCESS: Matching succeeded, and the output array has been filled with
    205   //        capture positions.
    206   enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
    207 
    208   explicit NativeRegExpMacroAssembler(Zone* zone);
    209   virtual ~NativeRegExpMacroAssembler();
    210   virtual bool CanReadUnaligned();
    211 
    212   static Result Match(Handle<Code> regexp,
    213                       Handle<String> subject,
    214                       int* offsets_vector,
    215                       int offsets_vector_length,
    216                       int previous_index,
    217                       Isolate* isolate);
    218 
    219   // Compares two-byte strings case insensitively.
    220   // Called from generated RegExp code.
    221   static int CaseInsensitiveCompareUC16(Address byte_offset1,
    222                                         Address byte_offset2,
    223                                         size_t byte_length,
    224                                         Isolate* isolate);
    225 
    226   // Called from RegExp if the backtrack stack limit is hit.
    227   // Tries to expand the stack. Returns the new stack-pointer if
    228   // successful, and updates the stack_top address, or returns 0 if unable
    229   // to grow the stack.
    230   // This function must not trigger a garbage collection.
    231   static Address GrowStack(Address stack_pointer, Address* stack_top,
    232                            Isolate* isolate);
    233 
    234   static const byte* StringCharacterPosition(String* subject, int start_index);
    235 
    236   // Byte map of one byte characters with a 0xff if the character is a word
    237   // character (digit, letter or underscore) and 0x00 otherwise.
    238   // Used by generated RegExp code.
    239   static const byte word_character_map[256];
    240 
    241   static Address word_character_map_address() {
    242     return const_cast<Address>(&word_character_map[0]);
    243   }
    244 
    245   static Result Execute(Code* code,
    246                         String* input,
    247                         int start_offset,
    248                         const byte* input_start,
    249                         const byte* input_end,
    250                         int* output,
    251                         int output_size,
    252                         Isolate* isolate);
    253 };
    254 
    255 #endif  // V8_INTERPRETED_REGEXP
    256 
    257 } }  // namespace v8::internal
    258 
    259 #endif  // V8_REGEXP_MACRO_ASSEMBLER_H_
    260