Home | History | Annotate | Download | only in pcre
      1 // Copyright (c) 2005, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 //
     30 // Author: Sanjay Ghemawat
     31 
     32 #ifdef HAVE_CONFIG_H
     33 #include "config.h"
     34 #endif
     35 
     36 #include <vector>
     37 #include <assert.h>
     38 
     39 #include "pcrecpp_internal.h"
     40 #include "pcre_scanner.h"
     41 
     42 using std::vector;
     43 
     44 namespace pcrecpp {
     45 
     46 Scanner::Scanner()
     47   : data_(),
     48     input_(data_),
     49     skip_(NULL),
     50     should_skip_(false),
     51     skip_repeat_(false),
     52     save_comments_(false),
     53     comments_(NULL),
     54     comments_offset_(0) {
     55 }
     56 
     57 Scanner::Scanner(const string& in)
     58   : data_(in),
     59     input_(data_),
     60     skip_(NULL),
     61     should_skip_(false),
     62     skip_repeat_(false),
     63     save_comments_(false),
     64     comments_(NULL),
     65     comments_offset_(0) {
     66 }
     67 
     68 Scanner::~Scanner() {
     69   delete skip_;
     70   delete comments_;
     71 }
     72 
     73 void Scanner::SetSkipExpression(const char* re) {
     74   delete skip_;
     75   if (re != NULL) {
     76     skip_ = new RE(re);
     77     should_skip_ = true;
     78     skip_repeat_ = true;
     79     ConsumeSkip();
     80   } else {
     81     skip_ = NULL;
     82     should_skip_ = false;
     83     skip_repeat_ = false;
     84   }
     85 }
     86 
     87 void Scanner::Skip(const char* re) {
     88   delete skip_;
     89   if (re != NULL) {
     90     skip_ = new RE(re);
     91     should_skip_ = true;
     92     skip_repeat_ = false;
     93     ConsumeSkip();
     94   } else {
     95     skip_ = NULL;
     96     should_skip_ = false;
     97     skip_repeat_ = false;
     98   }
     99 }
    100 
// Turns automatic skipping off.  The installed skip pattern is left in place
// (skip_ is not modified), so EnableSkip() can turn it back on later.
// Precondition (checked by assert only): a skip pattern has been installed.
void Scanner::DisableSkip() {
  assert(skip_ != NULL);
  should_skip_ = false;
}
    105 
// Turns automatic skipping back on and immediately applies the skip pattern
// to the current input via ConsumeSkip().
// Precondition (checked by assert only): a skip pattern has been installed;
// ConsumeSkip() dereferences skip_ unconditionally.
void Scanner::EnableSkip() {
  assert(skip_ != NULL);
  should_skip_ = true;
  ConsumeSkip();
}
    111 
    112 int Scanner::LineNumber() const {
    113   // TODO: Make it more efficient by keeping track of the last point
    114   // where we computed line numbers and counting newlines since then.
    115   // We could use std:count, but not all systems have it. :-(
    116   int count = 1;
    117   for (const char* p = data_.data(); p < input_.data(); ++p)
    118     if (*p == '\n')
    119       ++count;
    120   return count;
    121 }
    122 
    123 int Scanner::Offset() const {
    124   return (int)(input_.data() - data_.c_str());
    125 }
    126 
    127 bool Scanner::LookingAt(const RE& re) const {
    128   int consumed;
    129   return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
    130 }
    131 
    132 
    133 bool Scanner::Consume(const RE& re,
    134                       const Arg& arg0,
    135                       const Arg& arg1,
    136                       const Arg& arg2) {
    137   const bool result = re.Consume(&input_, arg0, arg1, arg2);
    138   if (result && should_skip_) ConsumeSkip();
    139   return result;
    140 }
    141 
    142 // helper function to consume *skip_ and honour save_comments_
    143 void Scanner::ConsumeSkip() {
    144   const char* start_data = input_.data();
    145   while (skip_->Consume(&input_)) {
    146     if (!skip_repeat_) {
    147       // Only one skip allowed.
    148       break;
    149     }
    150   }
    151   if (save_comments_) {
    152     if (comments_ == NULL) {
    153       comments_ = new vector<StringPiece>;
    154     }
    155     // already pointing one past end, so no need to +1
    156     int length = (int)(input_.data() - start_data);
    157     if (length > 0) {
    158       comments_->push_back(StringPiece(start_data, length));
    159     }
    160   }
    161 }
    162 
    163 
    164 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
    165   // short circuit out if we've not yet initialized comments_
    166   // (e.g., when save_comments is false)
    167   if (!comments_) {
    168     return;
    169   }
    170   // TODO: if we guarantee that comments_ will contain StringPieces
    171   // that are ordered by their start, then we can do a binary search
    172   // for the first StringPiece at or past start and then scan for the
    173   // ones contained in the range, quit early (use equal_range or
    174   // lower_bound)
    175   for (vector<StringPiece>::const_iterator it = comments_->begin();
    176        it != comments_->end(); ++it) {
    177     if ((it->data() >= data_.c_str() + start &&
    178          it->data() + it->size() <= data_.c_str() + end)) {
    179       ranges->push_back(*it);
    180     }
    181   }
    182 }
    183 
    184 
    185 void Scanner::GetNextComments(vector<StringPiece> *ranges) {
    186   // short circuit out if we've not yet initialized comments_
    187   // (e.g., when save_comments is false)
    188   if (!comments_) {
    189     return;
    190   }
    191   for (vector<StringPiece>::const_iterator it =
    192          comments_->begin() + comments_offset_;
    193        it != comments_->end(); ++it) {
    194     ranges->push_back(*it);
    195     ++comments_offset_;
    196   }
    197 }
    198 
    199 }   // namespace pcrecpp
    200