Home | History | Annotate | Download | only in pcrecpp
      1 // Copyright (c) 2005, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 //
     30 // Author: Sanjay Ghemawat
     31 
     32 #include <vector>
     33 #include <assert.h>
     34 
     35 #include "pcrecpp_internal.h"
     36 #include "pcre_scanner.h"
     37 
     38 using std::vector;
     39 
     40 namespace pcrecpp {
     41 
     42 Scanner::Scanner()
     43   : data_(),
     44     input_(data_),
     45     skip_(NULL),
     46     should_skip_(false),
     47     skip_repeat_(false),
     48     save_comments_(false),
     49     comments_(NULL),
     50     comments_offset_(0) {
     51 }
     52 
     53 Scanner::Scanner(const string& in)
     54   : data_(in),
     55     input_(data_),
     56     skip_(NULL),
     57     should_skip_(false),
     58     skip_repeat_(false),
     59     save_comments_(false),
     60     comments_(NULL),
     61     comments_offset_(0) {
     62 }
     63 
     64 Scanner::~Scanner() {
     65   delete skip_;
     66   delete comments_;
     67 }
     68 
     69 void Scanner::SetSkipExpression(const char* re) {
     70   delete skip_;
     71   if (re != NULL) {
     72     skip_ = new RE(re);
     73     should_skip_ = true;
     74     skip_repeat_ = true;
     75     ConsumeSkip();
     76   } else {
     77     skip_ = NULL;
     78     should_skip_ = false;
     79     skip_repeat_ = false;
     80   }
     81 }
     82 
     83 void Scanner::Skip(const char* re) {
     84   delete skip_;
     85   if (re != NULL) {
     86     skip_ = new RE(re);
     87     should_skip_ = true;
     88     skip_repeat_ = false;
     89     ConsumeSkip();
     90   } else {
     91     skip_ = NULL;
     92     should_skip_ = false;
     93     skip_repeat_ = false;
     94   }
     95 }
     96 
     97 void Scanner::DisableSkip() {
     98   assert(skip_ != NULL);
     99   should_skip_ = false;
    100 }
    101 
    102 void Scanner::EnableSkip() {
    103   assert(skip_ != NULL);
    104   should_skip_ = true;
    105   ConsumeSkip();
    106 }
    107 
    108 int Scanner::LineNumber() const {
    109   // TODO: Make it more efficient by keeping track of the last point
    110   // where we computed line numbers and counting newlines since then.
    111   // We could use std:count, but not all systems have it. :-(
    112   int count = 1;
    113   for (const char* p = data_.data(); p < input_.data(); ++p)
    114     if (*p == '\n')
    115       ++count;
    116   return count;
    117 }
    118 
    119 int Scanner::Offset() const {
    120   return (int)(input_.data() - data_.c_str());
    121 }
    122 
    123 bool Scanner::LookingAt(const RE& re) const {
    124   int consumed;
    125   return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
    126 }
    127 
    128 
    129 // helper function to consume *skip_ and honour save_comments_
    130 void Scanner::ConsumeSkip() {
    131   const char* start_data = input_.data();
    132   while (skip_->Consume(&input_)) {
    133     if (!skip_repeat_) {
    134       // Only one skip allowed.
    135       break;
    136     }
    137   }
    138   if (save_comments_) {
    139     if (comments_ == NULL) {
    140       comments_ = new vector<StringPiece>;
    141     }
    142     // already pointing one past end, so no need to +1
    143     int length = (int)(input_.data() - start_data);
    144     if (length > 0) {
    145       comments_->push_back(StringPiece(start_data, length));
    146     }
    147   }
    148 }
    149 
    150 
    151 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
    152   // short circuit out if we've not yet initialized comments_
    153   // (e.g., when save_comments is false)
    154   if (!comments_) {
    155     return;
    156   }
    157   // TODO: if we guarantee that comments_ will contain StringPieces
    158   // that are ordered by their start, then we can do a binary search
    159   // for the first StringPiece at or past start and then scan for the
    160   // ones contained in the range, quit early (use equal_range or
    161   // lower_bound)
    162   for (vector<StringPiece>::const_iterator it = comments_->begin();
    163        it != comments_->end(); ++it) {
    164     if ((it->data() >= data_.c_str() + start &&
    165          it->data() + it->size() <= data_.c_str() + end)) {
    166       ranges->push_back(*it);
    167     }
    168   }
    169 }
    170 
    171 
    172 void Scanner::GetNextComments(vector<StringPiece> *ranges) {
    173   // short circuit out if we've not yet initialized comments_
    174   // (e.g., when save_comments is false)
    175   if (!comments_) {
    176     return;
    177   }
    178   for (vector<StringPiece>::const_iterator it =
    179          comments_->begin() + comments_offset_;
    180        it != comments_->end(); ++it) {
    181     ranges->push_back(*it);
    182     ++comments_offset_;
    183   }
    184 }
    185 
    186 }   // namespace pcrecpp
    187