1 // Copyright (c) 2005, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 // 30 // Author: Sanjay Ghemawat 31 32 #ifdef HAVE_CONFIG_H 33 #include "config.h" 34 #endif 35 36 #include <vector> 37 #include <assert.h> 38 39 #include "pcrecpp_internal.h" 40 #include "pcre_scanner.h" 41 42 using std::vector; 43 44 namespace pcrecpp { 45 46 Scanner::Scanner() 47 : data_(), 48 input_(data_), 49 skip_(NULL), 50 should_skip_(false), 51 skip_repeat_(false), 52 save_comments_(false), 53 comments_(NULL), 54 comments_offset_(0) { 55 } 56 57 Scanner::Scanner(const string& in) 58 : data_(in), 59 input_(data_), 60 skip_(NULL), 61 should_skip_(false), 62 skip_repeat_(false), 63 save_comments_(false), 64 comments_(NULL), 65 comments_offset_(0) { 66 } 67 68 Scanner::~Scanner() { 69 delete skip_; 70 delete comments_; 71 } 72 73 void Scanner::SetSkipExpression(const char* re) { 74 delete skip_; 75 if (re != NULL) { 76 skip_ = new RE(re); 77 should_skip_ = true; 78 skip_repeat_ = true; 79 ConsumeSkip(); 80 } else { 81 skip_ = NULL; 82 should_skip_ = false; 83 skip_repeat_ = false; 84 } 85 } 86 87 void Scanner::Skip(const char* re) { 88 delete skip_; 89 if (re != NULL) { 90 skip_ = new RE(re); 91 should_skip_ = true; 92 skip_repeat_ = false; 93 ConsumeSkip(); 94 } else { 95 skip_ = NULL; 96 should_skip_ = false; 97 skip_repeat_ = false; 98 } 99 } 100 101 void Scanner::DisableSkip() { 102 assert(skip_ != NULL); 103 should_skip_ = false; 104 } 105 106 void Scanner::EnableSkip() { 107 assert(skip_ != NULL); 108 should_skip_ = true; 109 ConsumeSkip(); 110 } 111 112 int Scanner::LineNumber() const { 113 // TODO: Make it more efficient by keeping track of the last point 114 // where we computed line numbers and counting newlines since then. 115 // We could use std:count, but not all systems have it. :-( 116 int count = 1; 117 for (const char* p = data_.data(); p < input_.data(); ++p) 118 if (*p == '\n') 119 ++count; 120 return count; 121 } 122 123 int Scanner::Offset() const { 124 return (int)(input_.data() - data_.c_str()); 125 } 126 127 bool Scanner::LookingAt(const RE& re) const { 128 int consumed; 129 return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); 130 } 131 132 133 bool Scanner::Consume(const RE& re, 134 const Arg& arg0, 135 const Arg& arg1, 136 const Arg& arg2) { 137 const bool result = re.Consume(&input_, arg0, arg1, arg2); 138 if (result && should_skip_) ConsumeSkip(); 139 return result; 140 } 141 142 // helper function to consume *skip_ and honour save_comments_ 143 void Scanner::ConsumeSkip() { 144 const char* start_data = input_.data(); 145 while (skip_->Consume(&input_)) { 146 if (!skip_repeat_) { 147 // Only one skip allowed. 148 break; 149 } 150 } 151 if (save_comments_) { 152 if (comments_ == NULL) { 153 comments_ = new vector<StringPiece>; 154 } 155 // already pointing one past end, so no need to +1 156 int length = (int)(input_.data() - start_data); 157 if (length > 0) { 158 comments_->push_back(StringPiece(start_data, length)); 159 } 160 } 161 } 162 163 164 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { 165 // short circuit out if we've not yet initialized comments_ 166 // (e.g., when save_comments is false) 167 if (!comments_) { 168 return; 169 } 170 // TODO: if we guarantee that comments_ will contain StringPieces 171 // that are ordered by their start, then we can do a binary search 172 // for the first StringPiece at or past start and then scan for the 173 // ones contained in the range, quit early (use equal_range or 174 // lower_bound) 175 for (vector<StringPiece>::const_iterator it = comments_->begin(); 176 it != comments_->end(); ++it) { 177 if ((it->data() >= data_.c_str() + start && 178 it->data() + it->size() <= data_.c_str() + end)) { 179 ranges->push_back(*it); 180 } 181 } 182 } 183 184 185 void Scanner::GetNextComments(vector<StringPiece> *ranges) { 186 // short circuit out if we've not yet initialized comments_ 187 // (e.g., when save_comments is false) 188 if (!comments_) { 189 return; 190 } 191 for (vector<StringPiece>::const_iterator it = 192 comments_->begin() + comments_offset_; 193 it != comments_->end(); ++it) { 194 ranges->push_back(*it); 195 ++comments_offset_; 196 } 197 } 198 199 } // namespace pcrecpp 200