Home | History | Annotate | Download | only in parser
      1 // Copyright (C) 2015 The Android Open Source Project
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "tokenizer.h"
     16 
     17 namespace init {
     18 
     19 Tokenizer::Tokenizer(const std::string& data)
     20     : data_(data), eof_(false), pos_(0), tok_start_(0) {
     21   current_.type = TOK_START;
     22 
     23   if (data.size() > 0) {
     24     cur_char_ = data[0];
     25   } else {
     26     eof_ = true;
     27     cur_char_ = '\0';
     28   }
     29 }
     30 
     31 Tokenizer::~Tokenizer() {}
     32 
     33 const Tokenizer::Token& Tokenizer::current() {
     34   return current_;
     35 }
     36 
     37 bool Tokenizer::Next() {
     38   while (!eof_) {
     39     AdvWhiteSpace();
     40 
     41     // Check for comments.
     42     if (cur_char_ == '#') {
     43       AdvChar();
     44       // Skip rest of line
     45       while (!eof_ && cur_char_ != '\n') {
     46         AdvChar();
     47       }
     48     }
     49 
     50     if (eof_) {
     51       break;
     52     }
     53 
     54     if (cur_char_ == '\0') {
     55       AdvChar();
     56     } else if (cur_char_ == '\n') {
     57       current_.type = TOK_NEWLINE;
     58       current_.text.clear();
     59       AdvChar();
     60       return true;
     61     } else if (cur_char_ == '\\') {
     62       AdvChar();  // skip backslash
     63       // This is line continuation so
     64       // do not generated TOK_NEWLINE at
     65       // the next \n.
     66       AdvUntil('\n');
     67       AdvChar();  // skip \n
     68     } else if (cur_char_ == '\"') {
     69       AdvChar();
     70       StartText();
     71       // Grab everything until the next quote.
     72       AdvUntil('\"');
     73       EndText();
     74       AdvChar();  // skip quote.
     75       return true;
     76     } else {
     77       StartText();
     78       AdvText();
     79       EndText();
     80       return true;
     81     }
     82   }
     83   current_.type = TOK_END;
     84   current_.text.clear();
     85   return false;
     86 }
     87 
     88 void Tokenizer::AdvChar() {
     89   pos_++;
     90   if (pos_ < data_.size()) {
     91     cur_char_ = data_[pos_];
     92   } else {
     93     eof_ = true;
     94     cur_char_ = '\0';
     95   }
     96 }
     97 
     98 void Tokenizer::AdvWhiteSpace() {
     99   while (cur_char_ == '\t' || cur_char_ == '\r' || cur_char_ == ' ') {
    100     AdvChar();
    101   }
    102 }
    103 
    104 void Tokenizer::AdvUntil(char x) {
    105   while (!eof_ && cur_char_ != x) {
    106     AdvChar();
    107   }
    108 }
    109 
    110 void Tokenizer::AdvText() {
    111   while (cur_char_ != '\t' && cur_char_ != '\r' && cur_char_ != '\0' &&
    112          cur_char_ != ' ' && cur_char_ != '\n' && cur_char_ != '#') {
    113     AdvChar();
    114   }
    115 }
    116 
    117 void Tokenizer::StartText() {
    118   current_.text.clear();
    119   tok_start_ = pos_;
    120   current_.type = TOK_TEXT;
    121 }
    122 
    123 void Tokenizer::EndText() {
    124   if (pos_ != tok_start_) {
    125     current_.text.append(data_, tok_start_, pos_ - tok_start_);
    126   }
    127 }
    128 
    129 }  // namespace init