Home | History | Annotate | Download | only in profile_reset
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/tools/profile_reset/jtl_parser.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/logging.h"
     10 #include "third_party/re2/re2/re2.h"
     11 
     12 namespace {
     13 
     14 // RegEx that matches the first line of a text. Will throw away any potential
     15 // double-slash-introduced comments and the potential trailing EOL character.
     16 // Note: will fail in case the first line contains an unmatched double-quote
     17 // outside of comments.
     18 const char kSingleLineWithMaybeCommentsRE[] =
     19     // Non-greedily match and capture sequences of 1.) string literals inside
     20     // correctly matched double-quotes, or 2.) any other character.
     21     "^((?:\"[^\"\\n]*\"|[^\"\\n])*?)"
     22     // Greedily match and throw away the potential comment.
     23     "(?://.*)?"
     24     // Match and throw away EOL, or match end-of-string.
     25     "(?:\n|$)";
     26 
     27 // RegEx to match either a double-quote-enclosed string literal or a whitespace.
     28 // Applied repeatedly and without overlapping, can be used to remove whitespace
     29 // outside of string literals.
     30 const char kRemoveWhitespaceRE[] = "(\"[^\"]*\")|\\s";
     31 
     32 // The substitution pattern to use together with the above when replacing. As
     33 // the whitespace is not back-referenced here, it will get removed.
     34 const char kRemoveWhitespaceRewrite[] = "\\1";
     35 
     36 // Separator to terminate a sentence.
     37 const char kEndOfSentenceSeparator[] = ";";
     38 
     39 // The 'true' Boolean keyword.
     40 const char kTrueKeyword[] = "true";
     41 
     42 // RegEx that matches and captures one argument, which is either a double-quote
     43 // enclosed string, or a Boolean value. Will throw away a trailing comma.
     44 const char kSingleArgumentRE[] = "(?:(?:\"([^\"]*)\"|(true|false))(?:,|$))";
     45 
     46 // RegEx-es that, when concatenated, will match a single operation, and capture
     47 // the: operation name, the optional arguments, and the separator that follows.
     48 const char kOperationNameRE[] = "([[:word:]]+)";
     49 const char kMaybeArgumentListRE[] =
     50     "(?:\\("                    // Opening parenthesis.
     51     "((?:\"[^\"]*\"|[^\")])*)"  // Capture: anything inside, quote-aware.
     52     "\\))?";                    // Closing parenthesis + everything optional.
     53 const char kOperationSeparatorRE[] = "(;|\\.)";
     54 
     55 }  // namespace
     56 
     57 struct JtlParser::ParsingState {
     58   explicit ParsingState(const re2::StringPiece& compacted_source)
     59       : single_operation_regex(std::string(kOperationNameRE) +
     60                                kMaybeArgumentListRE +
     61                                kOperationSeparatorRE),
     62         single_argument_regex(kSingleArgumentRE),
     63         remaining_compacted_source(compacted_source),
     64         last_line_number(0) {}
     65 
     66   RE2 single_operation_regex;
     67   RE2 single_argument_regex;
     68   re2::StringPiece remaining_compacted_source;
     69   re2::StringPiece last_context;
     70   size_t last_line_number;
     71 };
     72 
     73 JtlParser::JtlParser(const std::string& compacted_source_code,
     74                      const std::vector<size_t>& newline_indices)
     75     : compacted_source_(compacted_source_code),
     76       newline_indices_(newline_indices) {
     77   state_.reset(new ParsingState(compacted_source_));
     78 }
     79 
     80 JtlParser::~JtlParser() {}
     81 
     82 // static
     83 bool JtlParser::RemoveCommentsAndAllWhitespace(
     84     const std::string& verbose_text,
     85     std::string* compacted_text,
     86     std::vector<size_t>* newline_indices,
     87     size_t* error_line_number) {
     88   DCHECK(compacted_text);
     89   DCHECK(newline_indices);
     90   std::string line;
     91   RE2 single_line_regex(kSingleLineWithMaybeCommentsRE);
     92   RE2 remove_whitespace_regex(kRemoveWhitespaceRE);
     93   re2::StringPiece verbose_text_piece(verbose_text);
     94   compacted_text->clear();
     95   newline_indices->clear();
     96   while (!verbose_text_piece.empty()) {
     97     if (!RE2::Consume(&verbose_text_piece, single_line_regex, &line)) {
     98       if (error_line_number)
     99         *error_line_number = newline_indices->size();
    100       return false;
    101     }
    102     RE2::GlobalReplace(
    103         &line, remove_whitespace_regex, kRemoveWhitespaceRewrite);
    104     *compacted_text += line;
    105     newline_indices->push_back(compacted_text->size());
    106   }
    107   return true;
    108 }
    109 
    110 bool JtlParser::HasFinished() {
    111   return state_->remaining_compacted_source.empty();
    112 }
    113 
    114 bool JtlParser::ParseNextOperation(std::string* name,
    115                                    base::ListValue* argument_list,
    116                                    bool* ends_sentence) {
    117   DCHECK(name);
    118   DCHECK(argument_list);
    119   DCHECK(ends_sentence);
    120 
    121   state_->last_context = state_->remaining_compacted_source;
    122   state_->last_line_number = GetOriginalLineNumber(
    123       compacted_source_.size() - state_->remaining_compacted_source.length());
    124 
    125   std::string arguments, separator;
    126   if (!RE2::Consume(&state_->remaining_compacted_source,
    127                     state_->single_operation_regex,
    128                     name,
    129                     &arguments,
    130                     &separator))
    131     return false;
    132 
    133   *ends_sentence = (separator == kEndOfSentenceSeparator);
    134   state_->last_context.remove_suffix(state_->remaining_compacted_source.size());
    135 
    136   re2::StringPiece arguments_piece(arguments);
    137   std::string string_value, boolean_value;
    138   while (!arguments_piece.empty()) {
    139     if (!RE2::Consume(&arguments_piece,
    140                       state_->single_argument_regex,
    141                       &string_value,
    142                       &boolean_value))
    143       return false;
    144 
    145     if (!boolean_value.empty()) {
    146       argument_list->Append(
    147           new base::FundamentalValue(boolean_value == kTrueKeyword));
    148     } else {
    149       // |string_value| might be empty for an empty string
    150       argument_list->Append(new StringValue(string_value));
    151     }
    152   }
    153   return true;
    154 }
    155 
    156 size_t JtlParser::GetOriginalLineNumber(size_t compacted_index) const {
    157   return static_cast<size_t>(std::upper_bound(newline_indices_.begin(),
    158                                               newline_indices_.end(),
    159                                               compacted_index) -
    160                              newline_indices_.begin());
    161 }
    162 
    163 size_t JtlParser::GetLastLineNumber() const { return state_->last_line_number; }
    164 
    165 std::string JtlParser::GetLastContext() const {
    166   return state_->last_context.ToString();
    167 }
    168