1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/tools/profile_reset/jtl_parser.h" 6 7 #include <algorithm> 8 9 #include "base/logging.h" 10 #include "third_party/re2/re2/re2.h" 11 12 namespace { 13 14 // RegEx that matches the first line of a text. Will throw away any potential 15 // double-slash-introduced comments and the potential trailing EOL character. 16 // Note: will fail in case the first line contains an unmatched double-quote 17 // outside of comments. 18 const char kSingleLineWithMaybeCommentsRE[] = 19 // Non-greedily match and capture sequences of 1.) string literals inside 20 // correctly matched double-quotes, or 2.) any other character. 21 "^((?:\"[^\"\\n]*\"|[^\"\\n])*?)" 22 // Greedily match and throw away the potential comment. 23 "(?://.*)?" 24 // Match and throw away EOL, or match end-of-string. 25 "(?:\n|$)"; 26 27 // RegEx to match either a double-quote-enclosed string literal or a whitespace. 28 // Applied repeatedly and without overlapping, can be used to remove whitespace 29 // outside of string literals. 30 const char kRemoveWhitespaceRE[] = "(\"[^\"]*\")|\\s"; 31 32 // The substitution pattern to use together with the above when replacing. As 33 // the whitespace is not back-referenced here, it will get removed. 34 const char kRemoveWhitespaceRewrite[] = "\\1"; 35 36 // Separator to terminate a sentence. 37 const char kEndOfSentenceSeparator[] = ";"; 38 39 // The 'true' Boolean keyword. 40 const char kTrueKeyword[] = "true"; 41 42 // RegEx that matches and captures one argument, which is either a double-quote 43 // enclosed string, or a Boolean value. Will throw away a trailing comma. 44 const char kSingleArgumentRE[] = "(?:(?:\"([^\"]*)\"|(true|false))(?:,|$))"; 45 46 // RegEx-es that, when concatenated, will match a single operation, and capture 47 // the: operation name, the optional arguments, and the separator that follows. 48 const char kOperationNameRE[] = "([[:word:]]+)"; 49 const char kMaybeArgumentListRE[] = 50 "(?:\\(" // Opening parenthesis. 51 "((?:\"[^\"]*\"|[^\")])*)" // Capture: anything inside, quote-aware. 52 "\\))?"; // Closing parenthesis + everything optional. 53 const char kOperationSeparatorRE[] = "(;|\\.)"; 54 55 } // namespace 56 57 struct JtlParser::ParsingState { 58 explicit ParsingState(const re2::StringPiece& compacted_source) 59 : single_operation_regex(std::string(kOperationNameRE) + 60 kMaybeArgumentListRE + 61 kOperationSeparatorRE), 62 single_argument_regex(kSingleArgumentRE), 63 remaining_compacted_source(compacted_source), 64 last_line_number(0) {} 65 66 RE2 single_operation_regex; 67 RE2 single_argument_regex; 68 re2::StringPiece remaining_compacted_source; 69 re2::StringPiece last_context; 70 size_t last_line_number; 71 }; 72 73 JtlParser::JtlParser(const std::string& compacted_source_code, 74 const std::vector<size_t>& newline_indices) 75 : compacted_source_(compacted_source_code), 76 newline_indices_(newline_indices) { 77 state_.reset(new ParsingState(compacted_source_)); 78 } 79 80 JtlParser::~JtlParser() {} 81 82 // static 83 bool JtlParser::RemoveCommentsAndAllWhitespace( 84 const std::string& verbose_text, 85 std::string* compacted_text, 86 std::vector<size_t>* newline_indices, 87 size_t* error_line_number) { 88 DCHECK(compacted_text); 89 DCHECK(newline_indices); 90 std::string line; 91 RE2 single_line_regex(kSingleLineWithMaybeCommentsRE); 92 RE2 remove_whitespace_regex(kRemoveWhitespaceRE); 93 re2::StringPiece verbose_text_piece(verbose_text); 94 compacted_text->clear(); 95 newline_indices->clear(); 96 while (!verbose_text_piece.empty()) { 97 if (!RE2::Consume(&verbose_text_piece, single_line_regex, &line)) { 98 if (error_line_number) 99 *error_line_number = newline_indices->size(); 100 return false; 101 } 102 RE2::GlobalReplace( 103 &line, remove_whitespace_regex, kRemoveWhitespaceRewrite); 104 *compacted_text += line; 105 newline_indices->push_back(compacted_text->size()); 106 } 107 return true; 108 } 109 110 bool JtlParser::HasFinished() { 111 return state_->remaining_compacted_source.empty(); 112 } 113 114 bool JtlParser::ParseNextOperation(std::string* name, 115 base::ListValue* argument_list, 116 bool* ends_sentence) { 117 DCHECK(name); 118 DCHECK(argument_list); 119 DCHECK(ends_sentence); 120 121 state_->last_context = state_->remaining_compacted_source; 122 state_->last_line_number = GetOriginalLineNumber( 123 compacted_source_.size() - state_->remaining_compacted_source.length()); 124 125 std::string arguments, separator; 126 if (!RE2::Consume(&state_->remaining_compacted_source, 127 state_->single_operation_regex, 128 name, 129 &arguments, 130 &separator)) 131 return false; 132 133 *ends_sentence = (separator == kEndOfSentenceSeparator); 134 state_->last_context.remove_suffix(state_->remaining_compacted_source.size()); 135 136 re2::StringPiece arguments_piece(arguments); 137 std::string string_value, boolean_value; 138 while (!arguments_piece.empty()) { 139 if (!RE2::Consume(&arguments_piece, 140 state_->single_argument_regex, 141 &string_value, 142 &boolean_value)) 143 return false; 144 145 if (!boolean_value.empty()) { 146 argument_list->Append( 147 new base::FundamentalValue(boolean_value == kTrueKeyword)); 148 } else { 149 // |string_value| might be empty for an empty string 150 argument_list->Append(new StringValue(string_value)); 151 } 152 } 153 return true; 154 } 155 156 size_t JtlParser::GetOriginalLineNumber(size_t compacted_index) const { 157 return static_cast<size_t>(std::upper_bound(newline_indices_.begin(), 158 newline_indices_.end(), 159 compacted_index) - 160 newline_indices_.begin()); 161 } 162 163 size_t JtlParser::GetLastLineNumber() const { return state_->last_line_number; } 164 165 std::string JtlParser::GetLastContext() const { 166 return state_->last_context.ToString(); 167 } 168