1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <ctype.h> 18 #include <set> 19 #include <stack> 20 #include <string> 21 22 #include "src/perfetto_cmd/pbtxt_to_pb.h" 23 24 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" 25 26 #include "perfetto/base/file_utils.h" 27 #include "perfetto/base/logging.h" 28 #include "perfetto/base/string_view.h" 29 #include "perfetto/base/utils.h" 30 #include "perfetto/common/descriptor.pb.h" 31 #include "perfetto/protozero/message.h" 32 #include "perfetto/protozero/message_handle.h" 33 #include "perfetto/protozero/scattered_heap_buffer.h" 34 #include "src/perfetto_cmd/perfetto_config.descriptor.h" 35 36 namespace perfetto { 37 constexpr char kConfigProtoName[] = ".perfetto.protos.TraceConfig"; 38 39 using protos::DescriptorProto; 40 using protos::EnumDescriptorProto; 41 using protos::EnumValueDescriptorProto; 42 using protos::FieldDescriptorProto; 43 using protos::FileDescriptorSet; 44 using ::google::protobuf::io::ZeroCopyInputStream; 45 using ::google::protobuf::io::ArrayInputStream; 46 47 namespace { 48 49 constexpr bool IsIdentifierStart(char c) { 50 return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_'; 51 } 52 53 constexpr bool IsIdentifierBody(char c) { 54 return IsIdentifierStart(c) || isdigit(c); 55 } 56 57 const char* FieldToTypeName(const FieldDescriptorProto* field) { 58 switch (field->type()) { 59 case FieldDescriptorProto::TYPE_UINT64: 60 return "uint64"; 61 case FieldDescriptorProto::TYPE_UINT32: 62 return "uint32"; 63 case FieldDescriptorProto::TYPE_INT64: 64 return "int64"; 65 case FieldDescriptorProto::TYPE_SINT64: 66 return "sint64"; 67 case FieldDescriptorProto::TYPE_INT32: 68 return "int32"; 69 case FieldDescriptorProto::TYPE_SINT32: 70 return "sint32"; 71 case FieldDescriptorProto::TYPE_FIXED64: 72 return "fixed64"; 73 case FieldDescriptorProto::TYPE_SFIXED64: 74 return "sfixed64"; 75 case FieldDescriptorProto::TYPE_FIXED32: 76 return "fixed32"; 77 case FieldDescriptorProto::TYPE_SFIXED32: 78 return "sfixed32"; 79 case FieldDescriptorProto::TYPE_DOUBLE: 80 return "double"; 81 case FieldDescriptorProto::TYPE_FLOAT: 82 return "float"; 83 case FieldDescriptorProto::TYPE_BOOL: 84 return "bool"; 85 case FieldDescriptorProto::TYPE_STRING: 86 return "string"; 87 case FieldDescriptorProto::TYPE_BYTES: 88 return "bytes"; 89 case FieldDescriptorProto::TYPE_GROUP: 90 return "group"; 91 case FieldDescriptorProto::TYPE_MESSAGE: 92 return "message"; 93 case FieldDescriptorProto::TYPE_ENUM: 94 return "enum"; 95 } 96 // For gcc 97 PERFETTO_FATAL("Non complete switch"); 98 } 99 100 std::string Format(const char* fmt, std::map<std::string, std::string> args) { 101 std::string result(fmt); 102 for (const auto& key_value : args) { 103 size_t start = result.find(key_value.first); 104 PERFETTO_CHECK(start != std::string::npos); 105 result.replace(start, key_value.first.size(), key_value.second); 106 PERFETTO_CHECK(result.find(key_value.first) == std::string::npos); 107 } 108 return result; 109 } 110 111 enum ParseState { 112 kWaitingForKey, 113 kReadingKey, 114 kWaitingForValue, 115 kReadingStringValue, 116 kReadingStringEscape, 117 kReadingNumericValue, 118 kReadingIdentifierValue, 119 }; 120 121 struct Token { 122 size_t offset; 123 size_t column; 124 size_t row; 125 base::StringView txt; 126 127 size_t size() const { return txt.size(); } 128 std::string ToStdString() const { return txt.ToStdString(); } 129 }; 130 131 struct ParserDelegateContext { 132 const DescriptorProto* descriptor; 133 protozero::Message* message; 134 std::set<std::string> seen_fields; 135 }; 136 137 class ParserDelegate { 138 public: 139 ParserDelegate( 140 const DescriptorProto* descriptor, 141 protozero::Message* message, 142 ErrorReporter* reporter, 143 std::map<std::string, const DescriptorProto*> name_to_descriptor, 144 std::map<std::string, const EnumDescriptorProto*> name_to_enum) 145 : reporter_(reporter), 146 name_to_descriptor_(std::move(name_to_descriptor)), 147 name_to_enum_(std::move(name_to_enum)) { 148 ctx_.push(ParserDelegateContext{descriptor, message, {}}); 149 } 150 151 void NumericField(Token key, Token value) { 152 const FieldDescriptorProto* field = FindFieldByName( 153 key, value, 154 { 155 FieldDescriptorProto::TYPE_UINT64, 156 FieldDescriptorProto::TYPE_UINT32, FieldDescriptorProto::TYPE_INT64, 157 FieldDescriptorProto::TYPE_SINT64, FieldDescriptorProto::TYPE_INT32, 158 FieldDescriptorProto::TYPE_SINT32, 159 FieldDescriptorProto::TYPE_FIXED64, 160 FieldDescriptorProto::TYPE_SFIXED64, 161 FieldDescriptorProto::TYPE_FIXED32, 162 FieldDescriptorProto::TYPE_SFIXED32, 163 FieldDescriptorProto::TYPE_DOUBLE, FieldDescriptorProto::TYPE_FLOAT, 164 }); 165 if (!field) 166 return; 167 const auto& field_type = field->type(); 168 switch (field_type) { 169 case FieldDescriptorProto::TYPE_UINT64: 170 return VarIntField<uint64_t>(field, value); 171 case FieldDescriptorProto::TYPE_UINT32: 172 return VarIntField<uint32_t>(field, value); 173 case FieldDescriptorProto::TYPE_INT64: 174 case FieldDescriptorProto::TYPE_SINT64: 175 return VarIntField<int64_t>(field, value); 176 case FieldDescriptorProto::TYPE_INT32: 177 case FieldDescriptorProto::TYPE_SINT32: 178 return VarIntField<int32_t>(field, value); 179 180 case FieldDescriptorProto::TYPE_FIXED64: 181 case FieldDescriptorProto::TYPE_SFIXED64: 182 return FixedField<int64_t>(field, value); 183 184 case FieldDescriptorProto::TYPE_FIXED32: 185 case FieldDescriptorProto::TYPE_SFIXED32: 186 return FixedField<int32_t>(field, value); 187 188 case FieldDescriptorProto::TYPE_DOUBLE: 189 return FixedFloatField<double>(field, value); 190 case FieldDescriptorProto::TYPE_FLOAT: 191 return FixedFloatField<float>(field, value); 192 193 case FieldDescriptorProto::TYPE_BOOL: 194 case FieldDescriptorProto::TYPE_STRING: 195 case FieldDescriptorProto::TYPE_BYTES: 196 case FieldDescriptorProto::TYPE_GROUP: 197 case FieldDescriptorProto::TYPE_MESSAGE: 198 case FieldDescriptorProto::TYPE_ENUM: 199 PERFETTO_FATAL("Invalid type"); 200 } 201 } 202 203 void StringField(Token key, Token value) { 204 const FieldDescriptorProto* field = FindFieldByName( 205 key, value, 206 { 207 FieldDescriptorProto::TYPE_STRING, FieldDescriptorProto::TYPE_BYTES, 208 }); 209 if (!field) 210 return; 211 uint32_t field_id = static_cast<uint32_t>(field->number()); 212 const auto& field_type = field->type(); 213 PERFETTO_CHECK(field_type == FieldDescriptorProto::TYPE_STRING || 214 field_type == FieldDescriptorProto::TYPE_BYTES); 215 216 std::unique_ptr<char, base::FreeDeleter> s( 217 static_cast<char*>(malloc(value.size()))); 218 size_t j = 0; 219 for (size_t i = 0; i < value.size(); i++) { 220 char c = value.txt.data()[i]; 221 if (c == '\\') { 222 if (i + 1 >= value.size()) { 223 // This should be caught by the lexer. 224 PERFETTO_FATAL("Escape at end of string."); 225 return; 226 } 227 char next = value.txt.data()[++i]; 228 switch (next) { 229 case '\\': 230 case '\'': 231 case '"': 232 case '?': 233 s.get()[j++] = next; 234 break; 235 case 'a': 236 s.get()[j++] = '\a'; 237 break; 238 case 'b': 239 s.get()[j++] = '\b'; 240 break; 241 case 'f': 242 s.get()[j++] = '\f'; 243 break; 244 case 'n': 245 s.get()[j++] = '\n'; 246 break; 247 case 'r': 248 s.get()[j++] = '\r'; 249 break; 250 case 't': 251 s.get()[j++] = '\t'; 252 break; 253 case 'v': 254 s.get()[j++] = '\v'; 255 break; 256 default: 257 AddError(value, 258 "Unknown string escape in $k in " 259 "proto $n: '$v'", 260 std::map<std::string, std::string>{ 261 {"$k", key.ToStdString()}, 262 {"$n", descriptor_name()}, 263 {"$v", value.ToStdString()}, 264 }); 265 return; 266 } 267 } else { 268 s.get()[j++] = c; 269 } 270 } 271 msg()->AppendBytes(field_id, s.get(), j); 272 } 273 274 void IdentifierField(Token key, Token value) { 275 const FieldDescriptorProto* field = FindFieldByName( 276 key, value, 277 { 278 FieldDescriptorProto::TYPE_BOOL, FieldDescriptorProto::TYPE_ENUM, 279 }); 280 if (!field) 281 return; 282 uint32_t field_id = static_cast<uint32_t>(field->number()); 283 const auto& field_type = field->type(); 284 if (field_type == FieldDescriptorProto::TYPE_BOOL) { 285 if (value.txt != "true" && value.txt != "false") { 286 AddError(value, 287 "Expected 'true' or 'false' for boolean field $k in " 288 "proto $n instead saw '$v'", 289 std::map<std::string, std::string>{ 290 {"$k", key.ToStdString()}, 291 {"$n", descriptor_name()}, 292 {"$v", value.ToStdString()}, 293 }); 294 return; 295 } 296 msg()->AppendTinyVarInt(field_id, value.txt == "true" ? 1 : 0); 297 } else if (field_type == FieldDescriptorProto::TYPE_ENUM) { 298 const std::string& type_name = field->type_name(); 299 const EnumDescriptorProto* enum_descriptor = name_to_enum_[type_name]; 300 PERFETTO_CHECK(enum_descriptor); 301 bool found_value = false; 302 int32_t enum_value_number = 0; 303 for (const EnumValueDescriptorProto& enum_value : 304 enum_descriptor->value()) { 305 if (value.ToStdString() != enum_value.name()) 306 continue; 307 found_value = true; 308 enum_value_number = enum_value.number(); 309 break; 310 } 311 PERFETTO_CHECK(found_value); 312 msg()->AppendVarInt<int32_t>(field_id, enum_value_number); 313 } else { 314 } 315 } 316 317 void BeginNestedMessage(Token key, Token value) { 318 const FieldDescriptorProto* field = 319 FindFieldByName(key, value, 320 { 321 FieldDescriptorProto::TYPE_MESSAGE, 322 }); 323 if (!field) 324 return; 325 uint32_t field_id = static_cast<uint32_t>(field->number()); 326 const std::string& type_name = field->type_name(); 327 const DescriptorProto* nested_descriptor = name_to_descriptor_[type_name]; 328 PERFETTO_CHECK(nested_descriptor); 329 auto* nested_msg = msg()->BeginNestedMessage<protozero::Message>(field_id); 330 ctx_.push(ParserDelegateContext{nested_descriptor, nested_msg, {}}); 331 } 332 333 void EndNestedMessage() { 334 msg()->Finalize(); 335 ctx_.pop(); 336 } 337 338 void Eof() {} 339 340 void AddError(size_t row, 341 size_t column, 342 const char* fmt, 343 const std::map<std::string, std::string>& args) { 344 reporter_->AddError(row, column, 0, Format(fmt, args)); 345 } 346 347 void AddError(Token token, 348 const char* fmt, 349 const std::map<std::string, std::string>& args) { 350 reporter_->AddError(token.row, token.column, token.size(), 351 Format(fmt, args)); 352 } 353 354 private: 355 template <typename T> 356 void VarIntField(const FieldDescriptorProto* field, Token t) { 357 uint32_t field_id = static_cast<uint32_t>(field->number()); 358 uint64_t n = 0; 359 PERFETTO_CHECK(ParseInteger(t.txt, &n)); 360 if (field->type() == FieldDescriptorProto::TYPE_SINT64 || 361 field->type() == FieldDescriptorProto::TYPE_SINT32) { 362 msg()->AppendSignedVarInt<T>(field_id, static_cast<T>(n)); 363 } else { 364 msg()->AppendVarInt<T>(field_id, static_cast<T>(n)); 365 } 366 } 367 368 template <typename T> 369 void FixedField(const FieldDescriptorProto* field, Token t) { 370 uint32_t field_id = static_cast<uint32_t>(field->number()); 371 uint64_t n = 0; 372 PERFETTO_CHECK(ParseInteger(t.txt, &n)); 373 msg()->AppendFixed<T>(field_id, static_cast<T>(n)); 374 } 375 376 template <typename T> 377 void FixedFloatField(const FieldDescriptorProto* field, Token t) { 378 uint32_t field_id = static_cast<uint32_t>(field->number()); 379 double n = std::stod(t.ToStdString()); 380 msg()->AppendFixed<T>(field_id, static_cast<T>(n)); 381 } 382 383 template <typename T> 384 bool ParseInteger(base::StringView s, T* number_ptr) { 385 uint64_t n = 0; 386 PERFETTO_CHECK(sscanf(s.ToStdString().c_str(), "%" PRIu64, &n) == 1); 387 PERFETTO_CHECK(n <= std::numeric_limits<T>::max()); 388 *number_ptr = static_cast<T>(n); 389 return true; 390 } 391 392 const FieldDescriptorProto* FindFieldByName( 393 Token key, 394 Token value, 395 std::set<FieldDescriptorProto::Type> valid_field_types) { 396 const std::string field_name = key.ToStdString(); 397 const FieldDescriptorProto* field_descriptor = nullptr; 398 for (const auto& f : descriptor()->field()) { 399 if (f.name() == field_name) { 400 field_descriptor = &f; 401 break; 402 } 403 } 404 405 if (!field_descriptor) { 406 AddError(key, "No field named \"$n\" in proto $p", 407 { 408 {"$n", field_name}, {"$p", descriptor_name()}, 409 }); 410 return nullptr; 411 } 412 413 bool is_repeated = 414 field_descriptor->label() == FieldDescriptorProto::LABEL_REPEATED; 415 auto it_and_inserted = ctx_.top().seen_fields.emplace(field_name); 416 if (!it_and_inserted.second && !is_repeated) { 417 AddError(key, "Saw non-repeating field '$f' more than once", 418 { 419 {"$f", field_name}, 420 }); 421 } 422 423 if (!valid_field_types.count(field_descriptor->type())) { 424 AddError(value, 425 "Expected value of type $t for field $k in proto $n " 426 "instead saw '$v'", 427 { 428 {"$t", FieldToTypeName(field_descriptor)}, 429 {"$k", field_name}, 430 {"$n", descriptor_name()}, 431 {"$v", value.ToStdString()}, 432 }); 433 return nullptr; 434 } 435 436 return field_descriptor; 437 } 438 439 const DescriptorProto* descriptor() { 440 PERFETTO_CHECK(!ctx_.empty()); 441 return ctx_.top().descriptor; 442 } 443 444 const std::string& descriptor_name() { return descriptor()->name(); } 445 446 protozero::Message* msg() { 447 PERFETTO_CHECK(!ctx_.empty()); 448 return ctx_.top().message; 449 } 450 451 std::stack<ParserDelegateContext> ctx_; 452 ErrorReporter* reporter_; 453 std::map<std::string, const DescriptorProto*> name_to_descriptor_; 454 std::map<std::string, const EnumDescriptorProto*> name_to_enum_; 455 }; 456 457 void Parse(const std::string& input, ParserDelegate* delegate) { 458 ParseState state = kWaitingForKey; 459 size_t column = 0; 460 size_t row = 1; 461 size_t depth = 0; 462 bool saw_colon_for_this_key = false; 463 bool saw_semicolon_for_this_value = true; 464 bool comment_till_eol = false; 465 Token key{}; 466 Token value{}; 467 468 for (size_t i = 0; i < input.size(); i++, column++) { 469 bool last_character = i + 1 == input.size(); 470 char c = input.at(i); 471 if (c == '\n') { 472 column = 0; 473 row++; 474 if (comment_till_eol) { 475 comment_till_eol = false; 476 continue; 477 } 478 } 479 if (comment_till_eol) 480 continue; 481 482 switch (state) { 483 case kWaitingForKey: 484 if (isspace(c)) 485 continue; 486 if (c == '#') { 487 comment_till_eol = true; 488 continue; 489 } 490 if (c == '}') { 491 if (depth == 0) { 492 delegate->AddError(row, column, "Unmatched closing brace", {}); 493 return; 494 } 495 saw_semicolon_for_this_value = false; 496 depth--; 497 delegate->EndNestedMessage(); 498 continue; 499 } 500 if (!saw_semicolon_for_this_value && c == ';') { 501 saw_semicolon_for_this_value = true; 502 continue; 503 } 504 if (IsIdentifierStart(c)) { 505 saw_colon_for_this_key = false; 506 state = kReadingKey; 507 key.offset = i; 508 key.row = row; 509 key.column = column; 510 continue; 511 } 512 break; 513 514 case kReadingKey: 515 if (IsIdentifierBody(c)) 516 continue; 517 key.txt = base::StringView(input.data() + key.offset, i - key.offset); 518 state = kWaitingForValue; 519 if (c == '#') 520 comment_till_eol = true; 521 continue; 522 523 case kWaitingForValue: 524 if (isspace(c)) 525 continue; 526 if (c == '#') { 527 comment_till_eol = true; 528 continue; 529 } 530 value.offset = i; 531 value.row = row; 532 value.column = column; 533 534 if (c == ':' && !saw_colon_for_this_key) { 535 saw_colon_for_this_key = true; 536 continue; 537 } 538 if (c == '"') { 539 state = kReadingStringValue; 540 continue; 541 } 542 if (c == '-' || isdigit(c)) { 543 state = kReadingNumericValue; 544 continue; 545 } 546 if (IsIdentifierStart(c)) { 547 state = kReadingIdentifierValue; 548 continue; 549 } 550 if (c == '{') { 551 state = kWaitingForKey; 552 depth++; 553 value.txt = base::StringView(input.data() + value.offset, 1); 554 delegate->BeginNestedMessage(key, value); 555 continue; 556 } 557 break; 558 559 case kReadingNumericValue: 560 if (isspace(c) || c == ';' || last_character) { 561 size_t size = i - value.offset + (last_character ? 1 : 0); 562 value.txt = base::StringView(input.data() + value.offset, size); 563 saw_semicolon_for_this_value = c == ';'; 564 state = kWaitingForKey; 565 delegate->NumericField(key, value); 566 continue; 567 } 568 if (isdigit(c)) 569 continue; 570 break; 571 572 case kReadingStringValue: 573 if (c == '\\') { 574 state = kReadingStringEscape; 575 } else if (c == '"') { 576 size_t size = i - value.offset - 1; 577 value.column++; 578 value.txt = base::StringView(input.data() + value.offset + 1, size); 579 saw_semicolon_for_this_value = false; 580 state = kWaitingForKey; 581 delegate->StringField(key, value); 582 } 583 continue; 584 585 case kReadingStringEscape: 586 state = kReadingStringValue; 587 continue; 588 589 case kReadingIdentifierValue: 590 if (isspace(c) || c == ';' || c == '#' || last_character) { 591 size_t size = i - value.offset + (last_character ? 1 : 0); 592 value.txt = base::StringView(input.data() + value.offset, size); 593 comment_till_eol = c == '#'; 594 saw_semicolon_for_this_value = c == ';'; 595 state = kWaitingForKey; 596 delegate->IdentifierField(key, value); 597 continue; 598 } 599 if (IsIdentifierBody(c)) { 600 continue; 601 } 602 break; 603 } 604 PERFETTO_FATAL("Unexpected char %c", c); 605 } // for 606 if (depth > 0) 607 delegate->AddError(row, column, "Nested message not closed", {}); 608 if (state != kWaitingForKey) 609 delegate->AddError(row, column, "Unexpected end of input", {}); 610 delegate->Eof(); 611 } 612 613 void AddNestedDescriptors( 614 const std::string& prefix, 615 const DescriptorProto* descriptor, 616 std::map<std::string, const DescriptorProto*>* name_to_descriptor, 617 std::map<std::string, const EnumDescriptorProto*>* name_to_enum) { 618 for (const EnumDescriptorProto& enum_descriptor : descriptor->enum_type()) { 619 const std::string name = prefix + "." + enum_descriptor.name(); 620 (*name_to_enum)[name] = &enum_descriptor; 621 } 622 for (const DescriptorProto& nested_descriptor : descriptor->nested_type()) { 623 const std::string name = prefix + "." + nested_descriptor.name(); 624 (*name_to_descriptor)[name] = &nested_descriptor; 625 AddNestedDescriptors(name, &nested_descriptor, name_to_descriptor, 626 name_to_enum); 627 } 628 } 629 630 } // namespace 631 632 ErrorReporter::ErrorReporter() = default; 633 ErrorReporter::~ErrorReporter() = default; 634 635 std::vector<uint8_t> PbtxtToPb(const std::string& input, 636 ErrorReporter* reporter) { 637 std::map<std::string, const DescriptorProto*> name_to_descriptor; 638 std::map<std::string, const EnumDescriptorProto*> name_to_enum; 639 FileDescriptorSet file_descriptor_set; 640 641 { 642 file_descriptor_set.ParseFromArray( 643 kPerfettoConfigDescriptor.data(), 644 static_cast<int>(kPerfettoConfigDescriptor.size())); 645 for (const auto& file_descriptor : file_descriptor_set.file()) { 646 for (const auto& enum_descriptor : file_descriptor.enum_type()) { 647 const std::string name = 648 "." + file_descriptor.package() + "." + enum_descriptor.name(); 649 name_to_enum[name] = &enum_descriptor; 650 } 651 for (const auto& descriptor : file_descriptor.message_type()) { 652 const std::string name = 653 "." + file_descriptor.package() + "." + descriptor.name(); 654 name_to_descriptor[name] = &descriptor; 655 AddNestedDescriptors(name, &descriptor, &name_to_descriptor, 656 &name_to_enum); 657 } 658 } 659 } 660 661 const DescriptorProto* descriptor = name_to_descriptor[kConfigProtoName]; 662 PERFETTO_CHECK(descriptor); 663 664 protozero::HeapBuffered<protozero::Message> message; 665 ParserDelegate delegate(descriptor, message.get(), reporter, 666 std::move(name_to_descriptor), 667 std::move(name_to_enum)); 668 Parse(input, &delegate); 669 return message.SerializeAsArray(); 670 } 671 672 } // namespace perfetto 673