1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr (at) google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // This file defines static methods and classes for comparing Protocol 36 // Messages (see //google/protobuf/util/message_differencer.h for more 37 // information). 38 39 #include <google/protobuf/util/message_differencer.h> 40 41 #include <algorithm> 42 #include <memory> 43 #ifndef _SHARED_PTR_H 44 #include <google/protobuf/stubs/shared_ptr.h> 45 #endif 46 #include <utility> 47 48 #include <google/protobuf/stubs/callback.h> 49 #include <google/protobuf/stubs/common.h> 50 #include <google/protobuf/stubs/logging.h> 51 #include <google/protobuf/stubs/stringprintf.h> 52 #include <google/protobuf/any.h> 53 #include <google/protobuf/io/printer.h> 54 #include <google/protobuf/io/zero_copy_stream.h> 55 #include <google/protobuf/io/zero_copy_stream_impl.h> 56 #include <google/protobuf/dynamic_message.h> 57 #include <google/protobuf/text_format.h> 58 #include <google/protobuf/util/field_comparator.h> 59 #include <google/protobuf/stubs/strutil.h> 60 61 namespace google { 62 namespace protobuf { 63 64 namespace util { 65 66 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can 67 // be used to specify multiple fields as key for key comparison. 68 // Two elements of a repeated field will be regarded as having the same key 69 // iff they have the same value for every specified key field. 70 // Note that you can also specify only one field as key. 71 class MessageDifferencer::MultipleFieldsMapKeyComparator 72 : public MessageDifferencer::MapKeyComparator { 73 public: 74 MultipleFieldsMapKeyComparator( 75 MessageDifferencer* message_differencer, 76 const vector<vector<const FieldDescriptor*> >& key_field_paths) 77 : message_differencer_(message_differencer), 78 key_field_paths_(key_field_paths) { 79 GOOGLE_CHECK(!key_field_paths_.empty()); 80 for (int i = 0; i < key_field_paths_.size(); ++i) { 81 GOOGLE_CHECK(!key_field_paths_[i].empty()); 82 } 83 } 84 MultipleFieldsMapKeyComparator( 85 MessageDifferencer* message_differencer, 86 const FieldDescriptor* key) 87 : message_differencer_(message_differencer) { 88 vector<const FieldDescriptor*> key_field_path; 89 key_field_path.push_back(key); 90 key_field_paths_.push_back(key_field_path); 91 } 92 virtual bool IsMatch( 93 const Message& message1, 94 const Message& message2, 95 const vector<SpecificField>& parent_fields) const { 96 for (int i = 0; i < key_field_paths_.size(); ++i) { 97 if (!IsMatchInternal(message1, message2, parent_fields, 98 key_field_paths_[i], 0)) { 99 return false; 100 } 101 } 102 return true; 103 } 104 private: 105 bool IsMatchInternal( 106 const Message& message1, 107 const Message& message2, 108 const vector<SpecificField>& parent_fields, 109 const vector<const FieldDescriptor*>& key_field_path, 110 int path_index) const { 111 const FieldDescriptor* field = key_field_path[path_index]; 112 vector<SpecificField> current_parent_fields(parent_fields); 113 if (path_index == key_field_path.size() - 1) { 114 if (field->is_repeated()) { 115 if (!message_differencer_->CompareRepeatedField( 116 message1, message2, field, ¤t_parent_fields)) { 117 return false; 118 } 119 } else { 120 if (!message_differencer_->CompareFieldValueUsingParentFields( 121 message1, message2, field, -1, -1, ¤t_parent_fields)) { 122 return false; 123 } 124 } 125 return true; 126 } else { 127 const Reflection* reflection1 = message1.GetReflection(); 128 const Reflection* reflection2 = message2.GetReflection(); 129 bool has_field1 = reflection1->HasField(message1, field); 130 bool has_field2 = reflection2->HasField(message2, field); 131 if (!has_field1 && !has_field2) { 132 return true; 133 } 134 if (has_field1 != has_field2) { 135 return false; 136 } 137 SpecificField specific_field; 138 specific_field.field = field; 139 current_parent_fields.push_back(specific_field); 140 return IsMatchInternal( 141 reflection1->GetMessage(message1, field), 142 reflection2->GetMessage(message2, field), 143 current_parent_fields, 144 key_field_path, 145 path_index + 1); 146 } 147 } 148 MessageDifferencer* message_differencer_; 149 vector<vector<const FieldDescriptor*> > key_field_paths_; 150 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator); 151 }; 152 153 bool MessageDifferencer::Equals(const Message& message1, 154 const Message& message2) { 155 MessageDifferencer differencer; 156 157 return differencer.Compare(message1, message2); 158 } 159 160 bool MessageDifferencer::Equivalent(const Message& message1, 161 const Message& message2) { 162 MessageDifferencer differencer; 163 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT); 164 165 return differencer.Compare(message1, message2); 166 } 167 168 bool MessageDifferencer::ApproximatelyEquals(const Message& message1, 169 const Message& message2) { 170 MessageDifferencer differencer; 171 differencer.set_float_comparison( 172 MessageDifferencer::APPROXIMATE); 173 174 return differencer.Compare(message1, message2); 175 } 176 177 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1, 178 const Message& message2) { 179 MessageDifferencer differencer; 180 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT); 181 differencer.set_float_comparison(MessageDifferencer::APPROXIMATE); 182 183 return differencer.Compare(message1, message2); 184 } 185 186 // =========================================================================== 187 188 MessageDifferencer::MessageDifferencer() 189 : reporter_(NULL), 190 field_comparator_(NULL), 191 message_field_comparison_(EQUAL), 192 scope_(FULL), 193 repeated_field_comparison_(AS_LIST), 194 report_matches_(false), 195 output_string_(NULL) { } 196 197 MessageDifferencer::~MessageDifferencer() { 198 for (int i = 0; i < owned_key_comparators_.size(); ++i) { 199 delete owned_key_comparators_[i]; 200 } 201 for (int i = 0; i < ignore_criteria_.size(); ++i) { 202 delete ignore_criteria_[i]; 203 } 204 } 205 206 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) { 207 GOOGLE_CHECK(comparator) << "Field comparator can't be NULL."; 208 field_comparator_ = comparator; 209 } 210 211 void MessageDifferencer::set_message_field_comparison( 212 MessageFieldComparison comparison) { 213 message_field_comparison_ = comparison; 214 } 215 216 void MessageDifferencer::set_scope(Scope scope) { 217 scope_ = scope; 218 } 219 220 MessageDifferencer::Scope MessageDifferencer::scope() { 221 return scope_; 222 } 223 224 void MessageDifferencer::set_float_comparison(FloatComparison comparison) { 225 default_field_comparator_.set_float_comparison( 226 comparison == EXACT ? 227 DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE); 228 } 229 230 void MessageDifferencer::set_repeated_field_comparison( 231 RepeatedFieldComparison comparison) { 232 repeated_field_comparison_ = comparison; 233 } 234 235 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) { 236 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: " 237 << field->full_name(); 238 const MapKeyComparator* key_comparator = GetMapKeyComparator(field); 239 GOOGLE_CHECK(key_comparator == NULL) 240 << "Cannot treat this repeated field as both Map and Set for" 241 << " comparison. Field name is: " << field->full_name(); 242 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end()) 243 << "Cannot treat the same field as both SET and LIST. Field name is: " 244 << field->full_name(); 245 set_fields_.insert(field); 246 } 247 248 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) { 249 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: " 250 << field->full_name(); 251 const MapKeyComparator* key_comparator = GetMapKeyComparator(field); 252 GOOGLE_CHECK(key_comparator == NULL) 253 << "Cannot treat this repeated field as both Map and Set for" 254 << " comparison. Field name is: " << field->full_name(); 255 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end()) 256 << "Cannot treat the same field as both SET and LIST. Field name is: " 257 << field->full_name(); 258 list_fields_.insert(field); 259 } 260 261 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field, 262 const FieldDescriptor* key) { 263 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: " 264 << field->full_name(); 265 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type()) 266 << "Field has to be message type. Field name is: " 267 << field->full_name(); 268 GOOGLE_CHECK(key->containing_type() == field->message_type()) 269 << key->full_name() 270 << " must be a direct subfield within the repeated field " 271 << field->full_name() << ", not " << key->containing_type()->full_name(); 272 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end()) 273 << "Cannot treat this repeated field as both Map and Set for " 274 << "comparison."; 275 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end()) 276 << "Cannot treat this repeated field as both Map and List for " 277 << "comparison."; 278 MapKeyComparator* key_comparator = 279 new MultipleFieldsMapKeyComparator(this, key); 280 owned_key_comparators_.push_back(key_comparator); 281 map_field_key_comparator_[field] = key_comparator; 282 } 283 284 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey( 285 const FieldDescriptor* field, 286 const vector<const FieldDescriptor*>& key_fields) { 287 vector<vector<const FieldDescriptor*> > key_field_paths; 288 for (int i = 0; i < key_fields.size(); ++i) { 289 vector<const FieldDescriptor*> key_field_path; 290 key_field_path.push_back(key_fields[i]); 291 key_field_paths.push_back(key_field_path); 292 } 293 TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths); 294 } 295 296 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey( 297 const FieldDescriptor* field, 298 const vector<vector<const FieldDescriptor*> >& key_field_paths) { 299 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: " 300 << field->full_name(); 301 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type()) 302 << "Field has to be message type. Field name is: " 303 << field->full_name(); 304 for (int i = 0; i < key_field_paths.size(); ++i) { 305 const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i]; 306 for (int j = 0; j < key_field_path.size(); ++j) { 307 const FieldDescriptor* parent_field = 308 j == 0 ? field : key_field_path[j - 1]; 309 const FieldDescriptor* child_field = key_field_path[j]; 310 GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type()) 311 << child_field->full_name() 312 << " must be a direct subfield within the field: " 313 << parent_field->full_name(); 314 if (j != 0) { 315 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type()) 316 << parent_field->full_name() << " has to be of type message."; 317 GOOGLE_CHECK(!parent_field->is_repeated()) 318 << parent_field->full_name() << " cannot be a repeated field."; 319 } 320 } 321 } 322 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end()) 323 << "Cannot treat this repeated field as both Map and Set for " 324 << "comparison."; 325 MapKeyComparator* key_comparator = 326 new MultipleFieldsMapKeyComparator(this, key_field_paths); 327 owned_key_comparators_.push_back(key_comparator); 328 map_field_key_comparator_[field] = key_comparator; 329 } 330 331 void MessageDifferencer::TreatAsMapUsingKeyComparator( 332 const FieldDescriptor* field, 333 const MapKeyComparator* key_comparator) { 334 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: " 335 << field->full_name(); 336 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type()) 337 << "Field has to be message type. Field name is: " 338 << field->full_name(); 339 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end()) 340 << "Cannot treat this repeated field as both Map and Set for " 341 << "comparison."; 342 map_field_key_comparator_[field] = key_comparator; 343 } 344 345 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) { 346 ignore_criteria_.push_back(ignore_criteria); 347 } 348 349 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) { 350 ignored_fields_.insert(field); 351 } 352 353 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field, 354 double fraction, double margin) { 355 default_field_comparator_.SetFractionAndMargin(field, fraction, margin); 356 } 357 358 void MessageDifferencer::ReportDifferencesToString(string* output) { 359 GOOGLE_DCHECK(output) << "Specified output string was NULL"; 360 361 output_string_ = output; 362 output_string_->clear(); 363 } 364 365 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) { 366 // If an output string is set, clear it to prevent 367 // it superceding the specified reporter. 368 if (output_string_) { 369 output_string_ = NULL; 370 } 371 372 reporter_ = reporter; 373 } 374 375 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1, 376 const FieldDescriptor* field2) { 377 // Handle sentinel values (i.e. make sure NULLs are always ordered 378 // at the end of the list). 379 if (field1 == NULL) { 380 return false; 381 } 382 383 if (field2 == NULL) { 384 return true; 385 } 386 387 // Always order fields by their tag number 388 return (field1->number() < field2->number()); 389 } 390 391 bool MessageDifferencer::Compare(const Message& message1, 392 const Message& message2) { 393 vector<SpecificField> parent_fields; 394 395 bool result = false; 396 397 // Setup the internal reporter if need be. 398 if (output_string_) { 399 io::StringOutputStream output_stream(output_string_); 400 StreamReporter reporter(&output_stream); 401 reporter_ = &reporter; 402 result = Compare(message1, message2, &parent_fields); 403 reporter_ = NULL; 404 } else { 405 result = Compare(message1, message2, &parent_fields); 406 } 407 408 return result; 409 } 410 411 bool MessageDifferencer::CompareWithFields( 412 const Message& message1, 413 const Message& message2, 414 const vector<const FieldDescriptor*>& message1_fields_arg, 415 const vector<const FieldDescriptor*>& message2_fields_arg) { 416 if (message1.GetDescriptor() != message2.GetDescriptor()) { 417 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different " 418 << "descriptors."; 419 return false; 420 } 421 422 vector<SpecificField> parent_fields; 423 424 bool result = false; 425 426 vector<const FieldDescriptor*> message1_fields(message1_fields_arg); 427 vector<const FieldDescriptor*> message2_fields(message2_fields_arg); 428 429 std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore); 430 std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore); 431 // Append NULL sentinel values. 432 message1_fields.push_back(NULL); 433 message2_fields.push_back(NULL); 434 435 // Setup the internal reporter if need be. 436 if (output_string_) { 437 io::StringOutputStream output_stream(output_string_); 438 StreamReporter reporter(&output_stream); 439 reporter_ = &reporter; 440 result = CompareRequestedFieldsUsingSettings( 441 message1, message2, message1_fields, message2_fields, &parent_fields); 442 reporter_ = NULL; 443 } else { 444 result = CompareRequestedFieldsUsingSettings( 445 message1, message2, message1_fields, message2_fields, &parent_fields); 446 } 447 448 return result; 449 } 450 451 bool MessageDifferencer::Compare( 452 const Message& message1, 453 const Message& message2, 454 vector<SpecificField>* parent_fields) { 455 const Descriptor* descriptor1 = message1.GetDescriptor(); 456 const Descriptor* descriptor2 = message2.GetDescriptor(); 457 if (descriptor1 != descriptor2) { 458 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different " 459 << "descriptors. " 460 << descriptor1->full_name() << " vs " 461 << descriptor2->full_name(); 462 return false; 463 } 464 // Expand google.protobuf.Any payload if possible. 465 if (descriptor1->full_name() == internal::kAnyFullTypeName) { 466 google::protobuf::scoped_ptr<Message> data1; 467 google::protobuf::scoped_ptr<Message> data2; 468 if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) { 469 return Compare(*data1, *data2, parent_fields); 470 } 471 } 472 const Reflection* reflection1 = message1.GetReflection(); 473 const Reflection* reflection2 = message2.GetReflection(); 474 475 // Retrieve all the set fields, including extensions. 476 vector<const FieldDescriptor*> message1_fields; 477 vector<const FieldDescriptor*> message2_fields; 478 479 reflection1->ListFields(message1, &message1_fields); 480 reflection2->ListFields(message2, &message2_fields); 481 482 // Add sentinel values to deal with the 483 // case where the number of the fields in 484 // each list are different. 485 message1_fields.push_back(NULL); 486 message2_fields.push_back(NULL); 487 488 bool unknown_compare_result = true; 489 // Ignore unknown fields in EQUIVALENT mode 490 if (message_field_comparison_ != EQUIVALENT) { 491 const google::protobuf::UnknownFieldSet* unknown_field_set1 = 492 &reflection1->GetUnknownFields(message1); 493 const google::protobuf::UnknownFieldSet* unknown_field_set2 = 494 &reflection2->GetUnknownFields(message2); 495 if (!CompareUnknownFields(message1, message2, 496 *unknown_field_set1, *unknown_field_set2, 497 parent_fields)) { 498 if (reporter_ == NULL) { 499 return false; 500 }; 501 unknown_compare_result = false; 502 } 503 } 504 505 return CompareRequestedFieldsUsingSettings( 506 message1, message2, 507 message1_fields, message2_fields, 508 parent_fields) && unknown_compare_result; 509 } 510 511 bool MessageDifferencer::CompareRequestedFieldsUsingSettings( 512 const Message& message1, 513 const Message& message2, 514 const vector<const FieldDescriptor*>& message1_fields, 515 const vector<const FieldDescriptor*>& message2_fields, 516 vector<SpecificField>* parent_fields) { 517 if (scope_ == FULL) { 518 if (message_field_comparison_ == EQUIVALENT) { 519 // We need to merge the field lists of both messages (i.e. 520 // we are merely checking for a difference in field values, 521 // rather than the addition or deletion of fields). 522 vector<const FieldDescriptor*> fields_union; 523 CombineFields(message1_fields, FULL, message2_fields, FULL, 524 &fields_union); 525 return CompareWithFieldsInternal(message1, message2, fields_union, 526 fields_union, parent_fields); 527 } else { 528 // Simple equality comparison, use the unaltered field lists. 529 return CompareWithFieldsInternal(message1, message2, message1_fields, 530 message2_fields, parent_fields); 531 } 532 } else { 533 if (message_field_comparison_ == EQUIVALENT) { 534 // We use the list of fields for message1 for both messages when 535 // comparing. This way, extra fields in message2 are ignored, 536 // and missing fields in message2 use their default value. 537 return CompareWithFieldsInternal(message1, message2, message1_fields, 538 message1_fields, parent_fields); 539 } else { 540 // We need to consider the full list of fields for message1 541 // but only the intersection for message2. This way, any fields 542 // only present in message2 will be ignored, but any fields only 543 // present in message1 will be marked as a difference. 544 vector<const FieldDescriptor*> fields_intersection; 545 CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL, 546 &fields_intersection); 547 return CompareWithFieldsInternal(message1, message2, message1_fields, 548 fields_intersection, parent_fields); 549 } 550 } 551 } 552 553 void MessageDifferencer::CombineFields( 554 const vector<const FieldDescriptor*>& fields1, 555 Scope fields1_scope, 556 const vector<const FieldDescriptor*>& fields2, 557 Scope fields2_scope, 558 vector<const FieldDescriptor*>* combined_fields) { 559 560 int index1 = 0; 561 int index2 = 0; 562 563 while (index1 < fields1.size() && index2 < fields2.size()) { 564 const FieldDescriptor* field1 = fields1[index1]; 565 const FieldDescriptor* field2 = fields2[index2]; 566 567 if (FieldBefore(field1, field2)) { 568 if (fields1_scope == FULL) { 569 combined_fields->push_back(fields1[index1]); 570 } 571 ++index1; 572 } else if (FieldBefore(field2, field1)) { 573 if (fields2_scope == FULL) { 574 combined_fields->push_back(fields2[index2]); 575 } 576 ++index2; 577 } else { 578 combined_fields->push_back(fields1[index1]); 579 ++index1; 580 ++index2; 581 } 582 } 583 } 584 585 bool MessageDifferencer::CompareWithFieldsInternal( 586 const Message& message1, 587 const Message& message2, 588 const vector<const FieldDescriptor*>& message1_fields, 589 const vector<const FieldDescriptor*>& message2_fields, 590 vector<SpecificField>* parent_fields) { 591 bool isDifferent = false; 592 int field_index1 = 0; 593 int field_index2 = 0; 594 595 const Reflection* reflection1 = message1.GetReflection(); 596 const Reflection* reflection2 = message2.GetReflection(); 597 598 while (true) { 599 const FieldDescriptor* field1 = message1_fields[field_index1]; 600 const FieldDescriptor* field2 = message2_fields[field_index2]; 601 602 // Once we have reached sentinel values, we are done the comparison. 603 if (field1 == NULL && field2 == NULL) { 604 break; 605 } 606 607 // Check for differences in the field itself. 608 if (FieldBefore(field1, field2)) { 609 // Field 1 is not in the field list for message 2. 610 if (IsIgnored(message1, message2, field1, *parent_fields)) { 611 // We are ignoring field1. Report the ignore and move on to 612 // the next field in message1_fields. 613 if (reporter_ != NULL) { 614 SpecificField specific_field; 615 specific_field.field = field1; 616 617 parent_fields->push_back(specific_field); 618 reporter_->ReportIgnored(message1, message2, *parent_fields); 619 parent_fields->pop_back(); 620 } 621 ++field_index1; 622 continue; 623 } 624 625 if (reporter_ != NULL) { 626 int count = field1->is_repeated() ? 627 reflection1->FieldSize(message1, field1) : 1; 628 629 for (int i = 0; i < count; ++i) { 630 SpecificField specific_field; 631 specific_field.field = field1; 632 specific_field.index = field1->is_repeated() ? i : -1; 633 634 parent_fields->push_back(specific_field); 635 reporter_->ReportDeleted(message1, message2, *parent_fields); 636 parent_fields->pop_back(); 637 } 638 639 isDifferent = true; 640 } else { 641 return false; 642 } 643 644 ++field_index1; 645 continue; 646 } else if (FieldBefore(field2, field1)) { 647 // Field 2 is not in the field list for message 1. 648 if (IsIgnored(message1, message2, field2, *parent_fields)) { 649 // We are ignoring field2. Report the ignore and move on to 650 // the next field in message2_fields. 651 if (reporter_ != NULL) { 652 SpecificField specific_field; 653 specific_field.field = field2; 654 655 parent_fields->push_back(specific_field); 656 reporter_->ReportIgnored(message1, message2, *parent_fields); 657 parent_fields->pop_back(); 658 } 659 ++field_index2; 660 continue; 661 } 662 663 if (reporter_ != NULL) { 664 int count = field2->is_repeated() ? 665 reflection2->FieldSize(message2, field2) : 1; 666 667 for (int i = 0; i < count; ++i) { 668 SpecificField specific_field; 669 specific_field.field = field2; 670 specific_field.index = field2->is_repeated() ? i : -1; 671 specific_field.new_index = specific_field.index; 672 673 parent_fields->push_back(specific_field); 674 reporter_->ReportAdded(message1, message2, *parent_fields); 675 parent_fields->pop_back(); 676 } 677 678 isDifferent = true; 679 } else { 680 return false; 681 } 682 683 ++field_index2; 684 continue; 685 } 686 687 // By this point, field1 and field2 are guarenteed to point to the same 688 // field, so we can now compare the values. 689 if (IsIgnored(message1, message2, field1, *parent_fields)) { 690 // Ignore this field. Report and move on. 691 if (reporter_ != NULL) { 692 SpecificField specific_field; 693 specific_field.field = field1; 694 695 parent_fields->push_back(specific_field); 696 reporter_->ReportIgnored(message1, message2, *parent_fields); 697 parent_fields->pop_back(); 698 } 699 700 ++field_index1; 701 ++field_index2; 702 continue; 703 } 704 705 bool fieldDifferent = false; 706 if (field1->is_repeated()) { 707 fieldDifferent = !CompareRepeatedField(message1, message2, field1, 708 parent_fields); 709 if (fieldDifferent) { 710 if (reporter_ == NULL) return false; 711 isDifferent = true; 712 } 713 } else { 714 fieldDifferent = !CompareFieldValueUsingParentFields( 715 message1, message2, field1, -1, -1, parent_fields); 716 717 // If we have found differences, either report them or terminate if 718 // no reporter is present. 719 if (fieldDifferent && reporter_ == NULL) { 720 return false; 721 } 722 723 if (reporter_ != NULL) { 724 SpecificField specific_field; 725 specific_field.field = field1; 726 parent_fields->push_back(specific_field); 727 if (fieldDifferent) { 728 reporter_->ReportModified(message1, message2, *parent_fields); 729 isDifferent = true; 730 } else if (report_matches_) { 731 reporter_->ReportMatched(message1, message2, *parent_fields); 732 } 733 parent_fields->pop_back(); 734 } 735 } 736 // Increment the field indicies. 737 ++field_index1; 738 ++field_index2; 739 } 740 741 return !isDifferent; 742 } 743 744 bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field, 745 const MapKeyComparator* key_comparator, 746 const Message* message1, 747 const Message* message2, 748 const vector<SpecificField>& parent_fields, 749 int index1, int index2) { 750 vector<SpecificField> current_parent_fields(parent_fields); 751 if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) { 752 return CompareFieldValueUsingParentFields( 753 *message1, *message2, repeated_field, index1, index2, 754 ¤t_parent_fields); 755 } 756 // Back up the Reporter and output_string_. They will be reset in the 757 // following code. 758 Reporter* backup_reporter = reporter_; 759 string* output_string = output_string_; 760 reporter_ = NULL; 761 output_string_ = NULL; 762 bool match; 763 764 if (key_comparator == NULL) { 765 match = CompareFieldValueUsingParentFields( 766 *message1, *message2, repeated_field, index1, index2, 767 ¤t_parent_fields); 768 } else { 769 const Reflection* reflection1 = message1->GetReflection(); 770 const Reflection* reflection2 = message2->GetReflection(); 771 const Message& m1 = 772 reflection1->GetRepeatedMessage(*message1, repeated_field, index1); 773 const Message& m2 = 774 reflection2->GetRepeatedMessage(*message2, repeated_field, index2); 775 SpecificField specific_field; 776 specific_field.field = repeated_field; 777 current_parent_fields.push_back(specific_field); 778 match = key_comparator->IsMatch(m1, m2, current_parent_fields); 779 } 780 781 reporter_ = backup_reporter; 782 output_string_ = output_string; 783 return match; 784 } 785 786 bool MessageDifferencer::CompareRepeatedField( 787 const Message& message1, 788 const Message& message2, 789 const FieldDescriptor* repeated_field, 790 vector<SpecificField>* parent_fields) { 791 // the input FieldDescriptor is guaranteed to be repeated field. 792 const Reflection* reflection1 = message1.GetReflection(); 793 const Reflection* reflection2 = message2.GetReflection(); 794 const int count1 = reflection1->FieldSize(message1, repeated_field); 795 const int count2 = reflection2->FieldSize(message2, repeated_field); 796 const bool treated_as_subset = IsTreatedAsSubset(repeated_field); 797 798 // If the field is not treated as subset and no detailed reports is needed, 799 // we do a quick check on the number of the elements to avoid unnecessary 800 // comparison. 801 if (count1 != count2 && reporter_ == NULL && !treated_as_subset) { 802 return false; 803 } 804 // A match can never be found if message1 has more items than message2. 805 if (count1 > count2 && reporter_ == NULL) { 806 return false; 807 } 808 809 // These two list are used for store the index of the correspondent 810 // element in peer repeated field. 811 vector<int> match_list1; 812 vector<int> match_list2; 813 814 // Try to match indices of the repeated fields. Return false if match fails 815 // and there's no detailed report needed. 816 if (!MatchRepeatedFieldIndices(message1, message2, repeated_field, 817 *parent_fields, &match_list1, &match_list2) && 818 reporter_ == NULL) { 819 return false; 820 } 821 822 bool fieldDifferent = false; 823 SpecificField specific_field; 824 specific_field.field = repeated_field; 825 826 // At this point, we have already matched pairs of fields (with the reporting 827 // to be done later). Now to check if the paired elements are different. 828 for (int i = 0; i < count1; i++) { 829 if (match_list1[i] == -1) continue; 830 specific_field.index = i; 831 specific_field.new_index = match_list1[i]; 832 833 const bool result = CompareFieldValueUsingParentFields( 834 message1, message2, repeated_field, i, specific_field.new_index, 835 parent_fields); 836 837 // If we have found differences, either report them or terminate if 838 // no reporter is present. Note that ReportModified, ReportMoved, and 839 // ReportMatched are all mutually exclusive. 840 if (!result) { 841 if (reporter_ == NULL) return false; 842 parent_fields->push_back(specific_field); 843 reporter_->ReportModified(message1, message2, *parent_fields); 844 parent_fields->pop_back(); 845 fieldDifferent = true; 846 } else if (reporter_ != NULL && 847 specific_field.index != specific_field.new_index) { 848 parent_fields->push_back(specific_field); 849 reporter_->ReportMoved(message1, message2, *parent_fields); 850 parent_fields->pop_back(); 851 } else if (report_matches_ && reporter_ != NULL) { 852 parent_fields->push_back(specific_field); 853 reporter_->ReportMatched(message1, message2, *parent_fields); 854 parent_fields->pop_back(); 855 } 856 } 857 858 // Report any remaining additions or deletions. 859 for (int i = 0; i < count2; ++i) { 860 if (match_list2[i] != -1) continue; 861 if (!treated_as_subset) { 862 fieldDifferent = true; 863 } 864 865 if (reporter_ == NULL) continue; 866 specific_field.index = i; 867 specific_field.new_index = i; 868 parent_fields->push_back(specific_field); 869 reporter_->ReportAdded(message1, message2, *parent_fields); 870 parent_fields->pop_back(); 871 } 872 873 for (int i = 0; i < count1; ++i) { 874 if (match_list1[i] != -1) continue; 875 specific_field.index = i; 876 parent_fields->push_back(specific_field); 877 reporter_->ReportDeleted(message1, message2, *parent_fields); 878 parent_fields->pop_back(); 879 fieldDifferent = true; 880 } 881 return !fieldDifferent; 882 } 883 884 bool MessageDifferencer::CompareFieldValue(const Message& message1, 885 const Message& message2, 886 const FieldDescriptor* field, 887 int index1, 888 int index2) { 889 return CompareFieldValueUsingParentFields(message1, message2, field, index1, 890 index2, NULL); 891 } 892 893 bool MessageDifferencer::CompareFieldValueUsingParentFields( 894 const Message& message1, const Message& message2, 895 const FieldDescriptor* field, int index1, int index2, 896 vector<SpecificField>* parent_fields) { 897 FieldContext field_context(parent_fields); 898 FieldComparator::ComparisonResult result = GetFieldComparisonResult( 899 message1, message2, field, index1, index2, &field_context); 900 901 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 902 result == FieldComparator::RECURSE) { 903 // Get the nested messages and compare them using one of the Compare 904 // methods. 905 const Reflection* reflection1 = message1.GetReflection(); 906 const Reflection* reflection2 = message2.GetReflection(); 907 const Message& m1 = field->is_repeated() ? 908 reflection1->GetRepeatedMessage(message1, field, index1) : 909 reflection1->GetMessage(message1, field); 910 const Message& m2 = field->is_repeated() ? 911 reflection2->GetRepeatedMessage(message2, field, index2) : 912 reflection2->GetMessage(message2, field); 913 914 // parent_fields is used in calls to Reporter methods. 915 if (parent_fields != NULL) { 916 // Append currently compared field to the end of parent_fields. 917 SpecificField specific_field; 918 specific_field.field = field; 919 specific_field.index = index1; 920 specific_field.new_index = index2; 921 parent_fields->push_back(specific_field); 922 const bool compare_result = Compare(m1, m2, parent_fields); 923 parent_fields->pop_back(); 924 return compare_result; 925 } else { 926 // Recreates parent_fields as if m1 and m2 had no parents. 927 return Compare(m1, m2); 928 } 929 } else { 930 return (result == FieldComparator::SAME); 931 } 932 } 933 934 bool MessageDifferencer::CheckPathChanged( 935 const vector<SpecificField>& field_path) { 936 for (int i = 0; i < field_path.size(); ++i) { 937 if (field_path[i].index != field_path[i].new_index) return true; 938 } 939 return false; 940 } 941 942 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) { 943 if (!field->is_repeated()) return false; 944 if (field->is_map()) return true; 945 if (repeated_field_comparison_ == AS_SET) 946 return list_fields_.find(field) == list_fields_.end(); 947 return (set_fields_.find(field) != set_fields_.end()); 948 } 949 950 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) { 951 return scope_ == PARTIAL && 952 (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL); 953 } 954 955 bool MessageDifferencer::IsIgnored( 956 const Message& message1, 957 const Message& message2, 958 const FieldDescriptor* field, 959 const vector<SpecificField>& parent_fields) { 960 if (ignored_fields_.find(field) != ignored_fields_.end()) { 961 return true; 962 } 963 for (int i = 0; i < ignore_criteria_.size(); ++i) { 964 if (ignore_criteria_[i]->IsIgnored(message1, message2, field, 965 parent_fields)) { 966 return true; 967 } 968 } 969 return false; 970 } 971 972 bool MessageDifferencer::IsUnknownFieldIgnored( 973 const Message& message1, const Message& message2, 974 const SpecificField& field, const vector<SpecificField>& parent_fields) { 975 for (int i = 0; i < ignore_criteria_.size(); ++i) { 976 if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field, 977 parent_fields)) { 978 return true; 979 } 980 } 981 return false; 982 } 983 984 const MessageDifferencer::MapKeyComparator* MessageDifferencer 985 ::GetMapKeyComparator(const FieldDescriptor* field) { 986 if (!field->is_repeated()) return NULL; 987 if (map_field_key_comparator_.find(field) != 988 map_field_key_comparator_.end()) { 989 return map_field_key_comparator_[field]; 990 } 991 return NULL; 992 } 993 994 namespace { 995 996 typedef pair<int, const UnknownField*> IndexUnknownFieldPair; 997 998 struct UnknownFieldOrdering { 999 inline bool operator()(const IndexUnknownFieldPair& a, 1000 const IndexUnknownFieldPair& b) const { 1001 if (a.second->number() < b.second->number()) return true; 1002 if (a.second->number() > b.second->number()) return false; 1003 return a.second->type() < b.second->type(); 1004 } 1005 }; 1006 1007 } // namespace 1008 1009 bool MessageDifferencer::UnpackAny(const Message& any, 1010 google::protobuf::scoped_ptr<Message>* data) { 1011 const Reflection* reflection = any.GetReflection(); 1012 const FieldDescriptor* type_url_field; 1013 const FieldDescriptor* value_field; 1014 if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) { 1015 return false; 1016 } 1017 const string& type_url = reflection->GetString(any, type_url_field); 1018 string full_type_name; 1019 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) { 1020 return false; 1021 } 1022 1023 const google::protobuf::Descriptor* desc = 1024 any.GetDescriptor()->file()->pool()->FindMessageTypeByName( 1025 full_type_name); 1026 if (desc == NULL) { 1027 GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found"; 1028 return false; 1029 } 1030 1031 if (dynamic_message_factory_ == NULL) { 1032 dynamic_message_factory_.reset(new DynamicMessageFactory()); 1033 } 1034 data->reset(dynamic_message_factory_->GetPrototype(desc)->New()); 1035 string serialized_value = reflection->GetString(any, value_field); 1036 if (!(*data)->ParseFromString(serialized_value)) { 1037 GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name; 1038 return false; 1039 } 1040 return true; 1041 } 1042 1043 bool MessageDifferencer::CompareUnknownFields( 1044 const Message& message1, const Message& message2, 1045 const google::protobuf::UnknownFieldSet& unknown_field_set1, 1046 const google::protobuf::UnknownFieldSet& unknown_field_set2, 1047 vector<SpecificField>* parent_field) { 1048 // Ignore unknown fields in EQUIVALENT mode. 1049 if (message_field_comparison_ == EQUIVALENT) return true; 1050 1051 if (unknown_field_set1.empty() && unknown_field_set2.empty()) { 1052 return true; 1053 } 1054 1055 bool is_different = false; 1056 1057 // We first sort the unknown fields by field number and type (in other words, 1058 // in tag order), making sure to preserve ordering of values with the same 1059 // tag. This allows us to report only meaningful differences between the 1060 // two sets -- that is, differing values for the same tag. We use 1061 // IndexUnknownFieldPairs to keep track of the field's original index for 1062 // reporting purposes. 1063 vector<IndexUnknownFieldPair> fields1; // unknown_field_set1, sorted 1064 vector<IndexUnknownFieldPair> fields2; // unknown_field_set2, sorted 1065 fields1.reserve(unknown_field_set1.field_count()); 1066 fields2.reserve(unknown_field_set2.field_count()); 1067 1068 for (int i = 0; i < unknown_field_set1.field_count(); i++) { 1069 fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i))); 1070 } 1071 for (int i = 0; i < unknown_field_set2.field_count(); i++) { 1072 fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i))); 1073 } 1074 1075 UnknownFieldOrdering is_before; 1076 std::stable_sort(fields1.begin(), fields1.end(), is_before); 1077 std::stable_sort(fields2.begin(), fields2.end(), is_before); 1078 1079 // In order to fill in SpecificField::index, we have to keep track of how 1080 // many values we've seen with the same field number and type. 1081 // current_repeated points at the first field in this range, and 1082 // current_repeated_start{1,2} are the indexes of the first field in the 1083 // range within fields1 and fields2. 1084 const UnknownField* current_repeated = NULL; 1085 int current_repeated_start1 = 0; 1086 int current_repeated_start2 = 0; 1087 1088 // Now that we have two sorted lists, we can detect fields which appear only 1089 // in one list or the other by traversing them simultaneously. 1090 int index1 = 0; 1091 int index2 = 0; 1092 while (index1 < fields1.size() || index2 < fields2.size()) { 1093 enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS, 1094 NO_CHANGE } change_type; 1095 1096 // focus_field is the field we're currently reporting on. (In the case 1097 // of a modification, it's the field on the left side.) 1098 const UnknownField* focus_field; 1099 bool match = false; 1100 1101 if (index2 == fields2.size() || 1102 (index1 < fields1.size() && 1103 is_before(fields1[index1], fields2[index2]))) { 1104 // fields1[index1] is not present in fields2. 1105 change_type = DELETION; 1106 focus_field = fields1[index1].second; 1107 } else if (index1 == fields1.size() || 1108 is_before(fields2[index2], fields1[index1])) { 1109 // fields2[index2] is not present in fields1. 1110 if (scope_ == PARTIAL) { 1111 // Ignore. 1112 ++index2; 1113 continue; 1114 } 1115 change_type = ADDITION; 1116 focus_field = fields2[index2].second; 1117 } else { 1118 // Field type and number are the same. See if the values differ. 1119 change_type = MODIFICATION; 1120 focus_field = fields1[index1].second; 1121 1122 switch (focus_field->type()) { 1123 case UnknownField::TYPE_VARINT: 1124 match = fields1[index1].second->varint() == 1125 fields2[index2].second->varint(); 1126 break; 1127 case UnknownField::TYPE_FIXED32: 1128 match = fields1[index1].second->fixed32() == 1129 fields2[index2].second->fixed32(); 1130 break; 1131 case UnknownField::TYPE_FIXED64: 1132 match = fields1[index1].second->fixed64() == 1133 fields2[index2].second->fixed64(); 1134 break; 1135 case UnknownField::TYPE_LENGTH_DELIMITED: 1136 match = fields1[index1].second->length_delimited() == 1137 fields2[index2].second->length_delimited(); 1138 break; 1139 case UnknownField::TYPE_GROUP: 1140 // We must deal with this later, after building the SpecificField. 1141 change_type = COMPARE_GROUPS; 1142 break; 1143 } 1144 if (match && change_type != COMPARE_GROUPS) { 1145 change_type = NO_CHANGE; 1146 } 1147 } 1148 1149 if (current_repeated == NULL || 1150 focus_field->number() != current_repeated->number() || 1151 focus_field->type() != current_repeated->type()) { 1152 // We've started a new repeated field. 1153 current_repeated = focus_field; 1154 current_repeated_start1 = index1; 1155 current_repeated_start2 = index2; 1156 } 1157 1158 if (change_type == NO_CHANGE && reporter_ == NULL) { 1159 // Fields were already compared and matched and we have no reporter. 1160 ++index1; 1161 ++index2; 1162 continue; 1163 } 1164 1165 // Build the SpecificField. This is slightly complicated. 1166 SpecificField specific_field; 1167 specific_field.unknown_field_number = focus_field->number(); 1168 specific_field.unknown_field_type = focus_field->type(); 1169 1170 specific_field.unknown_field_set1 = &unknown_field_set1; 1171 specific_field.unknown_field_set2 = &unknown_field_set2; 1172 1173 if (change_type != ADDITION) { 1174 specific_field.unknown_field_index1 = fields1[index1].first; 1175 } 1176 if (change_type != DELETION) { 1177 specific_field.unknown_field_index2 = fields2[index2].first; 1178 } 1179 1180 // Calculate the field index. 1181 if (change_type == ADDITION) { 1182 specific_field.index = index2 - current_repeated_start2; 1183 specific_field.new_index = index2 - current_repeated_start2; 1184 } else { 1185 specific_field.index = index1 - current_repeated_start1; 1186 specific_field.new_index = index2 - current_repeated_start2; 1187 } 1188 1189 if (IsUnknownFieldIgnored(message1, message2, specific_field, 1190 *parent_field)) { 1191 if (reporter_ != NULL) { 1192 parent_field->push_back(specific_field); 1193 reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field); 1194 parent_field->pop_back(); 1195 } 1196 return true; 1197 } 1198 1199 if (change_type == ADDITION || change_type == DELETION || 1200 change_type == MODIFICATION) { 1201 if (reporter_ == NULL) { 1202 // We found a difference and we have no reproter. 1203 return false; 1204 } 1205 is_different = true; 1206 } 1207 1208 parent_field->push_back(specific_field); 1209 1210 switch (change_type) { 1211 case ADDITION: 1212 reporter_->ReportAdded(message1, message2, *parent_field); 1213 ++index2; 1214 break; 1215 case DELETION: 1216 reporter_->ReportDeleted(message1, message2, *parent_field); 1217 ++index1; 1218 break; 1219 case MODIFICATION: 1220 reporter_->ReportModified(message1, message2, *parent_field); 1221 ++index1; 1222 ++index2; 1223 break; 1224 case COMPARE_GROUPS: 1225 if (!CompareUnknownFields(message1, message2, 1226 fields1[index1].second->group(), 1227 fields2[index2].second->group(), 1228 parent_field)) { 1229 if (reporter_ == NULL) return false; 1230 is_different = true; 1231 reporter_->ReportModified(message1, message2, *parent_field); 1232 } 1233 ++index1; 1234 ++index2; 1235 break; 1236 case NO_CHANGE: 1237 ++index1; 1238 ++index2; 1239 if (report_matches_) { 1240 reporter_->ReportMatched(message1, message2, *parent_field); 1241 } 1242 } 1243 1244 parent_field->pop_back(); 1245 } 1246 1247 return !is_different; 1248 } 1249 1250 namespace { 1251 1252 // Find maximum bipartite matching using the argumenting path algorithm. 1253 class MaximumMatcher { 1254 public: 1255 typedef ResultCallback2<bool, int, int> NodeMatchCallback; 1256 // MaximumMatcher takes ownership of the passed in callback and uses it to 1257 // determine whether a node on the left side of the bipartial graph matches 1258 // a node on the right side. count1 is the number of nodes on the left side 1259 // of the graph and count2 to is the number of nodes on the right side. 1260 // Every node is referred to using 0-based indices. 1261 // If a maximum match is found, the result will be stored in match_list1 and 1262 // match_list2. match_list1[i] == j means the i-th node on the left side is 1263 // matched to the j-th node on the right side and match_list2[x] == y means 1264 // the x-th node on the right side is matched to y-th node on the left side. 1265 // match_list1[i] == -1 means the node is not matched. Same with match_list2. 1266 MaximumMatcher(int count1, int count2, NodeMatchCallback* callback, 1267 vector<int>* match_list1, vector<int>* match_list2); 1268 // Find a maximum match and return the number of matched node pairs. 1269 // If early_return is true, this method will return 0 immediately when it 1270 // finds that not all nodes on the left side can be matched. 1271 int FindMaximumMatch(bool early_return); 1272 private: 1273 // Determines whether the node on the left side of the bipartial graph 1274 // matches the one on the right side. 1275 bool Match(int left, int right); 1276 // Find an argumenting path starting from the node v on the left side. If a 1277 // path can be found, update match_list2_ to reflect the path and return 1278 // true. 1279 bool FindArgumentPathDFS(int v, vector<bool>* visited); 1280 1281 int count1_; 1282 int count2_; 1283 google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_; 1284 map<pair<int, int>, bool> cached_match_results_; 1285 vector<int>* match_list1_; 1286 vector<int>* match_list2_; 1287 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher); 1288 }; 1289 1290 MaximumMatcher::MaximumMatcher(int count1, int count2, 1291 NodeMatchCallback* callback, 1292 vector<int>* match_list1, 1293 vector<int>* match_list2) 1294 : count1_(count1), count2_(count2), match_callback_(callback), 1295 match_list1_(match_list1), match_list2_(match_list2) { 1296 match_list1_->assign(count1, -1); 1297 match_list2_->assign(count2, -1); 1298 } 1299 1300 int MaximumMatcher::FindMaximumMatch(bool early_return) { 1301 int result = 0; 1302 for (int i = 0; i < count1_; ++i) { 1303 vector<bool> visited(count1_); 1304 if (FindArgumentPathDFS(i, &visited)) { 1305 ++result; 1306 } else if (early_return) { 1307 return 0; 1308 } 1309 } 1310 // Backfill match_list1_ as we only filled match_list2_ when finding 1311 // argumenting pathes. 1312 for (int i = 0; i < count2_; ++i) { 1313 if ((*match_list2_)[i] != -1) { 1314 (*match_list1_)[(*match_list2_)[i]] = i; 1315 } 1316 } 1317 return result; 1318 } 1319 1320 bool MaximumMatcher::Match(int left, int right) { 1321 pair<int, int> p(left, right); 1322 map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p); 1323 if (it != cached_match_results_.end()) { 1324 return it->second; 1325 } 1326 cached_match_results_[p] = match_callback_->Run(left, right); 1327 return cached_match_results_[p]; 1328 } 1329 1330 bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) { 1331 (*visited)[v] = true; 1332 // We try to match those un-matched nodes on the right side first. This is 1333 // the step that the navie greedy matching algorithm uses. In the best cases 1334 // where the greedy algorithm can find a maximum matching, we will always 1335 // find a match in this step and the performance will be identical to the 1336 // greedy algorithm. 1337 for (int i = 0; i < count2_; ++i) { 1338 int matched = (*match_list2_)[i]; 1339 if (matched == -1 && Match(v, i)) { 1340 (*match_list2_)[i] = v; 1341 return true; 1342 } 1343 } 1344 // Then we try those already matched nodes and see if we can find an 1345 // alternaive match for the node matched to them. 1346 // The greedy algorithm will stop before this and fail to produce the 1347 // correct result. 1348 for (int i = 0; i < count2_; ++i) { 1349 int matched = (*match_list2_)[i]; 1350 if (matched != -1 && Match(v, i)) { 1351 if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) { 1352 (*match_list2_)[i] = v; 1353 return true; 1354 } 1355 } 1356 } 1357 return false; 1358 } 1359 1360 } // namespace 1361 1362 bool MessageDifferencer::MatchRepeatedFieldIndices( 1363 const Message& message1, 1364 const Message& message2, 1365 const FieldDescriptor* repeated_field, 1366 const vector<SpecificField>& parent_fields, 1367 vector<int>* match_list1, 1368 vector<int>* match_list2) { 1369 const int count1 = 1370 message1.GetReflection()->FieldSize(message1, repeated_field); 1371 const int count2 = 1372 message2.GetReflection()->FieldSize(message2, repeated_field); 1373 const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field); 1374 1375 match_list1->assign(count1, -1); 1376 match_list2->assign(count2, -1); 1377 1378 SpecificField specific_field; 1379 specific_field.field = repeated_field; 1380 1381 bool success = true; 1382 // Find potential match if this is a special repeated field. 1383 if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) { 1384 if (scope_ == PARTIAL) { 1385 // When partial matching is enabled, Compare(a, b) && Compare(a, c) 1386 // doesn't neccessarily imply Compare(b, c). Therefore a naive greedy 1387 // algorithm will fail to find a maximum matching. 1388 // Here we use the argumenting path algorithm. 1389 MaximumMatcher::NodeMatchCallback* callback = 1390 ::google::protobuf::internal::NewPermanentCallback( 1391 this, &MessageDifferencer::IsMatch, 1392 repeated_field, key_comparator, 1393 &message1, &message2, parent_fields); 1394 MaximumMatcher matcher(count1, count2, callback, match_list1, 1395 match_list2); 1396 // If diff info is not needed, we should end the matching process as 1397 // soon as possible if not all items can be matched. 1398 bool early_return = (reporter_ == NULL); 1399 int match_count = matcher.FindMaximumMatch(early_return); 1400 if (match_count != count1 && reporter_ == NULL) return false; 1401 success = success && (match_count == count1); 1402 } else { 1403 for (int i = 0; i < count1; ++i) { 1404 // Indicates any matched elements for this repeated field. 1405 bool match = false; 1406 1407 specific_field.index = i; 1408 specific_field.new_index = i; 1409 1410 for (int j = 0; j < count2; j++) { 1411 if (match_list2->at(j) != -1) continue; 1412 specific_field.index = i; 1413 specific_field.new_index = j; 1414 1415 match = IsMatch(repeated_field, key_comparator, 1416 &message1, &message2, parent_fields, i, j); 1417 1418 if (match) { 1419 match_list1->at(specific_field.index) = specific_field.new_index; 1420 match_list2->at(specific_field.new_index) = specific_field.index; 1421 break; 1422 } 1423 } 1424 if (!match && reporter_ == NULL) return false; 1425 success = success && match; 1426 } 1427 } 1428 } else { 1429 // If this field should be treated as list, just label the match_list. 1430 for (int i = 0; i < count1 && i < count2; i++) { 1431 match_list1->at(i) = i; 1432 match_list2->at(i) = i; 1433 } 1434 } 1435 1436 return success; 1437 } 1438 1439 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult( 1440 const Message& message1, const Message& message2, 1441 const FieldDescriptor* field, int index1, int index2, 1442 const FieldContext* field_context) { 1443 FieldComparator* comparator = field_comparator_ != NULL ? 1444 field_comparator_ : &default_field_comparator_; 1445 return comparator->Compare(message1, message2, field, 1446 index1, index2, field_context); 1447 } 1448 1449 // =========================================================================== 1450 1451 MessageDifferencer::Reporter::Reporter() { } 1452 MessageDifferencer::Reporter::~Reporter() {} 1453 1454 // =========================================================================== 1455 1456 MessageDifferencer::MapKeyComparator::MapKeyComparator() {} 1457 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {} 1458 1459 // =========================================================================== 1460 1461 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {} 1462 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {} 1463 1464 // =========================================================================== 1465 1466 // Note that the printer's delimiter is not used, because if we are given a 1467 // printer, we don't know its delimiter. 1468 MessageDifferencer::StreamReporter::StreamReporter( 1469 io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')), 1470 delete_printer_(true), 1471 report_modified_aggregates_(false) { } 1472 1473 MessageDifferencer::StreamReporter::StreamReporter( 1474 io::Printer* printer) : printer_(printer), 1475 delete_printer_(false), 1476 report_modified_aggregates_(false) { } 1477 1478 MessageDifferencer::StreamReporter::~StreamReporter() { 1479 if (delete_printer_) delete printer_; 1480 } 1481 1482 void MessageDifferencer::StreamReporter::PrintPath( 1483 const vector<SpecificField>& field_path, bool left_side) { 1484 for (int i = 0; i < field_path.size(); ++i) { 1485 if (i > 0) { 1486 printer_->Print("."); 1487 } 1488 1489 SpecificField specific_field = field_path[i]; 1490 1491 if (specific_field.field != NULL) { 1492 if (specific_field.field->is_extension()) { 1493 printer_->Print("($name$)", "name", 1494 specific_field.field->full_name()); 1495 } else { 1496 printer_->PrintRaw(specific_field.field->name()); 1497 } 1498 } else { 1499 printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number)); 1500 } 1501 if (left_side && specific_field.index >= 0) { 1502 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index)); 1503 } 1504 if (!left_side && specific_field.new_index >= 0) { 1505 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index)); 1506 } 1507 } 1508 } 1509 1510 void MessageDifferencer:: 1511 StreamReporter::PrintValue(const Message& message, 1512 const vector<SpecificField>& field_path, 1513 bool left_side) { 1514 const SpecificField& specific_field = field_path.back(); 1515 const FieldDescriptor* field = specific_field.field; 1516 if (field != NULL) { 1517 string output; 1518 int index = left_side ? specific_field.index : specific_field.new_index; 1519 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 1520 const Reflection* reflection = message.GetReflection(); 1521 const Message& field_message = field->is_repeated() ? 1522 reflection->GetRepeatedMessage(message, field, index) : 1523 reflection->GetMessage(message, field); 1524 output = field_message.ShortDebugString(); 1525 if (output.empty()) { 1526 printer_->Print("{ }"); 1527 } else { 1528 printer_->Print("{ $name$ }", "name", output); 1529 } 1530 } else { 1531 TextFormat::PrintFieldValueToString(message, field, index, &output); 1532 printer_->PrintRaw(output); 1533 } 1534 } else { 1535 const UnknownFieldSet* unknown_fields = 1536 (left_side ? 1537 specific_field.unknown_field_set1 : 1538 specific_field.unknown_field_set2); 1539 const UnknownField* unknown_field = &unknown_fields->field( 1540 left_side ? 1541 specific_field.unknown_field_index1 : 1542 specific_field.unknown_field_index2); 1543 PrintUnknownFieldValue(unknown_field); 1544 } 1545 } 1546 1547 void MessageDifferencer:: 1548 StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) { 1549 GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field."; 1550 1551 string output; 1552 switch (unknown_field->type()) { 1553 case UnknownField::TYPE_VARINT: 1554 output = SimpleItoa(unknown_field->varint()); 1555 break; 1556 case UnknownField::TYPE_FIXED32: 1557 output = StrCat("0x", strings::Hex(unknown_field->fixed32(), 1558 strings::ZERO_PAD_8)); 1559 break; 1560 case UnknownField::TYPE_FIXED64: 1561 output = StrCat("0x", strings::Hex(unknown_field->fixed64(), 1562 strings::ZERO_PAD_16)); 1563 break; 1564 case UnknownField::TYPE_LENGTH_DELIMITED: 1565 output = StringPrintf("\"%s\"", 1566 CEscape(unknown_field->length_delimited()).c_str()); 1567 break; 1568 case UnknownField::TYPE_GROUP: 1569 // TODO(kenton): Print the contents of the group like we do for 1570 // messages. Requires an equivalent of ShortDebugString() for 1571 // UnknownFieldSet. 1572 output = "{ ... }"; 1573 break; 1574 } 1575 printer_->PrintRaw(output); 1576 } 1577 1578 void MessageDifferencer::StreamReporter::Print(const string& str) { 1579 printer_->Print(str.c_str()); 1580 } 1581 1582 void MessageDifferencer::StreamReporter::ReportAdded( 1583 const Message& message1, 1584 const Message& message2, 1585 const vector<SpecificField>& field_path) { 1586 printer_->Print("added: "); 1587 PrintPath(field_path, false); 1588 printer_->Print(": "); 1589 PrintValue(message2, field_path, false); 1590 printer_->Print("\n"); // Print for newlines. 1591 } 1592 1593 void MessageDifferencer::StreamReporter::ReportDeleted( 1594 const Message& message1, 1595 const Message& message2, 1596 const vector<SpecificField>& field_path) { 1597 printer_->Print("deleted: "); 1598 PrintPath(field_path, true); 1599 printer_->Print(": "); 1600 PrintValue(message1, field_path, true); 1601 printer_->Print("\n"); // Print for newlines 1602 } 1603 1604 void MessageDifferencer::StreamReporter::ReportModified( 1605 const Message& message1, 1606 const Message& message2, 1607 const vector<SpecificField>& field_path) { 1608 if (!report_modified_aggregates_ && field_path.back().field == NULL) { 1609 if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) { 1610 // Any changes to the subfields have already been printed. 1611 return; 1612 } 1613 } else if (!report_modified_aggregates_) { 1614 if (field_path.back().field->cpp_type() == 1615 FieldDescriptor::CPPTYPE_MESSAGE) { 1616 // Any changes to the subfields have already been printed. 1617 return; 1618 } 1619 } 1620 1621 printer_->Print("modified: "); 1622 PrintPath(field_path, true); 1623 if (CheckPathChanged(field_path)) { 1624 printer_->Print(" -> "); 1625 PrintPath(field_path, false); 1626 } 1627 printer_->Print(": "); 1628 PrintValue(message1, field_path, true); 1629 printer_->Print(" -> "); 1630 PrintValue(message2, field_path, false); 1631 printer_->Print("\n"); // Print for newlines. 1632 } 1633 1634 void MessageDifferencer::StreamReporter::ReportMoved( 1635 const Message& message1, 1636 const Message& message2, 1637 const vector<SpecificField>& field_path) { 1638 printer_->Print("moved: "); 1639 PrintPath(field_path, true); 1640 printer_->Print(" -> "); 1641 PrintPath(field_path, false); 1642 printer_->Print(" : "); 1643 PrintValue(message1, field_path, true); 1644 printer_->Print("\n"); // Print for newlines. 1645 } 1646 1647 void MessageDifferencer::StreamReporter::ReportMatched( 1648 const Message& message1, 1649 const Message& message2, 1650 const vector<SpecificField>& field_path) { 1651 printer_->Print("matched: "); 1652 PrintPath(field_path, true); 1653 if (CheckPathChanged(field_path)) { 1654 printer_->Print(" -> "); 1655 PrintPath(field_path, false); 1656 } 1657 printer_->Print(" : "); 1658 PrintValue(message1, field_path, true); 1659 printer_->Print("\n"); // Print for newlines. 1660 } 1661 1662 void MessageDifferencer::StreamReporter::ReportIgnored( 1663 const Message& message1, 1664 const Message& message2, 1665 const vector<SpecificField>& field_path) { 1666 printer_->Print("ignored: "); 1667 PrintPath(field_path, true); 1668 if (CheckPathChanged(field_path)) { 1669 printer_->Print(" -> "); 1670 PrintPath(field_path, false); 1671 } 1672 printer_->Print("\n"); // Print for newlines. 1673 } 1674 1675 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored( 1676 const Message& message1, const Message& message2, 1677 const vector<SpecificField>& field_path) { 1678 printer_->Print("ignored: "); 1679 PrintPath(field_path, true); 1680 if (CheckPathChanged(field_path)) { 1681 printer_->Print(" -> "); 1682 PrintPath(field_path, false); 1683 } 1684 printer_->Print("\n"); // Print for newlines. 1685 } 1686 1687 } // namespace util 1688 } // namespace protobuf 1689 } // namespace google 1690