Home | History | Annotate | Download | only in util
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // This file defines static methods and classes for comparing Protocol
     36 // Messages (see //google/protobuf/util/message_differencer.h for more
     37 // information).
     38 
     39 #include <google/protobuf/util/message_differencer.h>
     40 
     41 #include <algorithm>
     42 #include <memory>
     43 #ifndef _SHARED_PTR_H
     44 #include <google/protobuf/stubs/shared_ptr.h>
     45 #endif
     46 #include <utility>
     47 
     48 #include <google/protobuf/stubs/callback.h>
     49 #include <google/protobuf/stubs/common.h>
     50 #include <google/protobuf/stubs/logging.h>
     51 #include <google/protobuf/stubs/stringprintf.h>
     52 #include <google/protobuf/any.h>
     53 #include <google/protobuf/io/printer.h>
     54 #include <google/protobuf/io/zero_copy_stream.h>
     55 #include <google/protobuf/io/zero_copy_stream_impl.h>
     56 #include <google/protobuf/dynamic_message.h>
     57 #include <google/protobuf/text_format.h>
     58 #include <google/protobuf/util/field_comparator.h>
     59 #include <google/protobuf/stubs/strutil.h>
     60 
     61 namespace google {
     62 namespace protobuf {
     63 
     64 namespace util {
     65 
     66 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
     67 // be used to specify multiple fields as key for key comparison.
     68 // Two elements of a repeated field will be regarded as having the same key
     69 // iff they have the same value for every specified key field.
     70 // Note that you can also specify only one field as key.
     71 class MessageDifferencer::MultipleFieldsMapKeyComparator
     72     : public MessageDifferencer::MapKeyComparator {
     73  public:
     74   MultipleFieldsMapKeyComparator(
     75       MessageDifferencer* message_differencer,
     76       const vector<vector<const FieldDescriptor*> >& key_field_paths)
     77         : message_differencer_(message_differencer),
     78           key_field_paths_(key_field_paths) {
     79     GOOGLE_CHECK(!key_field_paths_.empty());
     80     for (int i = 0; i < key_field_paths_.size(); ++i) {
     81       GOOGLE_CHECK(!key_field_paths_[i].empty());
     82     }
     83   }
     84   MultipleFieldsMapKeyComparator(
     85       MessageDifferencer* message_differencer,
     86       const FieldDescriptor* key)
     87         : message_differencer_(message_differencer) {
     88     vector<const FieldDescriptor*> key_field_path;
     89     key_field_path.push_back(key);
     90     key_field_paths_.push_back(key_field_path);
     91   }
     92   virtual bool IsMatch(
     93       const Message& message1,
     94       const Message& message2,
     95       const vector<SpecificField>& parent_fields) const {
     96     for (int i = 0; i < key_field_paths_.size(); ++i) {
     97       if (!IsMatchInternal(message1, message2, parent_fields,
     98                            key_field_paths_[i], 0)) {
     99         return false;
    100       }
    101     }
    102     return true;
    103   }
    104  private:
    105   bool IsMatchInternal(
    106       const Message& message1,
    107       const Message& message2,
    108       const vector<SpecificField>& parent_fields,
    109       const vector<const FieldDescriptor*>& key_field_path,
    110       int path_index) const {
    111     const FieldDescriptor* field = key_field_path[path_index];
    112     vector<SpecificField> current_parent_fields(parent_fields);
    113     if (path_index == key_field_path.size() - 1) {
    114       if (field->is_repeated()) {
    115         if (!message_differencer_->CompareRepeatedField(
    116             message1, message2, field, &current_parent_fields)) {
    117           return false;
    118         }
    119       } else {
    120         if (!message_differencer_->CompareFieldValueUsingParentFields(
    121             message1, message2, field, -1, -1, &current_parent_fields)) {
    122           return false;
    123         }
    124       }
    125       return true;
    126     } else {
    127       const Reflection* reflection1 = message1.GetReflection();
    128       const Reflection* reflection2 = message2.GetReflection();
    129       bool has_field1 = reflection1->HasField(message1, field);
    130       bool has_field2 = reflection2->HasField(message2, field);
    131       if (!has_field1 && !has_field2) {
    132         return true;
    133       }
    134       if (has_field1 != has_field2) {
    135         return false;
    136       }
    137       SpecificField specific_field;
    138       specific_field.field = field;
    139       current_parent_fields.push_back(specific_field);
    140       return IsMatchInternal(
    141           reflection1->GetMessage(message1, field),
    142           reflection2->GetMessage(message2, field),
    143           current_parent_fields,
    144           key_field_path,
    145           path_index + 1);
    146     }
    147   }
    148   MessageDifferencer* message_differencer_;
    149   vector<vector<const FieldDescriptor*> > key_field_paths_;
    150   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
    151 };
    152 
    153 bool MessageDifferencer::Equals(const Message& message1,
    154                                 const Message& message2) {
    155   MessageDifferencer differencer;
    156 
    157   return differencer.Compare(message1, message2);
    158 }
    159 
    160 bool MessageDifferencer::Equivalent(const Message& message1,
    161                                     const Message& message2) {
    162   MessageDifferencer differencer;
    163   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
    164 
    165   return differencer.Compare(message1, message2);
    166 }
    167 
    168 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
    169                                              const Message& message2) {
    170   MessageDifferencer differencer;
    171   differencer.set_float_comparison(
    172       MessageDifferencer::APPROXIMATE);
    173 
    174   return differencer.Compare(message1, message2);
    175 }
    176 
    177 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
    178                                                  const Message& message2) {
    179   MessageDifferencer differencer;
    180   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
    181   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
    182 
    183   return differencer.Compare(message1, message2);
    184 }
    185 
    186 // ===========================================================================
    187 
    188 MessageDifferencer::MessageDifferencer()
    189     : reporter_(NULL),
    190       field_comparator_(NULL),
    191       message_field_comparison_(EQUAL),
    192       scope_(FULL),
    193       repeated_field_comparison_(AS_LIST),
    194       report_matches_(false),
    195       output_string_(NULL) { }
    196 
    197 MessageDifferencer::~MessageDifferencer() {
    198   for (int i = 0; i < owned_key_comparators_.size(); ++i) {
    199     delete owned_key_comparators_[i];
    200   }
    201   for (int i = 0; i < ignore_criteria_.size(); ++i) {
    202     delete ignore_criteria_[i];
    203   }
    204 }
    205 
    206 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
    207   GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
    208   field_comparator_ = comparator;
    209 }
    210 
    211 void MessageDifferencer::set_message_field_comparison(
    212     MessageFieldComparison comparison) {
    213   message_field_comparison_ = comparison;
    214 }
    215 
    216 void MessageDifferencer::set_scope(Scope scope) {
    217   scope_ = scope;
    218 }
    219 
    220 MessageDifferencer::Scope MessageDifferencer::scope() {
    221   return scope_;
    222 }
    223 
    224 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
    225   default_field_comparator_.set_float_comparison(
    226       comparison == EXACT ?
    227       DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE);
    228 }
    229 
    230 void MessageDifferencer::set_repeated_field_comparison(
    231     RepeatedFieldComparison comparison) {
    232   repeated_field_comparison_ = comparison;
    233 }
    234 
    235 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
    236   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
    237                                << field->full_name();
    238   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
    239   GOOGLE_CHECK(key_comparator == NULL)
    240       << "Cannot treat this repeated field as both Map and Set for"
    241       << " comparison.  Field name is: " << field->full_name();
    242   GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
    243       << "Cannot treat the same field as both SET and LIST. Field name is: "
    244       << field->full_name();
    245   set_fields_.insert(field);
    246 }
    247 
    248 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
    249   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
    250                               << field->full_name();
    251   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
    252   GOOGLE_CHECK(key_comparator == NULL)
    253       << "Cannot treat this repeated field as both Map and Set for"
    254       << " comparison.  Field name is: " << field->full_name();
    255   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
    256       << "Cannot treat the same field as both SET and LIST. Field name is: "
    257       << field->full_name();
    258   list_fields_.insert(field);
    259 }
    260 
    261 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
    262                                     const FieldDescriptor* key) {
    263   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
    264                                << field->full_name();
    265   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
    266       << "Field has to be message type.  Field name is: "
    267       << field->full_name();
    268   GOOGLE_CHECK(key->containing_type() == field->message_type())
    269       << key->full_name()
    270       << " must be a direct subfield within the repeated field "
    271       << field->full_name() << ", not " << key->containing_type()->full_name();
    272   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
    273       << "Cannot treat this repeated field as both Map and Set for "
    274       << "comparison.";
    275   GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
    276       << "Cannot treat this repeated field as both Map and List for "
    277       << "comparison.";
    278   MapKeyComparator* key_comparator =
    279       new MultipleFieldsMapKeyComparator(this, key);
    280   owned_key_comparators_.push_back(key_comparator);
    281   map_field_key_comparator_[field] = key_comparator;
    282 }
    283 
    284 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
    285     const FieldDescriptor* field,
    286     const vector<const FieldDescriptor*>& key_fields) {
    287   vector<vector<const FieldDescriptor*> > key_field_paths;
    288   for (int i = 0; i < key_fields.size(); ++i) {
    289     vector<const FieldDescriptor*> key_field_path;
    290     key_field_path.push_back(key_fields[i]);
    291     key_field_paths.push_back(key_field_path);
    292   }
    293   TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
    294 }
    295 
    296 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
    297     const FieldDescriptor* field,
    298     const vector<vector<const FieldDescriptor*> >& key_field_paths) {
    299   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
    300                               << field->full_name();
    301   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
    302       << "Field has to be message type.  Field name is: "
    303       << field->full_name();
    304   for (int i = 0; i < key_field_paths.size(); ++i) {
    305     const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i];
    306     for (int j = 0; j < key_field_path.size(); ++j) {
    307       const FieldDescriptor* parent_field =
    308           j == 0 ? field : key_field_path[j - 1];
    309       const FieldDescriptor* child_field = key_field_path[j];
    310       GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
    311           << child_field->full_name()
    312           << " must be a direct subfield within the field: "
    313           << parent_field->full_name();
    314       if (j != 0) {
    315         GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
    316             << parent_field->full_name() << " has to be of type message.";
    317         GOOGLE_CHECK(!parent_field->is_repeated())
    318             << parent_field->full_name() << " cannot be a repeated field.";
    319       }
    320     }
    321   }
    322   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
    323       << "Cannot treat this repeated field as both Map and Set for "
    324       << "comparison.";
    325   MapKeyComparator* key_comparator =
    326       new MultipleFieldsMapKeyComparator(this, key_field_paths);
    327   owned_key_comparators_.push_back(key_comparator);
    328   map_field_key_comparator_[field] = key_comparator;
    329 }
    330 
    331 void MessageDifferencer::TreatAsMapUsingKeyComparator(
    332     const FieldDescriptor* field,
    333     const MapKeyComparator* key_comparator) {
    334   GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
    335                                << field->full_name();
    336   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
    337       << "Field has to be message type.  Field name is: "
    338       << field->full_name();
    339   GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
    340       << "Cannot treat this repeated field as both Map and Set for "
    341       << "comparison.";
    342   map_field_key_comparator_[field] = key_comparator;
    343 }
    344 
    345 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
    346   ignore_criteria_.push_back(ignore_criteria);
    347 }
    348 
    349 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
    350   ignored_fields_.insert(field);
    351 }
    352 
    353 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
    354                                               double fraction, double margin) {
    355   default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
    356 }
    357 
    358 void MessageDifferencer::ReportDifferencesToString(string* output) {
    359   GOOGLE_DCHECK(output) << "Specified output string was NULL";
    360 
    361   output_string_ = output;
    362   output_string_->clear();
    363 }
    364 
    365 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
    366   // If an output string is set, clear it to prevent
    367   // it superceding the specified reporter.
    368   if (output_string_) {
    369     output_string_ = NULL;
    370   }
    371 
    372   reporter_ = reporter;
    373 }
    374 
    375 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
    376                                      const FieldDescriptor* field2) {
    377   // Handle sentinel values (i.e. make sure NULLs are always ordered
    378   // at the end of the list).
    379   if (field1 == NULL) {
    380     return false;
    381   }
    382 
    383   if (field2 == NULL) {
    384     return true;
    385   }
    386 
    387   // Always order fields by their tag number
    388   return (field1->number() < field2->number());
    389 }
    390 
    391 bool MessageDifferencer::Compare(const Message& message1,
    392                                  const Message& message2) {
    393   vector<SpecificField> parent_fields;
    394 
    395   bool result = false;
    396 
    397   // Setup the internal reporter if need be.
    398   if (output_string_) {
    399     io::StringOutputStream output_stream(output_string_);
    400     StreamReporter reporter(&output_stream);
    401     reporter_ = &reporter;
    402     result = Compare(message1, message2, &parent_fields);
    403     reporter_ = NULL;
    404   } else {
    405     result = Compare(message1, message2, &parent_fields);
    406   }
    407 
    408   return result;
    409 }
    410 
    411 bool MessageDifferencer::CompareWithFields(
    412     const Message& message1,
    413     const Message& message2,
    414     const vector<const FieldDescriptor*>& message1_fields_arg,
    415     const vector<const FieldDescriptor*>& message2_fields_arg) {
    416   if (message1.GetDescriptor() != message2.GetDescriptor()) {
    417     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
    418                 << "descriptors.";
    419     return false;
    420   }
    421 
    422   vector<SpecificField> parent_fields;
    423 
    424   bool result = false;
    425 
    426   vector<const FieldDescriptor*> message1_fields(message1_fields_arg);
    427   vector<const FieldDescriptor*> message2_fields(message2_fields_arg);
    428 
    429   std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
    430   std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
    431   // Append NULL sentinel values.
    432   message1_fields.push_back(NULL);
    433   message2_fields.push_back(NULL);
    434 
    435   // Setup the internal reporter if need be.
    436   if (output_string_) {
    437     io::StringOutputStream output_stream(output_string_);
    438     StreamReporter reporter(&output_stream);
    439     reporter_ = &reporter;
    440     result = CompareRequestedFieldsUsingSettings(
    441         message1, message2, message1_fields, message2_fields, &parent_fields);
    442     reporter_ = NULL;
    443   } else {
    444     result = CompareRequestedFieldsUsingSettings(
    445         message1, message2, message1_fields, message2_fields, &parent_fields);
    446   }
    447 
    448   return result;
    449 }
    450 
    451 bool MessageDifferencer::Compare(
    452     const Message& message1,
    453     const Message& message2,
    454     vector<SpecificField>* parent_fields) {
    455   const Descriptor* descriptor1 = message1.GetDescriptor();
    456   const Descriptor* descriptor2 = message2.GetDescriptor();
    457   if (descriptor1 != descriptor2) {
    458     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
    459                 << "descriptors. "
    460                 << descriptor1->full_name() << " vs "
    461                 << descriptor2->full_name();
    462     return false;
    463   }
    464   // Expand google.protobuf.Any payload if possible.
    465   if (descriptor1->full_name() == internal::kAnyFullTypeName) {
    466     google::protobuf::scoped_ptr<Message> data1;
    467     google::protobuf::scoped_ptr<Message> data2;
    468     if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
    469       return Compare(*data1, *data2, parent_fields);
    470     }
    471   }
    472   const Reflection* reflection1 = message1.GetReflection();
    473   const Reflection* reflection2 = message2.GetReflection();
    474 
    475   // Retrieve all the set fields, including extensions.
    476   vector<const FieldDescriptor*> message1_fields;
    477   vector<const FieldDescriptor*> message2_fields;
    478 
    479   reflection1->ListFields(message1, &message1_fields);
    480   reflection2->ListFields(message2, &message2_fields);
    481 
    482   // Add sentinel values to deal with the
    483   // case where the number of the fields in
    484   // each list are different.
    485   message1_fields.push_back(NULL);
    486   message2_fields.push_back(NULL);
    487 
    488   bool unknown_compare_result = true;
    489   // Ignore unknown fields in EQUIVALENT mode
    490   if (message_field_comparison_ != EQUIVALENT) {
    491     const google::protobuf::UnknownFieldSet* unknown_field_set1 =
    492         &reflection1->GetUnknownFields(message1);
    493     const google::protobuf::UnknownFieldSet* unknown_field_set2 =
    494         &reflection2->GetUnknownFields(message2);
    495     if (!CompareUnknownFields(message1, message2,
    496                               *unknown_field_set1, *unknown_field_set2,
    497                               parent_fields)) {
    498       if (reporter_ == NULL) {
    499         return false;
    500       };
    501       unknown_compare_result = false;
    502     }
    503   }
    504 
    505   return CompareRequestedFieldsUsingSettings(
    506       message1, message2,
    507       message1_fields, message2_fields,
    508       parent_fields) && unknown_compare_result;
    509 }
    510 
    511 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
    512     const Message& message1,
    513     const Message& message2,
    514     const vector<const FieldDescriptor*>& message1_fields,
    515     const vector<const FieldDescriptor*>& message2_fields,
    516     vector<SpecificField>* parent_fields) {
    517   if (scope_ == FULL) {
    518     if (message_field_comparison_ == EQUIVALENT) {
    519       // We need to merge the field lists of both messages (i.e.
    520       // we are merely checking for a difference in field values,
    521       // rather than the addition or deletion of fields).
    522       vector<const FieldDescriptor*> fields_union;
    523       CombineFields(message1_fields, FULL, message2_fields, FULL,
    524                     &fields_union);
    525       return CompareWithFieldsInternal(message1, message2, fields_union,
    526                                        fields_union, parent_fields);
    527     } else {
    528       // Simple equality comparison, use the unaltered field lists.
    529       return CompareWithFieldsInternal(message1, message2, message1_fields,
    530                                        message2_fields, parent_fields);
    531     }
    532   } else {
    533     if (message_field_comparison_ == EQUIVALENT) {
    534       // We use the list of fields for message1 for both messages when
    535       // comparing.  This way, extra fields in message2 are ignored,
    536       // and missing fields in message2 use their default value.
    537       return CompareWithFieldsInternal(message1, message2, message1_fields,
    538                                        message1_fields, parent_fields);
    539     } else {
    540       // We need to consider the full list of fields for message1
    541       // but only the intersection for message2.  This way, any fields
    542       // only present in message2 will be ignored, but any fields only
    543       // present in message1 will be marked as a difference.
    544       vector<const FieldDescriptor*> fields_intersection;
    545       CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL,
    546                     &fields_intersection);
    547       return CompareWithFieldsInternal(message1, message2, message1_fields,
    548                                        fields_intersection, parent_fields);
    549     }
    550   }
    551 }
    552 
    553 void MessageDifferencer::CombineFields(
    554     const vector<const FieldDescriptor*>& fields1,
    555     Scope fields1_scope,
    556     const vector<const FieldDescriptor*>& fields2,
    557     Scope fields2_scope,
    558     vector<const FieldDescriptor*>* combined_fields) {
    559 
    560   int index1 = 0;
    561   int index2 = 0;
    562 
    563   while (index1 < fields1.size() && index2 < fields2.size()) {
    564     const FieldDescriptor* field1 = fields1[index1];
    565     const FieldDescriptor* field2 = fields2[index2];
    566 
    567     if (FieldBefore(field1, field2)) {
    568       if (fields1_scope == FULL) {
    569         combined_fields->push_back(fields1[index1]);
    570       }
    571       ++index1;
    572     } else if (FieldBefore(field2, field1)) {
    573       if (fields2_scope == FULL) {
    574         combined_fields->push_back(fields2[index2]);
    575       }
    576       ++index2;
    577     } else {
    578       combined_fields->push_back(fields1[index1]);
    579       ++index1;
    580       ++index2;
    581     }
    582   }
    583 }
    584 
    585 bool MessageDifferencer::CompareWithFieldsInternal(
    586     const Message& message1,
    587     const Message& message2,
    588     const vector<const FieldDescriptor*>& message1_fields,
    589     const vector<const FieldDescriptor*>& message2_fields,
    590     vector<SpecificField>* parent_fields) {
    591   bool isDifferent = false;
    592   int field_index1 = 0;
    593   int field_index2 = 0;
    594 
    595   const Reflection* reflection1 = message1.GetReflection();
    596   const Reflection* reflection2 = message2.GetReflection();
    597 
    598   while (true) {
    599     const FieldDescriptor* field1 = message1_fields[field_index1];
    600     const FieldDescriptor* field2 = message2_fields[field_index2];
    601 
    602     // Once we have reached sentinel values, we are done the comparison.
    603     if (field1 == NULL && field2 == NULL) {
    604       break;
    605     }
    606 
    607     // Check for differences in the field itself.
    608     if (FieldBefore(field1, field2)) {
    609       // Field 1 is not in the field list for message 2.
    610       if (IsIgnored(message1, message2, field1, *parent_fields)) {
    611         // We are ignoring field1. Report the ignore and move on to
    612         // the next field in message1_fields.
    613         if (reporter_ != NULL) {
    614           SpecificField specific_field;
    615           specific_field.field = field1;
    616 
    617           parent_fields->push_back(specific_field);
    618           reporter_->ReportIgnored(message1, message2, *parent_fields);
    619           parent_fields->pop_back();
    620         }
    621         ++field_index1;
    622         continue;
    623       }
    624 
    625       if (reporter_ != NULL) {
    626         int count = field1->is_repeated() ?
    627             reflection1->FieldSize(message1, field1) : 1;
    628 
    629         for (int i = 0; i < count; ++i) {
    630           SpecificField specific_field;
    631           specific_field.field = field1;
    632           specific_field.index = field1->is_repeated() ? i : -1;
    633 
    634           parent_fields->push_back(specific_field);
    635           reporter_->ReportDeleted(message1, message2, *parent_fields);
    636           parent_fields->pop_back();
    637         }
    638 
    639         isDifferent = true;
    640       } else {
    641         return false;
    642       }
    643 
    644       ++field_index1;
    645       continue;
    646     } else if (FieldBefore(field2, field1)) {
    647       // Field 2 is not in the field list for message 1.
    648       if (IsIgnored(message1, message2, field2, *parent_fields)) {
    649         // We are ignoring field2. Report the ignore and move on to
    650         // the next field in message2_fields.
    651         if (reporter_ != NULL) {
    652           SpecificField specific_field;
    653           specific_field.field = field2;
    654 
    655           parent_fields->push_back(specific_field);
    656           reporter_->ReportIgnored(message1, message2, *parent_fields);
    657           parent_fields->pop_back();
    658         }
    659         ++field_index2;
    660         continue;
    661       }
    662 
    663       if (reporter_ != NULL) {
    664         int count = field2->is_repeated() ?
    665             reflection2->FieldSize(message2, field2) : 1;
    666 
    667         for (int i = 0; i < count; ++i) {
    668           SpecificField specific_field;
    669           specific_field.field = field2;
    670           specific_field.index = field2->is_repeated() ? i : -1;
    671           specific_field.new_index = specific_field.index;
    672 
    673           parent_fields->push_back(specific_field);
    674           reporter_->ReportAdded(message1, message2, *parent_fields);
    675           parent_fields->pop_back();
    676         }
    677 
    678         isDifferent = true;
    679       } else {
    680         return false;
    681       }
    682 
    683       ++field_index2;
    684       continue;
    685     }
    686 
    687     // By this point, field1 and field2 are guarenteed to point to the same
    688     // field, so we can now compare the values.
    689     if (IsIgnored(message1, message2, field1, *parent_fields)) {
    690       // Ignore this field. Report and move on.
    691       if (reporter_ != NULL) {
    692         SpecificField specific_field;
    693         specific_field.field = field1;
    694 
    695         parent_fields->push_back(specific_field);
    696         reporter_->ReportIgnored(message1, message2, *parent_fields);
    697         parent_fields->pop_back();
    698       }
    699 
    700       ++field_index1;
    701       ++field_index2;
    702       continue;
    703     }
    704 
    705     bool fieldDifferent = false;
    706     if (field1->is_repeated()) {
    707       fieldDifferent = !CompareRepeatedField(message1, message2, field1,
    708                                              parent_fields);
    709       if (fieldDifferent) {
    710         if (reporter_ == NULL) return false;
    711         isDifferent = true;
    712       }
    713     } else {
    714       fieldDifferent = !CompareFieldValueUsingParentFields(
    715           message1, message2, field1, -1, -1, parent_fields);
    716 
    717       // If we have found differences, either report them or terminate if
    718       // no reporter is present.
    719       if (fieldDifferent && reporter_ == NULL) {
    720         return false;
    721       }
    722 
    723       if (reporter_ != NULL) {
    724         SpecificField specific_field;
    725         specific_field.field = field1;
    726         parent_fields->push_back(specific_field);
    727         if (fieldDifferent) {
    728           reporter_->ReportModified(message1, message2, *parent_fields);
    729           isDifferent = true;
    730         } else if (report_matches_) {
    731           reporter_->ReportMatched(message1, message2, *parent_fields);
    732         }
    733         parent_fields->pop_back();
    734       }
    735     }
    736     // Increment the field indicies.
    737     ++field_index1;
    738     ++field_index2;
    739   }
    740 
    741   return !isDifferent;
    742 }
    743 
    744 bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field,
    745                                  const MapKeyComparator* key_comparator,
    746                                  const Message* message1,
    747                                  const Message* message2,
    748                                  const vector<SpecificField>& parent_fields,
    749                                  int index1, int index2) {
    750   vector<SpecificField> current_parent_fields(parent_fields);
    751   if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
    752     return CompareFieldValueUsingParentFields(
    753         *message1, *message2, repeated_field, index1, index2,
    754         &current_parent_fields);
    755   }
    756   // Back up the Reporter and output_string_.  They will be reset in the
    757   // following code.
    758   Reporter* backup_reporter = reporter_;
    759   string* output_string = output_string_;
    760   reporter_ = NULL;
    761   output_string_ = NULL;
    762   bool match;
    763 
    764   if (key_comparator == NULL) {
    765     match = CompareFieldValueUsingParentFields(
    766         *message1, *message2, repeated_field, index1, index2,
    767         &current_parent_fields);
    768   } else {
    769     const Reflection* reflection1 = message1->GetReflection();
    770     const Reflection* reflection2 = message2->GetReflection();
    771     const Message& m1 =
    772         reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
    773     const Message& m2 =
    774         reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
    775     SpecificField specific_field;
    776     specific_field.field = repeated_field;
    777     current_parent_fields.push_back(specific_field);
    778     match = key_comparator->IsMatch(m1, m2, current_parent_fields);
    779   }
    780 
    781   reporter_ = backup_reporter;
    782   output_string_ = output_string;
    783   return match;
    784 }
    785 
    786 bool MessageDifferencer::CompareRepeatedField(
    787     const Message& message1,
    788     const Message& message2,
    789     const FieldDescriptor* repeated_field,
    790     vector<SpecificField>* parent_fields) {
    791   // the input FieldDescriptor is guaranteed to be repeated field.
    792   const Reflection* reflection1 = message1.GetReflection();
    793   const Reflection* reflection2 = message2.GetReflection();
    794   const int count1 = reflection1->FieldSize(message1, repeated_field);
    795   const int count2 = reflection2->FieldSize(message2, repeated_field);
    796   const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
    797 
    798   // If the field is not treated as subset and no detailed reports is needed,
    799   // we do a quick check on the number of the elements to avoid unnecessary
    800   // comparison.
    801   if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
    802     return false;
    803   }
    804   // A match can never be found if message1 has more items than message2.
    805   if (count1 > count2 && reporter_ == NULL) {
    806     return false;
    807   }
    808 
    809   // These two list are used for store the index of the correspondent
    810   // element in peer repeated field.
    811   vector<int> match_list1;
    812   vector<int> match_list2;
    813 
    814   // Try to match indices of the repeated fields. Return false if match fails
    815   // and there's no detailed report needed.
    816   if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
    817                                  *parent_fields, &match_list1, &match_list2) &&
    818       reporter_ == NULL) {
    819     return false;
    820   }
    821 
    822   bool fieldDifferent = false;
    823   SpecificField specific_field;
    824   specific_field.field = repeated_field;
    825 
    826   // At this point, we have already matched pairs of fields (with the reporting
    827   // to be done later). Now to check if the paired elements are different.
    828   for (int i = 0; i < count1; i++) {
    829     if (match_list1[i] == -1) continue;
    830     specific_field.index = i;
    831     specific_field.new_index = match_list1[i];
    832 
    833     const bool result = CompareFieldValueUsingParentFields(
    834         message1, message2, repeated_field, i, specific_field.new_index,
    835         parent_fields);
    836 
    837     // If we have found differences, either report them or terminate if
    838     // no reporter is present. Note that ReportModified, ReportMoved, and
    839     // ReportMatched are all mutually exclusive.
    840     if (!result) {
    841       if (reporter_ == NULL) return false;
    842       parent_fields->push_back(specific_field);
    843       reporter_->ReportModified(message1, message2, *parent_fields);
    844       parent_fields->pop_back();
    845       fieldDifferent = true;
    846     } else if (reporter_ != NULL &&
    847                specific_field.index != specific_field.new_index) {
    848       parent_fields->push_back(specific_field);
    849       reporter_->ReportMoved(message1, message2, *parent_fields);
    850       parent_fields->pop_back();
    851     } else if (report_matches_ && reporter_ != NULL) {
    852       parent_fields->push_back(specific_field);
    853       reporter_->ReportMatched(message1, message2, *parent_fields);
    854       parent_fields->pop_back();
    855     }
    856   }
    857 
    858   // Report any remaining additions or deletions.
    859   for (int i = 0; i < count2; ++i) {
    860     if (match_list2[i] != -1) continue;
    861     if (!treated_as_subset) {
    862       fieldDifferent = true;
    863     }
    864 
    865     if (reporter_ == NULL) continue;
    866     specific_field.index = i;
    867     specific_field.new_index = i;
    868     parent_fields->push_back(specific_field);
    869     reporter_->ReportAdded(message1, message2, *parent_fields);
    870     parent_fields->pop_back();
    871   }
    872 
    873   for (int i = 0; i < count1; ++i) {
    874     if (match_list1[i] != -1) continue;
    875     specific_field.index = i;
    876     parent_fields->push_back(specific_field);
    877     reporter_->ReportDeleted(message1, message2, *parent_fields);
    878     parent_fields->pop_back();
    879     fieldDifferent = true;
    880   }
    881   return !fieldDifferent;
    882 }
    883 
    884 bool MessageDifferencer::CompareFieldValue(const Message& message1,
    885                                            const Message& message2,
    886                                            const FieldDescriptor* field,
    887                                            int index1,
    888                                            int index2) {
    889   return CompareFieldValueUsingParentFields(message1, message2, field, index1,
    890                                             index2, NULL);
    891 }
    892 
    893 bool MessageDifferencer::CompareFieldValueUsingParentFields(
    894     const Message& message1, const Message& message2,
    895     const FieldDescriptor* field, int index1, int index2,
    896     vector<SpecificField>* parent_fields) {
    897   FieldContext field_context(parent_fields);
    898   FieldComparator::ComparisonResult result = GetFieldComparisonResult(
    899       message1, message2, field, index1, index2, &field_context);
    900 
    901   if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
    902       result == FieldComparator::RECURSE) {
    903     // Get the nested messages and compare them using one of the Compare
    904     // methods.
    905     const Reflection* reflection1 = message1.GetReflection();
    906     const Reflection* reflection2 = message2.GetReflection();
    907     const Message& m1 = field->is_repeated() ?
    908         reflection1->GetRepeatedMessage(message1, field, index1) :
    909         reflection1->GetMessage(message1, field);
    910     const Message& m2 = field->is_repeated() ?
    911         reflection2->GetRepeatedMessage(message2, field, index2) :
    912         reflection2->GetMessage(message2, field);
    913 
    914     // parent_fields is used in calls to Reporter methods.
    915     if (parent_fields != NULL) {
    916       // Append currently compared field to the end of parent_fields.
    917       SpecificField specific_field;
    918       specific_field.field = field;
    919       specific_field.index = index1;
    920       specific_field.new_index = index2;
    921       parent_fields->push_back(specific_field);
    922       const bool compare_result = Compare(m1, m2, parent_fields);
    923       parent_fields->pop_back();
    924       return compare_result;
    925     } else {
    926       // Recreates parent_fields as if m1 and m2 had no parents.
    927       return Compare(m1, m2);
    928     }
    929   } else {
    930     return (result == FieldComparator::SAME);
    931   }
    932 }
    933 
    934 bool MessageDifferencer::CheckPathChanged(
    935     const vector<SpecificField>& field_path) {
    936   for (int i = 0; i < field_path.size(); ++i) {
    937     if (field_path[i].index != field_path[i].new_index) return true;
    938   }
    939   return false;
    940 }
    941 
    942 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
    943   if (!field->is_repeated()) return false;
    944   if (field->is_map()) return true;
    945   if (repeated_field_comparison_ == AS_SET)
    946     return list_fields_.find(field) == list_fields_.end();
    947   return (set_fields_.find(field) != set_fields_.end());
    948 }
    949 
    950 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
    951   return scope_ == PARTIAL &&
    952       (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
    953 }
    954 
    955 bool MessageDifferencer::IsIgnored(
    956     const Message& message1,
    957     const Message& message2,
    958     const FieldDescriptor* field,
    959     const vector<SpecificField>& parent_fields) {
    960   if (ignored_fields_.find(field) != ignored_fields_.end()) {
    961     return true;
    962   }
    963   for (int i = 0; i < ignore_criteria_.size(); ++i) {
    964     if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
    965                                        parent_fields)) {
    966       return true;
    967     }
    968   }
    969   return false;
    970 }
    971 
    972 bool MessageDifferencer::IsUnknownFieldIgnored(
    973     const Message& message1, const Message& message2,
    974     const SpecificField& field, const vector<SpecificField>& parent_fields) {
    975   for (int i = 0; i < ignore_criteria_.size(); ++i) {
    976     if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
    977                                                    parent_fields)) {
    978       return true;
    979     }
    980   }
    981   return false;
    982 }
    983 
    984 const MessageDifferencer::MapKeyComparator* MessageDifferencer
    985     ::GetMapKeyComparator(const FieldDescriptor* field) {
    986   if (!field->is_repeated()) return NULL;
    987   if (map_field_key_comparator_.find(field) !=
    988       map_field_key_comparator_.end()) {
    989     return map_field_key_comparator_[field];
    990   }
    991   return NULL;
    992 }
    993 
    994 namespace {
    995 
    996 typedef pair<int, const UnknownField*> IndexUnknownFieldPair;
    997 
    998 struct UnknownFieldOrdering {
    999   inline bool operator()(const IndexUnknownFieldPair& a,
   1000                          const IndexUnknownFieldPair& b) const {
   1001     if (a.second->number() < b.second->number()) return true;
   1002     if (a.second->number() > b.second->number()) return false;
   1003     return a.second->type() < b.second->type();
   1004   }
   1005 };
   1006 
   1007 }  // namespace
   1008 
   1009 bool MessageDifferencer::UnpackAny(const Message& any,
   1010                                    google::protobuf::scoped_ptr<Message>* data) {
   1011   const Reflection* reflection = any.GetReflection();
   1012   const FieldDescriptor* type_url_field;
   1013   const FieldDescriptor* value_field;
   1014   if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
   1015     return false;
   1016   }
   1017   const string& type_url = reflection->GetString(any, type_url_field);
   1018   string full_type_name;
   1019   if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
   1020     return false;
   1021   }
   1022 
   1023   const google::protobuf::Descriptor* desc =
   1024       any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
   1025           full_type_name);
   1026   if (desc == NULL) {
   1027     GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
   1028     return false;
   1029   }
   1030 
   1031   if (dynamic_message_factory_ == NULL) {
   1032     dynamic_message_factory_.reset(new DynamicMessageFactory());
   1033   }
   1034   data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
   1035   string serialized_value = reflection->GetString(any, value_field);
   1036   if (!(*data)->ParseFromString(serialized_value)) {
   1037     GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
   1038     return false;
   1039   }
   1040   return true;
   1041 }
   1042 
   1043 bool MessageDifferencer::CompareUnknownFields(
   1044     const Message& message1, const Message& message2,
   1045     const google::protobuf::UnknownFieldSet& unknown_field_set1,
   1046     const google::protobuf::UnknownFieldSet& unknown_field_set2,
   1047     vector<SpecificField>* parent_field) {
   1048   // Ignore unknown fields in EQUIVALENT mode.
   1049   if (message_field_comparison_ == EQUIVALENT) return true;
   1050 
   1051   if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
   1052     return true;
   1053   }
   1054 
   1055   bool is_different = false;
   1056 
   1057   // We first sort the unknown fields by field number and type (in other words,
   1058   // in tag order), making sure to preserve ordering of values with the same
   1059   // tag.  This allows us to report only meaningful differences between the
   1060   // two sets -- that is, differing values for the same tag.  We use
   1061   // IndexUnknownFieldPairs to keep track of the field's original index for
   1062   // reporting purposes.
   1063   vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
   1064   vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
   1065   fields1.reserve(unknown_field_set1.field_count());
   1066   fields2.reserve(unknown_field_set2.field_count());
   1067 
   1068   for (int i = 0; i < unknown_field_set1.field_count(); i++) {
   1069     fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
   1070   }
   1071   for (int i = 0; i < unknown_field_set2.field_count(); i++) {
   1072     fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
   1073   }
   1074 
   1075   UnknownFieldOrdering is_before;
   1076   std::stable_sort(fields1.begin(), fields1.end(), is_before);
   1077   std::stable_sort(fields2.begin(), fields2.end(), is_before);
   1078 
   1079   // In order to fill in SpecificField::index, we have to keep track of how
   1080   // many values we've seen with the same field number and type.
   1081   // current_repeated points at the first field in this range, and
   1082   // current_repeated_start{1,2} are the indexes of the first field in the
   1083   // range within fields1 and fields2.
   1084   const UnknownField* current_repeated = NULL;
   1085   int current_repeated_start1 = 0;
   1086   int current_repeated_start2 = 0;
   1087 
   1088   // Now that we have two sorted lists, we can detect fields which appear only
   1089   // in one list or the other by traversing them simultaneously.
   1090   int index1 = 0;
   1091   int index2 = 0;
   1092   while (index1 < fields1.size() || index2 < fields2.size()) {
   1093     enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS,
   1094       NO_CHANGE } change_type;
   1095 
   1096     // focus_field is the field we're currently reporting on.  (In the case
   1097     // of a modification, it's the field on the left side.)
   1098     const UnknownField* focus_field;
   1099     bool match = false;
   1100 
   1101     if (index2 == fields2.size() ||
   1102         (index1 < fields1.size() &&
   1103           is_before(fields1[index1], fields2[index2]))) {
   1104       // fields1[index1] is not present in fields2.
   1105       change_type = DELETION;
   1106       focus_field = fields1[index1].second;
   1107     } else if (index1 == fields1.size() ||
   1108                is_before(fields2[index2], fields1[index1])) {
   1109       // fields2[index2] is not present in fields1.
   1110       if (scope_ == PARTIAL) {
   1111         // Ignore.
   1112         ++index2;
   1113         continue;
   1114       }
   1115       change_type = ADDITION;
   1116       focus_field = fields2[index2].second;
   1117     } else {
   1118       // Field type and number are the same.  See if the values differ.
   1119       change_type = MODIFICATION;
   1120       focus_field = fields1[index1].second;
   1121 
   1122       switch (focus_field->type()) {
   1123         case UnknownField::TYPE_VARINT:
   1124           match = fields1[index1].second->varint() ==
   1125                   fields2[index2].second->varint();
   1126           break;
   1127         case UnknownField::TYPE_FIXED32:
   1128           match = fields1[index1].second->fixed32() ==
   1129                   fields2[index2].second->fixed32();
   1130           break;
   1131         case UnknownField::TYPE_FIXED64:
   1132           match = fields1[index1].second->fixed64() ==
   1133                   fields2[index2].second->fixed64();
   1134           break;
   1135         case UnknownField::TYPE_LENGTH_DELIMITED:
   1136           match = fields1[index1].second->length_delimited() ==
   1137                   fields2[index2].second->length_delimited();
   1138           break;
   1139         case UnknownField::TYPE_GROUP:
   1140           // We must deal with this later, after building the SpecificField.
   1141           change_type = COMPARE_GROUPS;
   1142           break;
   1143       }
   1144       if (match && change_type != COMPARE_GROUPS) {
   1145         change_type = NO_CHANGE;
   1146       }
   1147     }
   1148 
   1149     if (current_repeated == NULL ||
   1150         focus_field->number() != current_repeated->number() ||
   1151         focus_field->type() != current_repeated->type()) {
   1152       // We've started a new repeated field.
   1153       current_repeated = focus_field;
   1154       current_repeated_start1 = index1;
   1155       current_repeated_start2 = index2;
   1156     }
   1157 
   1158     if (change_type == NO_CHANGE && reporter_ == NULL) {
   1159       // Fields were already compared and matched and we have no reporter.
   1160       ++index1;
   1161       ++index2;
   1162       continue;
   1163     }
   1164 
   1165     // Build the SpecificField.  This is slightly complicated.
   1166     SpecificField specific_field;
   1167     specific_field.unknown_field_number = focus_field->number();
   1168     specific_field.unknown_field_type = focus_field->type();
   1169 
   1170     specific_field.unknown_field_set1 = &unknown_field_set1;
   1171     specific_field.unknown_field_set2 = &unknown_field_set2;
   1172 
   1173     if (change_type != ADDITION) {
   1174       specific_field.unknown_field_index1 = fields1[index1].first;
   1175     }
   1176     if (change_type != DELETION) {
   1177       specific_field.unknown_field_index2 = fields2[index2].first;
   1178     }
   1179 
   1180     // Calculate the field index.
   1181     if (change_type == ADDITION) {
   1182       specific_field.index = index2 - current_repeated_start2;
   1183       specific_field.new_index = index2 - current_repeated_start2;
   1184     } else {
   1185       specific_field.index = index1 - current_repeated_start1;
   1186       specific_field.new_index = index2 - current_repeated_start2;
   1187     }
   1188 
   1189     if (IsUnknownFieldIgnored(message1, message2, specific_field,
   1190                               *parent_field)) {
   1191       if (reporter_ != NULL) {
   1192         parent_field->push_back(specific_field);
   1193         reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
   1194         parent_field->pop_back();
   1195       }
   1196       return true;
   1197     }
   1198 
   1199     if (change_type == ADDITION || change_type == DELETION ||
   1200         change_type == MODIFICATION) {
   1201       if (reporter_ == NULL) {
   1202         // We found a difference and we have no reproter.
   1203         return false;
   1204       }
   1205       is_different = true;
   1206     }
   1207 
   1208     parent_field->push_back(specific_field);
   1209 
   1210     switch (change_type) {
   1211       case ADDITION:
   1212         reporter_->ReportAdded(message1, message2, *parent_field);
   1213         ++index2;
   1214         break;
   1215       case DELETION:
   1216         reporter_->ReportDeleted(message1, message2, *parent_field);
   1217         ++index1;
   1218         break;
   1219       case MODIFICATION:
   1220         reporter_->ReportModified(message1, message2, *parent_field);
   1221         ++index1;
   1222         ++index2;
   1223         break;
   1224       case COMPARE_GROUPS:
   1225         if (!CompareUnknownFields(message1, message2,
   1226                                   fields1[index1].second->group(),
   1227                                   fields2[index2].second->group(),
   1228                                   parent_field)) {
   1229           if (reporter_ == NULL) return false;
   1230           is_different = true;
   1231           reporter_->ReportModified(message1, message2, *parent_field);
   1232         }
   1233         ++index1;
   1234         ++index2;
   1235         break;
   1236       case NO_CHANGE:
   1237         ++index1;
   1238         ++index2;
   1239         if (report_matches_) {
   1240           reporter_->ReportMatched(message1, message2, *parent_field);
   1241         }
   1242     }
   1243 
   1244     parent_field->pop_back();
   1245   }
   1246 
   1247   return !is_different;
   1248 }
   1249 
   1250 namespace {
   1251 
   1252 // Find maximum bipartite matching using the argumenting path algorithm.
   1253 class MaximumMatcher {
   1254  public:
   1255   typedef ResultCallback2<bool, int, int> NodeMatchCallback;
   1256   // MaximumMatcher takes ownership of the passed in callback and uses it to
   1257   // determine whether a node on the left side of the bipartial graph matches
   1258   // a node on the right side. count1 is the number of nodes on the left side
   1259   // of the graph and count2 to is the number of nodes on the right side.
   1260   // Every node is referred to using 0-based indices.
   1261   // If a maximum match is found, the result will be stored in match_list1 and
   1262   // match_list2. match_list1[i] == j means the i-th node on the left side is
   1263   // matched to the j-th node on the right side and match_list2[x] == y means
   1264   // the x-th node on the right side is matched to y-th node on the left side.
   1265   // match_list1[i] == -1 means the node is not matched. Same with match_list2.
   1266   MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
   1267                  vector<int>* match_list1, vector<int>* match_list2);
   1268   // Find a maximum match and return the number of matched node pairs.
   1269   // If early_return is true, this method will return 0 immediately when it
   1270   // finds that not all nodes on the left side can be matched.
   1271   int FindMaximumMatch(bool early_return);
   1272  private:
   1273   // Determines whether the node on the left side of the bipartial graph
   1274   // matches the one on the right side.
   1275   bool Match(int left, int right);
   1276   // Find an argumenting path starting from the node v on the left side. If a
   1277   // path can be found, update match_list2_ to reflect the path and return
   1278   // true.
   1279   bool FindArgumentPathDFS(int v, vector<bool>* visited);
   1280 
   1281   int count1_;
   1282   int count2_;
   1283   google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_;
   1284   map<pair<int, int>, bool> cached_match_results_;
   1285   vector<int>* match_list1_;
   1286   vector<int>* match_list2_;
   1287   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
   1288 };
   1289 
   1290 MaximumMatcher::MaximumMatcher(int count1, int count2,
   1291                                NodeMatchCallback* callback,
   1292                                vector<int>* match_list1,
   1293                                vector<int>* match_list2)
   1294     : count1_(count1), count2_(count2), match_callback_(callback),
   1295       match_list1_(match_list1), match_list2_(match_list2) {
   1296   match_list1_->assign(count1, -1);
   1297   match_list2_->assign(count2, -1);
   1298 }
   1299 
   1300 int MaximumMatcher::FindMaximumMatch(bool early_return) {
   1301   int result = 0;
   1302   for (int i = 0; i < count1_; ++i) {
   1303     vector<bool> visited(count1_);
   1304     if (FindArgumentPathDFS(i, &visited)) {
   1305       ++result;
   1306     } else if (early_return) {
   1307       return 0;
   1308     }
   1309   }
   1310   // Backfill match_list1_ as we only filled match_list2_ when finding
   1311   // argumenting pathes.
   1312   for (int i = 0; i < count2_; ++i) {
   1313     if ((*match_list2_)[i] != -1) {
   1314       (*match_list1_)[(*match_list2_)[i]] = i;
   1315     }
   1316   }
   1317   return result;
   1318 }
   1319 
   1320 bool MaximumMatcher::Match(int left, int right) {
   1321   pair<int, int> p(left, right);
   1322   map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p);
   1323   if (it != cached_match_results_.end()) {
   1324     return it->second;
   1325   }
   1326   cached_match_results_[p] = match_callback_->Run(left, right);
   1327   return cached_match_results_[p];
   1328 }
   1329 
   1330 bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) {
   1331   (*visited)[v] = true;
   1332   // We try to match those un-matched nodes on the right side first. This is
   1333   // the step that the navie greedy matching algorithm uses. In the best cases
   1334   // where the greedy algorithm can find a maximum matching, we will always
   1335   // find a match in this step and the performance will be identical to the
   1336   // greedy algorithm.
   1337   for (int i = 0; i < count2_; ++i) {
   1338     int matched = (*match_list2_)[i];
   1339     if (matched == -1 && Match(v, i)) {
   1340       (*match_list2_)[i] = v;
   1341       return true;
   1342     }
   1343   }
   1344   // Then we try those already matched nodes and see if we can find an
   1345   // alternaive match for the node matched to them.
   1346   // The greedy algorithm will stop before this and fail to produce the
   1347   // correct result.
   1348   for (int i = 0; i < count2_; ++i) {
   1349     int matched = (*match_list2_)[i];
   1350     if (matched != -1 && Match(v, i)) {
   1351       if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
   1352         (*match_list2_)[i] = v;
   1353         return true;
   1354       }
   1355     }
   1356   }
   1357   return false;
   1358 }
   1359 
   1360 }  // namespace
   1361 
   1362 bool MessageDifferencer::MatchRepeatedFieldIndices(
   1363     const Message& message1,
   1364     const Message& message2,
   1365     const FieldDescriptor* repeated_field,
   1366     const vector<SpecificField>& parent_fields,
   1367     vector<int>* match_list1,
   1368     vector<int>* match_list2) {
   1369   const int count1 =
   1370       message1.GetReflection()->FieldSize(message1, repeated_field);
   1371   const int count2 =
   1372       message2.GetReflection()->FieldSize(message2, repeated_field);
   1373   const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
   1374 
   1375   match_list1->assign(count1, -1);
   1376   match_list2->assign(count2, -1);
   1377 
   1378   SpecificField specific_field;
   1379   specific_field.field = repeated_field;
   1380 
   1381   bool success = true;
   1382   // Find potential match if this is a special repeated field.
   1383   if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) {
   1384     if (scope_ == PARTIAL) {
   1385       // When partial matching is enabled, Compare(a, b) && Compare(a, c)
   1386       // doesn't neccessarily imply Compare(b, c). Therefore a naive greedy
   1387       // algorithm will fail to find a maximum matching.
   1388       // Here we use the argumenting path algorithm.
   1389       MaximumMatcher::NodeMatchCallback* callback =
   1390           ::google::protobuf::internal::NewPermanentCallback(
   1391               this, &MessageDifferencer::IsMatch,
   1392               repeated_field, key_comparator,
   1393               &message1, &message2, parent_fields);
   1394       MaximumMatcher matcher(count1, count2, callback, match_list1,
   1395                              match_list2);
   1396       // If diff info is not needed, we should end the matching process as
   1397       // soon as possible if not all items can be matched.
   1398       bool early_return = (reporter_ == NULL);
   1399       int match_count = matcher.FindMaximumMatch(early_return);
   1400       if (match_count != count1 && reporter_ == NULL) return false;
   1401       success = success && (match_count == count1);
   1402     } else {
   1403       for (int i = 0; i < count1; ++i) {
   1404         // Indicates any matched elements for this repeated field.
   1405         bool match = false;
   1406 
   1407         specific_field.index = i;
   1408         specific_field.new_index = i;
   1409 
   1410         for (int j = 0; j < count2; j++) {
   1411           if (match_list2->at(j) != -1) continue;
   1412           specific_field.index = i;
   1413           specific_field.new_index = j;
   1414 
   1415           match = IsMatch(repeated_field, key_comparator,
   1416                           &message1, &message2, parent_fields, i, j);
   1417 
   1418           if (match) {
   1419             match_list1->at(specific_field.index) = specific_field.new_index;
   1420             match_list2->at(specific_field.new_index) = specific_field.index;
   1421             break;
   1422           }
   1423         }
   1424         if (!match && reporter_ == NULL) return false;
   1425         success = success && match;
   1426       }
   1427     }
   1428   } else {
   1429     // If this field should be treated as list, just label the match_list.
   1430     for (int i = 0; i < count1 && i < count2; i++) {
   1431       match_list1->at(i) = i;
   1432       match_list2->at(i) = i;
   1433     }
   1434   }
   1435 
   1436   return success;
   1437 }
   1438 
   1439 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
   1440     const Message& message1, const Message& message2,
   1441     const FieldDescriptor* field, int index1, int index2,
   1442     const FieldContext* field_context) {
   1443   FieldComparator* comparator = field_comparator_ != NULL ?
   1444       field_comparator_ : &default_field_comparator_;
   1445   return comparator->Compare(message1, message2, field,
   1446                              index1, index2, field_context);
   1447 }
   1448 
   1449 // ===========================================================================
   1450 
   1451 MessageDifferencer::Reporter::Reporter() { }
   1452 MessageDifferencer::Reporter::~Reporter() {}
   1453 
   1454 // ===========================================================================
   1455 
   1456 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
   1457 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
   1458 
   1459 // ===========================================================================
   1460 
   1461 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
   1462 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
   1463 
   1464 // ===========================================================================
   1465 
   1466 // Note that the printer's delimiter is not used, because if we are given a
   1467 // printer, we don't know its delimiter.
   1468 MessageDifferencer::StreamReporter::StreamReporter(
   1469     io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')),
   1470                                         delete_printer_(true),
   1471                                         report_modified_aggregates_(false) { }
   1472 
   1473 MessageDifferencer::StreamReporter::StreamReporter(
   1474     io::Printer* printer) : printer_(printer),
   1475                             delete_printer_(false),
   1476                             report_modified_aggregates_(false) { }
   1477 
   1478 MessageDifferencer::StreamReporter::~StreamReporter() {
   1479   if (delete_printer_) delete printer_;
   1480 }
   1481 
   1482 void MessageDifferencer::StreamReporter::PrintPath(
   1483     const vector<SpecificField>& field_path, bool left_side) {
   1484   for (int i = 0; i < field_path.size(); ++i) {
   1485     if (i > 0) {
   1486       printer_->Print(".");
   1487     }
   1488 
   1489     SpecificField specific_field = field_path[i];
   1490 
   1491     if (specific_field.field != NULL) {
   1492       if (specific_field.field->is_extension()) {
   1493         printer_->Print("($name$)", "name",
   1494                         specific_field.field->full_name());
   1495       } else {
   1496         printer_->PrintRaw(specific_field.field->name());
   1497       }
   1498     } else {
   1499       printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number));
   1500     }
   1501     if (left_side && specific_field.index >= 0) {
   1502       printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index));
   1503     }
   1504     if (!left_side && specific_field.new_index >= 0) {
   1505       printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index));
   1506     }
   1507   }
   1508 }
   1509 
   1510 void MessageDifferencer::
   1511 StreamReporter::PrintValue(const Message& message,
   1512                            const vector<SpecificField>& field_path,
   1513                            bool left_side) {
   1514   const SpecificField& specific_field = field_path.back();
   1515   const FieldDescriptor* field = specific_field.field;
   1516   if (field != NULL) {
   1517     string output;
   1518     int index = left_side ? specific_field.index : specific_field.new_index;
   1519     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   1520       const Reflection* reflection = message.GetReflection();
   1521       const Message& field_message = field->is_repeated() ?
   1522           reflection->GetRepeatedMessage(message, field, index) :
   1523           reflection->GetMessage(message, field);
   1524       output = field_message.ShortDebugString();
   1525       if (output.empty()) {
   1526         printer_->Print("{ }");
   1527       } else {
   1528         printer_->Print("{ $name$ }", "name", output);
   1529       }
   1530     } else {
   1531       TextFormat::PrintFieldValueToString(message, field, index, &output);
   1532       printer_->PrintRaw(output);
   1533     }
   1534   } else {
   1535     const UnknownFieldSet* unknown_fields =
   1536         (left_side ?
   1537          specific_field.unknown_field_set1 :
   1538          specific_field.unknown_field_set2);
   1539     const UnknownField* unknown_field = &unknown_fields->field(
   1540         left_side ?
   1541         specific_field.unknown_field_index1 :
   1542         specific_field.unknown_field_index2);
   1543     PrintUnknownFieldValue(unknown_field);
   1544   }
   1545 }
   1546 
   1547 void MessageDifferencer::
   1548 StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) {
   1549   GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
   1550 
   1551   string output;
   1552   switch (unknown_field->type()) {
   1553     case UnknownField::TYPE_VARINT:
   1554       output = SimpleItoa(unknown_field->varint());
   1555       break;
   1556     case UnknownField::TYPE_FIXED32:
   1557       output = StrCat("0x", strings::Hex(unknown_field->fixed32(),
   1558                                          strings::ZERO_PAD_8));
   1559       break;
   1560     case UnknownField::TYPE_FIXED64:
   1561       output = StrCat("0x", strings::Hex(unknown_field->fixed64(),
   1562                                          strings::ZERO_PAD_16));
   1563       break;
   1564     case UnknownField::TYPE_LENGTH_DELIMITED:
   1565       output = StringPrintf("\"%s\"",
   1566           CEscape(unknown_field->length_delimited()).c_str());
   1567       break;
   1568     case UnknownField::TYPE_GROUP:
   1569       // TODO(kenton):  Print the contents of the group like we do for
   1570       //   messages.  Requires an equivalent of ShortDebugString() for
   1571       //   UnknownFieldSet.
   1572       output = "{ ... }";
   1573       break;
   1574   }
   1575   printer_->PrintRaw(output);
   1576 }
   1577 
   1578 void MessageDifferencer::StreamReporter::Print(const string& str) {
   1579   printer_->Print(str.c_str());
   1580 }
   1581 
   1582 void MessageDifferencer::StreamReporter::ReportAdded(
   1583     const Message& message1,
   1584     const Message& message2,
   1585     const vector<SpecificField>& field_path) {
   1586   printer_->Print("added: ");
   1587   PrintPath(field_path, false);
   1588   printer_->Print(": ");
   1589   PrintValue(message2, field_path, false);
   1590   printer_->Print("\n");  // Print for newlines.
   1591 }
   1592 
   1593 void MessageDifferencer::StreamReporter::ReportDeleted(
   1594     const Message& message1,
   1595     const Message& message2,
   1596     const vector<SpecificField>& field_path) {
   1597   printer_->Print("deleted: ");
   1598   PrintPath(field_path, true);
   1599   printer_->Print(": ");
   1600   PrintValue(message1, field_path, true);
   1601   printer_->Print("\n");  // Print for newlines
   1602 }
   1603 
   1604 void MessageDifferencer::StreamReporter::ReportModified(
   1605     const Message& message1,
   1606     const Message& message2,
   1607     const vector<SpecificField>& field_path) {
   1608   if (!report_modified_aggregates_ && field_path.back().field == NULL) {
   1609     if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
   1610       // Any changes to the subfields have already been printed.
   1611       return;
   1612     }
   1613   } else if (!report_modified_aggregates_) {
   1614     if (field_path.back().field->cpp_type() ==
   1615         FieldDescriptor::CPPTYPE_MESSAGE) {
   1616       // Any changes to the subfields have already been printed.
   1617       return;
   1618     }
   1619   }
   1620 
   1621   printer_->Print("modified: ");
   1622   PrintPath(field_path, true);
   1623   if (CheckPathChanged(field_path)) {
   1624     printer_->Print(" -> ");
   1625     PrintPath(field_path, false);
   1626   }
   1627   printer_->Print(": ");
   1628   PrintValue(message1, field_path, true);
   1629   printer_->Print(" -> ");
   1630   PrintValue(message2, field_path, false);
   1631   printer_->Print("\n");  // Print for newlines.
   1632 }
   1633 
   1634 void MessageDifferencer::StreamReporter::ReportMoved(
   1635     const Message& message1,
   1636     const Message& message2,
   1637     const vector<SpecificField>& field_path) {
   1638   printer_->Print("moved: ");
   1639   PrintPath(field_path, true);
   1640   printer_->Print(" -> ");
   1641   PrintPath(field_path, false);
   1642   printer_->Print(" : ");
   1643   PrintValue(message1, field_path, true);
   1644   printer_->Print("\n");  // Print for newlines.
   1645 }
   1646 
   1647 void MessageDifferencer::StreamReporter::ReportMatched(
   1648     const Message& message1,
   1649     const Message& message2,
   1650     const vector<SpecificField>& field_path) {
   1651   printer_->Print("matched: ");
   1652   PrintPath(field_path, true);
   1653   if (CheckPathChanged(field_path)) {
   1654     printer_->Print(" -> ");
   1655     PrintPath(field_path, false);
   1656   }
   1657   printer_->Print(" : ");
   1658   PrintValue(message1, field_path, true);
   1659   printer_->Print("\n");  // Print for newlines.
   1660 }
   1661 
   1662 void MessageDifferencer::StreamReporter::ReportIgnored(
   1663     const Message& message1,
   1664     const Message& message2,
   1665     const vector<SpecificField>& field_path) {
   1666   printer_->Print("ignored: ");
   1667   PrintPath(field_path, true);
   1668   if (CheckPathChanged(field_path)) {
   1669     printer_->Print(" -> ");
   1670     PrintPath(field_path, false);
   1671   }
   1672   printer_->Print("\n");  // Print for newlines.
   1673 }
   1674 
   1675 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
   1676     const Message& message1, const Message& message2,
   1677     const vector<SpecificField>& field_path) {
   1678   printer_->Print("ignored: ");
   1679   PrintPath(field_path, true);
   1680   if (CheckPathChanged(field_path)) {
   1681     printer_->Print(" -> ");
   1682     PrintPath(field_path, false);
   1683   }
   1684   printer_->Print("\n");  // Print for newlines.
   1685 }
   1686 
   1687 }  // namespace util
   1688 }  // namespace protobuf
   1689 }  // namespace google
   1690