Home | History | Annotate | Download | only in courgette
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The main idea in Courgette is to do patching *under a tranformation*.  The
      6 // input is transformed into a new representation, patching occurs in the new
      7 // repesentation, and then the tranform is reversed to get the patched data.
      8 //
      9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
     10 // Each of the elements has to go through the same set of steps in lock-step.
     11 
     12 // This file contains the code to create the patch.
     13 
     14 
     15 #include "courgette/ensemble.h"
     16 
     17 #include <limits>
     18 #include <vector>
     19 
     20 #include "base/basictypes.h"
     21 #include "base/logging.h"
     22 #include "base/time/time.h"
     23 
     24 #include "courgette/crc.h"
     25 #include "courgette/difference_estimator.h"
     26 #include "courgette/region.h"
     27 #include "courgette/simple_delta.h"
     28 #include "courgette/streams.h"
     29 #include "courgette/third_party/bsdiff.h"
     30 
     31 #include "courgette/patcher_x86_32.h"
     32 #include "courgette/patch_generator_x86_32.h"
     33 
     34 namespace courgette {
     35 
     36 TransformationPatchGenerator::TransformationPatchGenerator(
     37     Element* old_element,
     38     Element* new_element,
     39     TransformationPatcher* patcher)
     40     : old_element_(old_element),
     41       new_element_(new_element),
     42       patcher_(patcher) {
     43 }
     44 
     45 TransformationPatchGenerator::~TransformationPatchGenerator() {
     46   delete patcher_;
     47 }
     48 
     49 // The default implementation of PredictTransformParameters delegates to the
     50 // patcher.
     51 Status TransformationPatchGenerator::PredictTransformParameters(
     52     SinkStreamSet* prediction) {
     53   return patcher_->PredictTransformParameters(prediction);
     54 }
     55 
     56 // The default implementation of Reform delegates to the patcher.
     57 Status TransformationPatchGenerator::Reform(
     58     SourceStreamSet* transformed_element,
     59     SinkStream* reformed_element) {
     60   return patcher_->Reform(transformed_element, reformed_element);
     61 }
     62 
     63 // Makes a TransformationPatchGenerator of the appropriate variety for the
     64 // Element kind.
     65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
     66                                             Element* new_element) {
     67   switch (new_element->kind()) {
     68     case EXE_UNKNOWN:
     69       break;
     70     case EXE_WIN_32_X86: {
     71       TransformationPatchGenerator* generator =
     72           new PatchGeneratorX86_32(
     73               old_element,
     74               new_element,
     75               new PatcherX86_32(old_element->region()),
     76               EXE_WIN_32_X86);
     77       return generator;
     78     }
     79     case EXE_ELF_32_X86: {
     80       TransformationPatchGenerator* generator =
     81           new PatchGeneratorX86_32(
     82               old_element,
     83               new_element,
     84               new PatcherX86_32(old_element->region()),
     85               EXE_ELF_32_X86);
     86       return generator;
     87     }
     88     case EXE_ELF_32_ARM: {
     89       TransformationPatchGenerator* generator =
     90           new PatchGeneratorX86_32(
     91               old_element,
     92               new_element,
     93               new PatcherX86_32(old_element->region()),
     94               EXE_ELF_32_ARM);
     95       return generator;
     96     }
     97   }
     98 
     99   LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
    100   return NULL;
    101 }
    102 
    103 // Checks to see if the proposed comparison is 'unsafe'.  Sometimes one element
    104 // from 'old' is matched as the closest element to multiple elements from 'new'.
    105 // Each time this happens, the old element is transformed and serialized.  This
    106 // is a problem when the old element is huge compared with the new element
    107 // because the mutliple serialized copies can be much bigger than the size of
    108 // either ensemble.
    109 //
    110 // The right way to avoid this is to ensure any one element from 'old' is
    111 // serialized once, which requires matching code in the patch application.
    112 //
    113 // This is a quick hack to avoid the problem by prohibiting a big difference in
    114 // size between matching elements.
    115 bool UnsafeDifference(Element* old_element, Element* new_element) {
    116   double kMaxBloat = 2.0;
    117   size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
    118   size_t old_size = old_element->region().length();
    119   size_t new_size = new_element->region().length();
    120   size_t low_size = std::min(old_size, new_size);
    121   size_t high_size = std::max(old_size, new_size);
    122   if (high_size - low_size < kMinWorrysomeDifference) return false;
    123   if (high_size < low_size * kMaxBloat) return false;
    124   return true;
    125 }
    126 
    127 // FindGenerators finds TransformationPatchGenerators for the elements of
    128 // |new_ensemble|.  For each element of |new_ensemble| we find the closest
    129 // matching element from |old_ensemble| and use that as the basis for
    130 // differential compression.  The elements have to be the same kind so as to
    131 // support transformation into the same kind of 'new representation'.
    132 //
    133 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
    134                       std::vector<TransformationPatchGenerator*>* generators) {
    135   base::Time start_find_time = base::Time::Now();
    136   old_ensemble->FindEmbeddedElements();
    137   new_ensemble->FindEmbeddedElements();
    138   VLOG(1) << "done FindEmbeddedElements "
    139           << (base::Time::Now() - start_find_time).InSecondsF();
    140 
    141   std::vector<Element*> old_elements(old_ensemble->elements());
    142   std::vector<Element*> new_elements(new_ensemble->elements());
    143 
    144   VLOG(1) << "old has " << old_elements.size() << " elements";
    145   VLOG(1) << "new has " << new_elements.size() << " elements";
    146 
    147   DifferenceEstimator difference_estimator;
    148   std::vector<DifferenceEstimator::Base*> bases;
    149 
    150   base::Time start_bases_time = base::Time::Now();
    151   for (size_t i = 0;  i < old_elements.size();  ++i) {
    152     bases.push_back(
    153         difference_estimator.MakeBase(old_elements[i]->region()));
    154   }
    155   VLOG(1) << "done make bases "
    156           << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
    157 
    158   for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
    159     Element* new_element = new_elements[new_index];
    160     DifferenceEstimator::Subject* new_subject =
    161         difference_estimator.MakeSubject(new_element->region());
    162 
    163     // Search through old elements to find the best match.
    164     //
    165     // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
    166     // new_ensemble probably have a very similar structure.  We can make the
    167     // search faster by making the comparison provided by DifferenceEstimator
    168     // more nuanced, returning early if the measured difference is greater than
    169     // the current best.  This will be most effective if we can arrange that the
    170     // first elements we try to match are likely the 'right' ones.  We could
    171     // prioritize elements that are of a similar size or similar position in the
    172     // sequence of elements.
    173     //
    174     Element* best_old_element = NULL;
    175     size_t best_difference = std::numeric_limits<size_t>::max();
    176     for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
    177       Element* old_element = old_elements[old_index];
    178       // Elements of different kinds are incompatible.
    179       if (old_element->kind() != new_element->kind())
    180         continue;
    181 
    182       if (UnsafeDifference(old_element, new_element))
    183         continue;
    184 
    185       base::Time start_compare = base::Time::Now();
    186       DifferenceEstimator::Base* old_base = bases[old_index];
    187       size_t difference = difference_estimator.Measure(old_base, new_subject);
    188 
    189       VLOG(1) << "Compare " << old_element->Name()
    190               << " to " << new_element->Name()
    191               << " --> " << difference
    192               << " in " << (base::Time::Now() - start_compare).InSecondsF()
    193               << "s";
    194       if (difference == 0) {
    195         VLOG(1) << "Skip " << new_element->Name()
    196                 << " - identical to " << old_element->Name();
    197         best_difference = 0;
    198         best_old_element = NULL;
    199         break;
    200       }
    201       if (difference < best_difference) {
    202         best_difference = difference;
    203         best_old_element = old_element;
    204       }
    205     }
    206 
    207     if (best_old_element) {
    208       VLOG(1) << "Matched " << best_old_element->Name()
    209               << " to " << new_element->Name()
    210               << " --> " << best_difference;
    211       TransformationPatchGenerator* generator =
    212           MakeGenerator(best_old_element, new_element);
    213       if (generator)
    214         generators->push_back(generator);
    215     }
    216   }
    217 
    218   VLOG(1) << "done FindGenerators found " << generators->size()
    219           << " in " << (base::Time::Now() - start_find_time).InSecondsF()
    220           << "s";
    221 
    222   return C_OK;
    223 }
    224 
    225 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
    226   for (size_t i = 0;  i < generators->size();  ++i) {
    227     delete (*generators)[i];
    228   }
    229   generators->clear();
    230 }
    231 
    232 ////////////////////////////////////////////////////////////////////////////////
    233 
    234 Status GenerateEnsemblePatch(SourceStream* base,
    235                              SourceStream* update,
    236                              SinkStream* final_patch) {
    237   VLOG(1) << "start GenerateEnsemblePatch";
    238   base::Time start_time = base::Time::Now();
    239 
    240   Region old_region(base->Buffer(), base->Remaining());
    241   Region new_region(update->Buffer(), update->Remaining());
    242   Ensemble old_ensemble(old_region, "old");
    243   Ensemble new_ensemble(new_region, "new");
    244   std::vector<TransformationPatchGenerator*> generators;
    245   Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
    246                                             &generators);
    247   if (generators_status != C_OK)
    248     return generators_status;
    249 
    250   SinkStreamSet patch_streams;
    251 
    252   SinkStream* tranformation_descriptions      = patch_streams.stream(0);
    253   SinkStream* parameter_correction            = patch_streams.stream(1);
    254   SinkStream* transformed_elements_correction = patch_streams.stream(2);
    255   SinkStream* ensemble_correction             = patch_streams.stream(3);
    256 
    257   size_t number_of_transformations = generators.size();
    258   if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
    259     return C_STREAM_ERROR;
    260 
    261   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    262     ExecutableType kind = generators[i]->Kind();
    263     if (!tranformation_descriptions->WriteVarint32(kind))
    264       return C_STREAM_ERROR;
    265   }
    266 
    267   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    268     Status status =
    269         generators[i]->WriteInitialParameters(tranformation_descriptions);
    270     if (status != C_OK)
    271       return status;
    272   }
    273 
    274   //
    275   // Generate sub-patch for parameters.
    276   //
    277   SinkStreamSet predicted_parameters_sink;
    278   SinkStreamSet corrected_parameters_sink;
    279 
    280   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    281     SinkStreamSet single_predicted_parameters;
    282     Status status;
    283     status = generators[i]->PredictTransformParameters(
    284         &single_predicted_parameters);
    285     if (status != C_OK)
    286       return status;
    287     if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
    288       return C_STREAM_ERROR;
    289 
    290     SinkStreamSet single_corrected_parameters;
    291     status = generators[i]->CorrectedTransformParameters(
    292         &single_corrected_parameters);
    293     if (status != C_OK)
    294       return status;
    295     if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
    296       return C_STREAM_ERROR;
    297   }
    298 
    299   SinkStream linearized_predicted_parameters;
    300   SinkStream linearized_corrected_parameters;
    301 
    302   if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
    303     return C_STREAM_ERROR;
    304   if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
    305     return C_STREAM_ERROR;
    306 
    307   SourceStream predicted_parameters_source;
    308   SourceStream corrected_parameters_source;
    309   predicted_parameters_source.Init(linearized_predicted_parameters);
    310   corrected_parameters_source.Init(linearized_corrected_parameters);
    311 
    312   Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
    313                                              &corrected_parameters_source,
    314                                              parameter_correction);
    315   if (delta1_status != C_OK)
    316     return delta1_status;
    317 
    318   //
    319   // Generate sub-patch for elements.
    320   //
    321   corrected_parameters_source.Init(linearized_corrected_parameters);
    322   SourceStreamSet corrected_parameters_source_set;
    323   if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
    324     return C_STREAM_ERROR;
    325 
    326   SinkStreamSet predicted_transformed_elements;
    327   SinkStreamSet corrected_transformed_elements;
    328 
    329   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    330     SourceStreamSet single_parameters;
    331     if (!corrected_parameters_source_set.ReadSet(&single_parameters))
    332       return C_STREAM_ERROR;
    333     SinkStreamSet single_predicted_transformed_element;
    334     SinkStreamSet single_corrected_transformed_element;
    335     Status status = generators[i]->Transform(
    336         &single_parameters,
    337         &single_predicted_transformed_element,
    338         &single_corrected_transformed_element);
    339     if (status != C_OK)
    340       return status;
    341     if (!single_parameters.Empty())
    342       return C_STREAM_NOT_CONSUMED;
    343     if (!predicted_transformed_elements.WriteSet(
    344             &single_predicted_transformed_element))
    345       return C_STREAM_ERROR;
    346     if (!corrected_transformed_elements.WriteSet(
    347             &single_corrected_transformed_element))
    348       return C_STREAM_ERROR;
    349   }
    350 
    351   if (!corrected_parameters_source_set.Empty())
    352     return C_STREAM_NOT_CONSUMED;
    353 
    354   SinkStream linearized_predicted_transformed_elements;
    355   SinkStream linearized_corrected_transformed_elements;
    356 
    357   if (!predicted_transformed_elements.CopyTo(
    358           &linearized_predicted_transformed_elements))
    359     return C_STREAM_ERROR;
    360   if (!corrected_transformed_elements.CopyTo(
    361           &linearized_corrected_transformed_elements))
    362     return C_STREAM_ERROR;
    363 
    364   SourceStream predicted_transformed_elements_source;
    365   SourceStream corrected_transformed_elements_source;
    366   predicted_transformed_elements_source
    367       .Init(linearized_predicted_transformed_elements);
    368   corrected_transformed_elements_source
    369       .Init(linearized_corrected_transformed_elements);
    370 
    371   Status delta2_status =
    372       GenerateSimpleDelta(&predicted_transformed_elements_source,
    373                           &corrected_transformed_elements_source,
    374                           transformed_elements_correction);
    375   if (delta2_status != C_OK)
    376     return delta2_status;
    377 
    378   // Last use, free storage.
    379   linearized_predicted_transformed_elements.Retire();
    380 
    381   //
    382   // Generate sub-patch for whole enchilada.
    383   //
    384   SinkStream predicted_ensemble;
    385 
    386   if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
    387     return C_STREAM_ERROR;
    388 
    389   SourceStreamSet corrected_transformed_elements_source_set;
    390   corrected_transformed_elements_source
    391       .Init(linearized_corrected_transformed_elements);
    392   if (!corrected_transformed_elements_source_set
    393       .Init(&corrected_transformed_elements_source))
    394     return C_STREAM_ERROR;
    395 
    396   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    397     SourceStreamSet single_corrected_transformed_element;
    398     if (!corrected_transformed_elements_source_set.ReadSet(
    399             &single_corrected_transformed_element))
    400       return C_STREAM_ERROR;
    401     Status status = generators[i]->Reform(&single_corrected_transformed_element,
    402                                           &predicted_ensemble);
    403     if (status != C_OK)
    404       return status;
    405     if (!single_corrected_transformed_element.Empty())
    406       return C_STREAM_NOT_CONSUMED;
    407   }
    408 
    409   if (!corrected_transformed_elements_source_set.Empty())
    410     return C_STREAM_NOT_CONSUMED;
    411 
    412   // No more references to this stream's buffer.
    413   linearized_corrected_transformed_elements.Retire();
    414 
    415   FreeGenerators(&generators);
    416 
    417   size_t final_patch_input_size = predicted_ensemble.Length();
    418   SourceStream predicted_ensemble_source;
    419   predicted_ensemble_source.Init(predicted_ensemble);
    420   Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
    421                                              update,
    422                                              ensemble_correction);
    423   if (delta3_status != C_OK)
    424     return delta3_status;
    425 
    426   //
    427   // Final output stream has a header followed by a StreamSet.
    428   //
    429   if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
    430       !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
    431       !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
    432                                                old_region.length())) ||
    433       !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
    434                                                new_region.length())) ||
    435       !final_patch->WriteSizeVarint32(final_patch_input_size) ||
    436       !patch_streams.CopyTo(final_patch)) {
    437     return C_STREAM_ERROR;
    438   }
    439 
    440   VLOG(1) << "done GenerateEnsemblePatch "
    441           << (base::Time::Now() - start_time).InSecondsF() << "s";
    442 
    443   return C_OK;
    444 }
    445 
    446 }  // namespace
    447