Home | History | Annotate | Download | only in courgette
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The main idea in Courgette is to do patching *under a tranformation*.  The
      6 // input is transformed into a new representation, patching occurs in the new
      7 // repesentation, and then the tranform is reversed to get the patched data.
      8 //
      9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
     10 // Each of the elements has to go through the same set of steps in lock-step.
     11 
     12 // This file contains the code to create the patch.
     13 
     14 
     15 #include "courgette/ensemble.h"
     16 
     17 #include <limits>
     18 #include <vector>
     19 
     20 #include "base/basictypes.h"
     21 #include "base/logging.h"
     22 #include "base/time/time.h"
     23 
     24 #include "courgette/crc.h"
     25 #include "courgette/difference_estimator.h"
     26 #include "courgette/region.h"
     27 #include "courgette/simple_delta.h"
     28 #include "courgette/streams.h"
     29 #include "courgette/third_party/bsdiff.h"
     30 
     31 #include "courgette/patcher_x86_32.h"
     32 #include "courgette/patch_generator_x86_32.h"
     33 
     34 namespace courgette {
     35 
     36 TransformationPatchGenerator::TransformationPatchGenerator(
     37     Element* old_element,
     38     Element* new_element,
     39     TransformationPatcher* patcher)
     40     : old_element_(old_element),
     41       new_element_(new_element),
     42       patcher_(patcher) {
     43 }
     44 
     45 TransformationPatchGenerator::~TransformationPatchGenerator() {
     46   delete patcher_;
     47 }
     48 
     49 // The default implementation of PredictTransformParameters delegates to the
     50 // patcher.
     51 Status TransformationPatchGenerator::PredictTransformParameters(
     52     SinkStreamSet* prediction) {
     53   return patcher_->PredictTransformParameters(prediction);
     54 }
     55 
     56 // The default implementation of Reform delegates to the patcher.
     57 Status TransformationPatchGenerator::Reform(
     58     SourceStreamSet* transformed_element,
     59     SinkStream* reformed_element) {
     60   return patcher_->Reform(transformed_element, reformed_element);
     61 }
     62 
     63 // Makes a TransformationPatchGenerator of the appropriate variety for the
     64 // Element kind.
     65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
     66                                             Element* new_element) {
     67   switch (new_element->kind()) {
     68     case EXE_UNKNOWN:
     69       break;
     70     case EXE_WIN_32_X86: {
     71       TransformationPatchGenerator* generator =
     72           new PatchGeneratorX86_32(
     73               old_element,
     74               new_element,
     75               new PatcherX86_32(old_element->region()),
     76               EXE_WIN_32_X86);
     77       return generator;
     78     }
     79     case EXE_ELF_32_X86: {
     80       TransformationPatchGenerator* generator =
     81           new PatchGeneratorX86_32(
     82               old_element,
     83               new_element,
     84               new PatcherX86_32(old_element->region()),
     85               EXE_ELF_32_X86);
     86       return generator;
     87     }
     88     case EXE_ELF_32_ARM: {
     89       TransformationPatchGenerator* generator =
     90           new PatchGeneratorX86_32(
     91               old_element,
     92               new_element,
     93               new PatcherX86_32(old_element->region()),
     94               EXE_ELF_32_ARM);
     95       return generator;
     96     }
     97     case EXE_WIN_32_X64: {
     98       TransformationPatchGenerator* generator =
     99           new PatchGeneratorX86_32(
    100               old_element,
    101               new_element,
    102               new PatcherX86_32(old_element->region()),
    103               EXE_WIN_32_X64);
    104       return generator;
    105     }
    106   }
    107 
    108   LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
    109   return NULL;
    110 }
    111 
    112 // Checks to see if the proposed comparison is 'unsafe'.  Sometimes one element
    113 // from 'old' is matched as the closest element to multiple elements from 'new'.
    114 // Each time this happens, the old element is transformed and serialized.  This
    115 // is a problem when the old element is huge compared with the new element
    116 // because the mutliple serialized copies can be much bigger than the size of
    117 // either ensemble.
    118 //
    119 // The right way to avoid this is to ensure any one element from 'old' is
    120 // serialized once, which requires matching code in the patch application.
    121 //
    122 // This is a quick hack to avoid the problem by prohibiting a big difference in
    123 // size between matching elements.
    124 bool UnsafeDifference(Element* old_element, Element* new_element) {
    125   double kMaxBloat = 2.0;
    126   size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
    127   size_t old_size = old_element->region().length();
    128   size_t new_size = new_element->region().length();
    129   size_t low_size = std::min(old_size, new_size);
    130   size_t high_size = std::max(old_size, new_size);
    131   if (high_size - low_size < kMinWorrysomeDifference) return false;
    132   if (high_size < low_size * kMaxBloat) return false;
    133   return true;
    134 }
    135 
    136 // FindGenerators finds TransformationPatchGenerators for the elements of
    137 // |new_ensemble|.  For each element of |new_ensemble| we find the closest
    138 // matching element from |old_ensemble| and use that as the basis for
    139 // differential compression.  The elements have to be the same kind so as to
    140 // support transformation into the same kind of 'new representation'.
    141 //
    142 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
    143                       std::vector<TransformationPatchGenerator*>* generators) {
    144   base::Time start_find_time = base::Time::Now();
    145   old_ensemble->FindEmbeddedElements();
    146   new_ensemble->FindEmbeddedElements();
    147   VLOG(1) << "done FindEmbeddedElements "
    148           << (base::Time::Now() - start_find_time).InSecondsF();
    149 
    150   std::vector<Element*> old_elements(old_ensemble->elements());
    151   std::vector<Element*> new_elements(new_ensemble->elements());
    152 
    153   VLOG(1) << "old has " << old_elements.size() << " elements";
    154   VLOG(1) << "new has " << new_elements.size() << " elements";
    155 
    156   DifferenceEstimator difference_estimator;
    157   std::vector<DifferenceEstimator::Base*> bases;
    158 
    159   base::Time start_bases_time = base::Time::Now();
    160   for (size_t i = 0;  i < old_elements.size();  ++i) {
    161     bases.push_back(
    162         difference_estimator.MakeBase(old_elements[i]->region()));
    163   }
    164   VLOG(1) << "done make bases "
    165           << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
    166 
    167   for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
    168     Element* new_element = new_elements[new_index];
    169     DifferenceEstimator::Subject* new_subject =
    170         difference_estimator.MakeSubject(new_element->region());
    171 
    172     // Search through old elements to find the best match.
    173     //
    174     // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
    175     // new_ensemble probably have a very similar structure.  We can make the
    176     // search faster by making the comparison provided by DifferenceEstimator
    177     // more nuanced, returning early if the measured difference is greater than
    178     // the current best.  This will be most effective if we can arrange that the
    179     // first elements we try to match are likely the 'right' ones.  We could
    180     // prioritize elements that are of a similar size or similar position in the
    181     // sequence of elements.
    182     //
    183     Element* best_old_element = NULL;
    184     size_t best_difference = std::numeric_limits<size_t>::max();
    185     for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
    186       Element* old_element = old_elements[old_index];
    187       // Elements of different kinds are incompatible.
    188       if (old_element->kind() != new_element->kind())
    189         continue;
    190 
    191       if (UnsafeDifference(old_element, new_element))
    192         continue;
    193 
    194       base::Time start_compare = base::Time::Now();
    195       DifferenceEstimator::Base* old_base = bases[old_index];
    196       size_t difference = difference_estimator.Measure(old_base, new_subject);
    197 
    198       VLOG(1) << "Compare " << old_element->Name()
    199               << " to " << new_element->Name()
    200               << " --> " << difference
    201               << " in " << (base::Time::Now() - start_compare).InSecondsF()
    202               << "s";
    203       if (difference == 0) {
    204         VLOG(1) << "Skip " << new_element->Name()
    205                 << " - identical to " << old_element->Name();
    206         best_difference = 0;
    207         best_old_element = NULL;
    208         break;
    209       }
    210       if (difference < best_difference) {
    211         best_difference = difference;
    212         best_old_element = old_element;
    213       }
    214     }
    215 
    216     if (best_old_element) {
    217       VLOG(1) << "Matched " << best_old_element->Name()
    218               << " to " << new_element->Name()
    219               << " --> " << best_difference;
    220       TransformationPatchGenerator* generator =
    221           MakeGenerator(best_old_element, new_element);
    222       if (generator)
    223         generators->push_back(generator);
    224     }
    225   }
    226 
    227   VLOG(1) << "done FindGenerators found " << generators->size()
    228           << " in " << (base::Time::Now() - start_find_time).InSecondsF()
    229           << "s";
    230 
    231   return C_OK;
    232 }
    233 
    234 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
    235   for (size_t i = 0;  i < generators->size();  ++i) {
    236     delete (*generators)[i];
    237   }
    238   generators->clear();
    239 }
    240 
    241 ////////////////////////////////////////////////////////////////////////////////
    242 
    243 Status GenerateEnsemblePatch(SourceStream* base,
    244                              SourceStream* update,
    245                              SinkStream* final_patch) {
    246   VLOG(1) << "start GenerateEnsemblePatch";
    247   base::Time start_time = base::Time::Now();
    248 
    249   Region old_region(base->Buffer(), base->Remaining());
    250   Region new_region(update->Buffer(), update->Remaining());
    251   Ensemble old_ensemble(old_region, "old");
    252   Ensemble new_ensemble(new_region, "new");
    253   std::vector<TransformationPatchGenerator*> generators;
    254   Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
    255                                             &generators);
    256   if (generators_status != C_OK)
    257     return generators_status;
    258 
    259   SinkStreamSet patch_streams;
    260 
    261   SinkStream* tranformation_descriptions      = patch_streams.stream(0);
    262   SinkStream* parameter_correction            = patch_streams.stream(1);
    263   SinkStream* transformed_elements_correction = patch_streams.stream(2);
    264   SinkStream* ensemble_correction             = patch_streams.stream(3);
    265 
    266   size_t number_of_transformations = generators.size();
    267   if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
    268     return C_STREAM_ERROR;
    269 
    270   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    271     ExecutableType kind = generators[i]->Kind();
    272     if (!tranformation_descriptions->WriteVarint32(kind))
    273       return C_STREAM_ERROR;
    274   }
    275 
    276   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    277     Status status =
    278         generators[i]->WriteInitialParameters(tranformation_descriptions);
    279     if (status != C_OK)
    280       return status;
    281   }
    282 
    283   //
    284   // Generate sub-patch for parameters.
    285   //
    286   SinkStreamSet predicted_parameters_sink;
    287   SinkStreamSet corrected_parameters_sink;
    288 
    289   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    290     SinkStreamSet single_predicted_parameters;
    291     Status status;
    292     status = generators[i]->PredictTransformParameters(
    293         &single_predicted_parameters);
    294     if (status != C_OK)
    295       return status;
    296     if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
    297       return C_STREAM_ERROR;
    298 
    299     SinkStreamSet single_corrected_parameters;
    300     status = generators[i]->CorrectedTransformParameters(
    301         &single_corrected_parameters);
    302     if (status != C_OK)
    303       return status;
    304     if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
    305       return C_STREAM_ERROR;
    306   }
    307 
    308   SinkStream linearized_predicted_parameters;
    309   SinkStream linearized_corrected_parameters;
    310 
    311   if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
    312     return C_STREAM_ERROR;
    313   if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
    314     return C_STREAM_ERROR;
    315 
    316   SourceStream predicted_parameters_source;
    317   SourceStream corrected_parameters_source;
    318   predicted_parameters_source.Init(linearized_predicted_parameters);
    319   corrected_parameters_source.Init(linearized_corrected_parameters);
    320 
    321   Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
    322                                              &corrected_parameters_source,
    323                                              parameter_correction);
    324   if (delta1_status != C_OK)
    325     return delta1_status;
    326 
    327   //
    328   // Generate sub-patch for elements.
    329   //
    330   corrected_parameters_source.Init(linearized_corrected_parameters);
    331   SourceStreamSet corrected_parameters_source_set;
    332   if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
    333     return C_STREAM_ERROR;
    334 
    335   SinkStreamSet predicted_transformed_elements;
    336   SinkStreamSet corrected_transformed_elements;
    337 
    338   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    339     SourceStreamSet single_parameters;
    340     if (!corrected_parameters_source_set.ReadSet(&single_parameters))
    341       return C_STREAM_ERROR;
    342     SinkStreamSet single_predicted_transformed_element;
    343     SinkStreamSet single_corrected_transformed_element;
    344     Status status = generators[i]->Transform(
    345         &single_parameters,
    346         &single_predicted_transformed_element,
    347         &single_corrected_transformed_element);
    348     if (status != C_OK)
    349       return status;
    350     if (!single_parameters.Empty())
    351       return C_STREAM_NOT_CONSUMED;
    352     if (!predicted_transformed_elements.WriteSet(
    353             &single_predicted_transformed_element))
    354       return C_STREAM_ERROR;
    355     if (!corrected_transformed_elements.WriteSet(
    356             &single_corrected_transformed_element))
    357       return C_STREAM_ERROR;
    358   }
    359 
    360   if (!corrected_parameters_source_set.Empty())
    361     return C_STREAM_NOT_CONSUMED;
    362 
    363   SinkStream linearized_predicted_transformed_elements;
    364   SinkStream linearized_corrected_transformed_elements;
    365 
    366   if (!predicted_transformed_elements.CopyTo(
    367           &linearized_predicted_transformed_elements))
    368     return C_STREAM_ERROR;
    369   if (!corrected_transformed_elements.CopyTo(
    370           &linearized_corrected_transformed_elements))
    371     return C_STREAM_ERROR;
    372 
    373   SourceStream predicted_transformed_elements_source;
    374   SourceStream corrected_transformed_elements_source;
    375   predicted_transformed_elements_source
    376       .Init(linearized_predicted_transformed_elements);
    377   corrected_transformed_elements_source
    378       .Init(linearized_corrected_transformed_elements);
    379 
    380   Status delta2_status =
    381       GenerateSimpleDelta(&predicted_transformed_elements_source,
    382                           &corrected_transformed_elements_source,
    383                           transformed_elements_correction);
    384   if (delta2_status != C_OK)
    385     return delta2_status;
    386 
    387   // Last use, free storage.
    388   linearized_predicted_transformed_elements.Retire();
    389 
    390   //
    391   // Generate sub-patch for whole enchilada.
    392   //
    393   SinkStream predicted_ensemble;
    394 
    395   if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
    396     return C_STREAM_ERROR;
    397 
    398   SourceStreamSet corrected_transformed_elements_source_set;
    399   corrected_transformed_elements_source
    400       .Init(linearized_corrected_transformed_elements);
    401   if (!corrected_transformed_elements_source_set
    402       .Init(&corrected_transformed_elements_source))
    403     return C_STREAM_ERROR;
    404 
    405   for (size_t i = 0;  i < number_of_transformations;  ++i) {
    406     SourceStreamSet single_corrected_transformed_element;
    407     if (!corrected_transformed_elements_source_set.ReadSet(
    408             &single_corrected_transformed_element))
    409       return C_STREAM_ERROR;
    410     Status status = generators[i]->Reform(&single_corrected_transformed_element,
    411                                           &predicted_ensemble);
    412     if (status != C_OK)
    413       return status;
    414     if (!single_corrected_transformed_element.Empty())
    415       return C_STREAM_NOT_CONSUMED;
    416   }
    417 
    418   if (!corrected_transformed_elements_source_set.Empty())
    419     return C_STREAM_NOT_CONSUMED;
    420 
    421   // No more references to this stream's buffer.
    422   linearized_corrected_transformed_elements.Retire();
    423 
    424   FreeGenerators(&generators);
    425 
    426   size_t final_patch_input_size = predicted_ensemble.Length();
    427   SourceStream predicted_ensemble_source;
    428   predicted_ensemble_source.Init(predicted_ensemble);
    429   Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
    430                                              update,
    431                                              ensemble_correction);
    432   if (delta3_status != C_OK)
    433     return delta3_status;
    434 
    435   //
    436   // Final output stream has a header followed by a StreamSet.
    437   //
    438   if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
    439       !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
    440       !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
    441                                                old_region.length())) ||
    442       !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
    443                                                new_region.length())) ||
    444       !final_patch->WriteSizeVarint32(final_patch_input_size) ||
    445       !patch_streams.CopyTo(final_patch)) {
    446     return C_STREAM_ERROR;
    447   }
    448 
    449   VLOG(1) << "done GenerateEnsemblePatch "
    450           << (base::Time::Now() - start_time).InSecondsF() << "s";
    451 
    452   return C_OK;
    453 }
    454 
    455 }  // namespace
    456