1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The main idea in Courgette is to do patching *under a tranformation*. The 6 // input is transformed into a new representation, patching occurs in the new 7 // repesentation, and then the tranform is reversed to get the patched data. 8 // 9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble'). 10 // Each of the elements has to go through the same set of steps in lock-step. 11 12 // This file contains the code to create the patch. 13 14 15 #include "courgette/ensemble.h" 16 17 #include <limits> 18 #include <vector> 19 20 #include "base/basictypes.h" 21 #include "base/logging.h" 22 #include "base/time/time.h" 23 24 #include "courgette/crc.h" 25 #include "courgette/difference_estimator.h" 26 #include "courgette/region.h" 27 #include "courgette/simple_delta.h" 28 #include "courgette/streams.h" 29 #include "courgette/third_party/bsdiff.h" 30 31 #include "courgette/patcher_x86_32.h" 32 #include "courgette/patch_generator_x86_32.h" 33 34 namespace courgette { 35 36 TransformationPatchGenerator::TransformationPatchGenerator( 37 Element* old_element, 38 Element* new_element, 39 TransformationPatcher* patcher) 40 : old_element_(old_element), 41 new_element_(new_element), 42 patcher_(patcher) { 43 } 44 45 TransformationPatchGenerator::~TransformationPatchGenerator() { 46 delete patcher_; 47 } 48 49 // The default implementation of PredictTransformParameters delegates to the 50 // patcher. 51 Status TransformationPatchGenerator::PredictTransformParameters( 52 SinkStreamSet* prediction) { 53 return patcher_->PredictTransformParameters(prediction); 54 } 55 56 // The default implementation of Reform delegates to the patcher. 57 Status TransformationPatchGenerator::Reform( 58 SourceStreamSet* transformed_element, 59 SinkStream* reformed_element) { 60 return patcher_->Reform(transformed_element, reformed_element); 61 } 62 63 // Makes a TransformationPatchGenerator of the appropriate variety for the 64 // Element kind. 65 TransformationPatchGenerator* MakeGenerator(Element* old_element, 66 Element* new_element) { 67 switch (new_element->kind()) { 68 case EXE_UNKNOWN: 69 break; 70 case EXE_WIN_32_X86: { 71 TransformationPatchGenerator* generator = 72 new PatchGeneratorX86_32( 73 old_element, 74 new_element, 75 new PatcherX86_32(old_element->region()), 76 EXE_WIN_32_X86); 77 return generator; 78 } 79 case EXE_ELF_32_X86: { 80 TransformationPatchGenerator* generator = 81 new PatchGeneratorX86_32( 82 old_element, 83 new_element, 84 new PatcherX86_32(old_element->region()), 85 EXE_ELF_32_X86); 86 return generator; 87 } 88 case EXE_ELF_32_ARM: { 89 TransformationPatchGenerator* generator = 90 new PatchGeneratorX86_32( 91 old_element, 92 new_element, 93 new PatcherX86_32(old_element->region()), 94 EXE_ELF_32_ARM); 95 return generator; 96 } 97 } 98 99 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); 100 return NULL; 101 } 102 103 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element 104 // from 'old' is matched as the closest element to multiple elements from 'new'. 105 // Each time this happens, the old element is transformed and serialized. This 106 // is a problem when the old element is huge compared with the new element 107 // because the mutliple serialized copies can be much bigger than the size of 108 // either ensemble. 109 // 110 // The right way to avoid this is to ensure any one element from 'old' is 111 // serialized once, which requires matching code in the patch application. 112 // 113 // This is a quick hack to avoid the problem by prohibiting a big difference in 114 // size between matching elements. 115 bool UnsafeDifference(Element* old_element, Element* new_element) { 116 double kMaxBloat = 2.0; 117 size_t kMinWorrysomeDifference = 2 << 20; // 2MB 118 size_t old_size = old_element->region().length(); 119 size_t new_size = new_element->region().length(); 120 size_t low_size = std::min(old_size, new_size); 121 size_t high_size = std::max(old_size, new_size); 122 if (high_size - low_size < kMinWorrysomeDifference) return false; 123 if (high_size < low_size * kMaxBloat) return false; 124 return true; 125 } 126 127 // FindGenerators finds TransformationPatchGenerators for the elements of 128 // |new_ensemble|. For each element of |new_ensemble| we find the closest 129 // matching element from |old_ensemble| and use that as the basis for 130 // differential compression. The elements have to be the same kind so as to 131 // support transformation into the same kind of 'new representation'. 132 // 133 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble, 134 std::vector<TransformationPatchGenerator*>* generators) { 135 base::Time start_find_time = base::Time::Now(); 136 old_ensemble->FindEmbeddedElements(); 137 new_ensemble->FindEmbeddedElements(); 138 VLOG(1) << "done FindEmbeddedElements " 139 << (base::Time::Now() - start_find_time).InSecondsF(); 140 141 std::vector<Element*> old_elements(old_ensemble->elements()); 142 std::vector<Element*> new_elements(new_ensemble->elements()); 143 144 VLOG(1) << "old has " << old_elements.size() << " elements"; 145 VLOG(1) << "new has " << new_elements.size() << " elements"; 146 147 DifferenceEstimator difference_estimator; 148 std::vector<DifferenceEstimator::Base*> bases; 149 150 base::Time start_bases_time = base::Time::Now(); 151 for (size_t i = 0; i < old_elements.size(); ++i) { 152 bases.push_back( 153 difference_estimator.MakeBase(old_elements[i]->region())); 154 } 155 VLOG(1) << "done make bases " 156 << (base::Time::Now() - start_bases_time).InSecondsF() << "s"; 157 158 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) { 159 Element* new_element = new_elements[new_index]; 160 DifferenceEstimator::Subject* new_subject = 161 difference_estimator.MakeSubject(new_element->region()); 162 163 // Search through old elements to find the best match. 164 // 165 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and 166 // new_ensemble probably have a very similar structure. We can make the 167 // search faster by making the comparison provided by DifferenceEstimator 168 // more nuanced, returning early if the measured difference is greater than 169 // the current best. This will be most effective if we can arrange that the 170 // first elements we try to match are likely the 'right' ones. We could 171 // prioritize elements that are of a similar size or similar position in the 172 // sequence of elements. 173 // 174 Element* best_old_element = NULL; 175 size_t best_difference = std::numeric_limits<size_t>::max(); 176 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) { 177 Element* old_element = old_elements[old_index]; 178 // Elements of different kinds are incompatible. 179 if (old_element->kind() != new_element->kind()) 180 continue; 181 182 if (UnsafeDifference(old_element, new_element)) 183 continue; 184 185 base::Time start_compare = base::Time::Now(); 186 DifferenceEstimator::Base* old_base = bases[old_index]; 187 size_t difference = difference_estimator.Measure(old_base, new_subject); 188 189 VLOG(1) << "Compare " << old_element->Name() 190 << " to " << new_element->Name() 191 << " --> " << difference 192 << " in " << (base::Time::Now() - start_compare).InSecondsF() 193 << "s"; 194 if (difference == 0) { 195 VLOG(1) << "Skip " << new_element->Name() 196 << " - identical to " << old_element->Name(); 197 best_difference = 0; 198 best_old_element = NULL; 199 break; 200 } 201 if (difference < best_difference) { 202 best_difference = difference; 203 best_old_element = old_element; 204 } 205 } 206 207 if (best_old_element) { 208 VLOG(1) << "Matched " << best_old_element->Name() 209 << " to " << new_element->Name() 210 << " --> " << best_difference; 211 TransformationPatchGenerator* generator = 212 MakeGenerator(best_old_element, new_element); 213 if (generator) 214 generators->push_back(generator); 215 } 216 } 217 218 VLOG(1) << "done FindGenerators found " << generators->size() 219 << " in " << (base::Time::Now() - start_find_time).InSecondsF() 220 << "s"; 221 222 return C_OK; 223 } 224 225 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) { 226 for (size_t i = 0; i < generators->size(); ++i) { 227 delete (*generators)[i]; 228 } 229 generators->clear(); 230 } 231 232 //////////////////////////////////////////////////////////////////////////////// 233 234 Status GenerateEnsemblePatch(SourceStream* base, 235 SourceStream* update, 236 SinkStream* final_patch) { 237 VLOG(1) << "start GenerateEnsemblePatch"; 238 base::Time start_time = base::Time::Now(); 239 240 Region old_region(base->Buffer(), base->Remaining()); 241 Region new_region(update->Buffer(), update->Remaining()); 242 Ensemble old_ensemble(old_region, "old"); 243 Ensemble new_ensemble(new_region, "new"); 244 std::vector<TransformationPatchGenerator*> generators; 245 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble, 246 &generators); 247 if (generators_status != C_OK) 248 return generators_status; 249 250 SinkStreamSet patch_streams; 251 252 SinkStream* tranformation_descriptions = patch_streams.stream(0); 253 SinkStream* parameter_correction = patch_streams.stream(1); 254 SinkStream* transformed_elements_correction = patch_streams.stream(2); 255 SinkStream* ensemble_correction = patch_streams.stream(3); 256 257 size_t number_of_transformations = generators.size(); 258 if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations)) 259 return C_STREAM_ERROR; 260 261 for (size_t i = 0; i < number_of_transformations; ++i) { 262 ExecutableType kind = generators[i]->Kind(); 263 if (!tranformation_descriptions->WriteVarint32(kind)) 264 return C_STREAM_ERROR; 265 } 266 267 for (size_t i = 0; i < number_of_transformations; ++i) { 268 Status status = 269 generators[i]->WriteInitialParameters(tranformation_descriptions); 270 if (status != C_OK) 271 return status; 272 } 273 274 // 275 // Generate sub-patch for parameters. 276 // 277 SinkStreamSet predicted_parameters_sink; 278 SinkStreamSet corrected_parameters_sink; 279 280 for (size_t i = 0; i < number_of_transformations; ++i) { 281 SinkStreamSet single_predicted_parameters; 282 Status status; 283 status = generators[i]->PredictTransformParameters( 284 &single_predicted_parameters); 285 if (status != C_OK) 286 return status; 287 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters)) 288 return C_STREAM_ERROR; 289 290 SinkStreamSet single_corrected_parameters; 291 status = generators[i]->CorrectedTransformParameters( 292 &single_corrected_parameters); 293 if (status != C_OK) 294 return status; 295 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters)) 296 return C_STREAM_ERROR; 297 } 298 299 SinkStream linearized_predicted_parameters; 300 SinkStream linearized_corrected_parameters; 301 302 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters)) 303 return C_STREAM_ERROR; 304 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters)) 305 return C_STREAM_ERROR; 306 307 SourceStream predicted_parameters_source; 308 SourceStream corrected_parameters_source; 309 predicted_parameters_source.Init(linearized_predicted_parameters); 310 corrected_parameters_source.Init(linearized_corrected_parameters); 311 312 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source, 313 &corrected_parameters_source, 314 parameter_correction); 315 if (delta1_status != C_OK) 316 return delta1_status; 317 318 // 319 // Generate sub-patch for elements. 320 // 321 corrected_parameters_source.Init(linearized_corrected_parameters); 322 SourceStreamSet corrected_parameters_source_set; 323 if (!corrected_parameters_source_set.Init(&corrected_parameters_source)) 324 return C_STREAM_ERROR; 325 326 SinkStreamSet predicted_transformed_elements; 327 SinkStreamSet corrected_transformed_elements; 328 329 for (size_t i = 0; i < number_of_transformations; ++i) { 330 SourceStreamSet single_parameters; 331 if (!corrected_parameters_source_set.ReadSet(&single_parameters)) 332 return C_STREAM_ERROR; 333 SinkStreamSet single_predicted_transformed_element; 334 SinkStreamSet single_corrected_transformed_element; 335 Status status = generators[i]->Transform( 336 &single_parameters, 337 &single_predicted_transformed_element, 338 &single_corrected_transformed_element); 339 if (status != C_OK) 340 return status; 341 if (!single_parameters.Empty()) 342 return C_STREAM_NOT_CONSUMED; 343 if (!predicted_transformed_elements.WriteSet( 344 &single_predicted_transformed_element)) 345 return C_STREAM_ERROR; 346 if (!corrected_transformed_elements.WriteSet( 347 &single_corrected_transformed_element)) 348 return C_STREAM_ERROR; 349 } 350 351 if (!corrected_parameters_source_set.Empty()) 352 return C_STREAM_NOT_CONSUMED; 353 354 SinkStream linearized_predicted_transformed_elements; 355 SinkStream linearized_corrected_transformed_elements; 356 357 if (!predicted_transformed_elements.CopyTo( 358 &linearized_predicted_transformed_elements)) 359 return C_STREAM_ERROR; 360 if (!corrected_transformed_elements.CopyTo( 361 &linearized_corrected_transformed_elements)) 362 return C_STREAM_ERROR; 363 364 SourceStream predicted_transformed_elements_source; 365 SourceStream corrected_transformed_elements_source; 366 predicted_transformed_elements_source 367 .Init(linearized_predicted_transformed_elements); 368 corrected_transformed_elements_source 369 .Init(linearized_corrected_transformed_elements); 370 371 Status delta2_status = 372 GenerateSimpleDelta(&predicted_transformed_elements_source, 373 &corrected_transformed_elements_source, 374 transformed_elements_correction); 375 if (delta2_status != C_OK) 376 return delta2_status; 377 378 // Last use, free storage. 379 linearized_predicted_transformed_elements.Retire(); 380 381 // 382 // Generate sub-patch for whole enchilada. 383 // 384 SinkStream predicted_ensemble; 385 386 if (!predicted_ensemble.Write(base->Buffer(), base->Remaining())) 387 return C_STREAM_ERROR; 388 389 SourceStreamSet corrected_transformed_elements_source_set; 390 corrected_transformed_elements_source 391 .Init(linearized_corrected_transformed_elements); 392 if (!corrected_transformed_elements_source_set 393 .Init(&corrected_transformed_elements_source)) 394 return C_STREAM_ERROR; 395 396 for (size_t i = 0; i < number_of_transformations; ++i) { 397 SourceStreamSet single_corrected_transformed_element; 398 if (!corrected_transformed_elements_source_set.ReadSet( 399 &single_corrected_transformed_element)) 400 return C_STREAM_ERROR; 401 Status status = generators[i]->Reform(&single_corrected_transformed_element, 402 &predicted_ensemble); 403 if (status != C_OK) 404 return status; 405 if (!single_corrected_transformed_element.Empty()) 406 return C_STREAM_NOT_CONSUMED; 407 } 408 409 if (!corrected_transformed_elements_source_set.Empty()) 410 return C_STREAM_NOT_CONSUMED; 411 412 // No more references to this stream's buffer. 413 linearized_corrected_transformed_elements.Retire(); 414 415 FreeGenerators(&generators); 416 417 size_t final_patch_input_size = predicted_ensemble.Length(); 418 SourceStream predicted_ensemble_source; 419 predicted_ensemble_source.Init(predicted_ensemble); 420 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source, 421 update, 422 ensemble_correction); 423 if (delta3_status != C_OK) 424 return delta3_status; 425 426 // 427 // Final output stream has a header followed by a StreamSet. 428 // 429 if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) || 430 !final_patch->WriteVarint32(CourgettePatchFile::kVersion) || 431 !final_patch->WriteVarint32(CalculateCrc(old_region.start(), 432 old_region.length())) || 433 !final_patch->WriteVarint32(CalculateCrc(new_region.start(), 434 new_region.length())) || 435 !final_patch->WriteSizeVarint32(final_patch_input_size) || 436 !patch_streams.CopyTo(final_patch)) { 437 return C_STREAM_ERROR; 438 } 439 440 VLOG(1) << "done GenerateEnsemblePatch " 441 << (base::Time::Now() - start_time).InSecondsF() << "s"; 442 443 return C_OK; 444 } 445 446 } // namespace 447