1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The main idea in Courgette is to do patching *under a tranformation*. The 6 // input is transformed into a new representation, patching occurs in the new 7 // repesentation, and then the tranform is reversed to get the patched data. 8 // 9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble'). 10 // Each of the elements has to go through the same set of steps in lock-step. 11 12 // This file contains the code to create the patch. 13 14 15 #include "courgette/ensemble.h" 16 17 #include <limits> 18 #include <vector> 19 20 #include "base/basictypes.h" 21 #include "base/logging.h" 22 #include "base/time/time.h" 23 24 #include "courgette/crc.h" 25 #include "courgette/difference_estimator.h" 26 #include "courgette/region.h" 27 #include "courgette/simple_delta.h" 28 #include "courgette/streams.h" 29 #include "courgette/third_party/bsdiff.h" 30 31 #include "courgette/patcher_x86_32.h" 32 #include "courgette/patch_generator_x86_32.h" 33 34 namespace courgette { 35 36 TransformationPatchGenerator::TransformationPatchGenerator( 37 Element* old_element, 38 Element* new_element, 39 TransformationPatcher* patcher) 40 : old_element_(old_element), 41 new_element_(new_element), 42 patcher_(patcher) { 43 } 44 45 TransformationPatchGenerator::~TransformationPatchGenerator() { 46 delete patcher_; 47 } 48 49 // The default implementation of PredictTransformParameters delegates to the 50 // patcher. 51 Status TransformationPatchGenerator::PredictTransformParameters( 52 SinkStreamSet* prediction) { 53 return patcher_->PredictTransformParameters(prediction); 54 } 55 56 // The default implementation of Reform delegates to the patcher. 57 Status TransformationPatchGenerator::Reform( 58 SourceStreamSet* transformed_element, 59 SinkStream* reformed_element) { 60 return patcher_->Reform(transformed_element, reformed_element); 61 } 62 63 // Makes a TransformationPatchGenerator of the appropriate variety for the 64 // Element kind. 65 TransformationPatchGenerator* MakeGenerator(Element* old_element, 66 Element* new_element) { 67 switch (new_element->kind()) { 68 case EXE_UNKNOWN: 69 break; 70 case EXE_WIN_32_X86: { 71 TransformationPatchGenerator* generator = 72 new PatchGeneratorX86_32( 73 old_element, 74 new_element, 75 new PatcherX86_32(old_element->region()), 76 EXE_WIN_32_X86); 77 return generator; 78 } 79 case EXE_ELF_32_X86: { 80 TransformationPatchGenerator* generator = 81 new PatchGeneratorX86_32( 82 old_element, 83 new_element, 84 new PatcherX86_32(old_element->region()), 85 EXE_ELF_32_X86); 86 return generator; 87 } 88 case EXE_ELF_32_ARM: { 89 TransformationPatchGenerator* generator = 90 new PatchGeneratorX86_32( 91 old_element, 92 new_element, 93 new PatcherX86_32(old_element->region()), 94 EXE_ELF_32_ARM); 95 return generator; 96 } 97 case EXE_WIN_32_X64: { 98 TransformationPatchGenerator* generator = 99 new PatchGeneratorX86_32( 100 old_element, 101 new_element, 102 new PatcherX86_32(old_element->region()), 103 EXE_WIN_32_X64); 104 return generator; 105 } 106 } 107 108 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); 109 return NULL; 110 } 111 112 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element 113 // from 'old' is matched as the closest element to multiple elements from 'new'. 114 // Each time this happens, the old element is transformed and serialized. This 115 // is a problem when the old element is huge compared with the new element 116 // because the mutliple serialized copies can be much bigger than the size of 117 // either ensemble. 118 // 119 // The right way to avoid this is to ensure any one element from 'old' is 120 // serialized once, which requires matching code in the patch application. 121 // 122 // This is a quick hack to avoid the problem by prohibiting a big difference in 123 // size between matching elements. 124 bool UnsafeDifference(Element* old_element, Element* new_element) { 125 double kMaxBloat = 2.0; 126 size_t kMinWorrysomeDifference = 2 << 20; // 2MB 127 size_t old_size = old_element->region().length(); 128 size_t new_size = new_element->region().length(); 129 size_t low_size = std::min(old_size, new_size); 130 size_t high_size = std::max(old_size, new_size); 131 if (high_size - low_size < kMinWorrysomeDifference) return false; 132 if (high_size < low_size * kMaxBloat) return false; 133 return true; 134 } 135 136 // FindGenerators finds TransformationPatchGenerators for the elements of 137 // |new_ensemble|. For each element of |new_ensemble| we find the closest 138 // matching element from |old_ensemble| and use that as the basis for 139 // differential compression. The elements have to be the same kind so as to 140 // support transformation into the same kind of 'new representation'. 141 // 142 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble, 143 std::vector<TransformationPatchGenerator*>* generators) { 144 base::Time start_find_time = base::Time::Now(); 145 old_ensemble->FindEmbeddedElements(); 146 new_ensemble->FindEmbeddedElements(); 147 VLOG(1) << "done FindEmbeddedElements " 148 << (base::Time::Now() - start_find_time).InSecondsF(); 149 150 std::vector<Element*> old_elements(old_ensemble->elements()); 151 std::vector<Element*> new_elements(new_ensemble->elements()); 152 153 VLOG(1) << "old has " << old_elements.size() << " elements"; 154 VLOG(1) << "new has " << new_elements.size() << " elements"; 155 156 DifferenceEstimator difference_estimator; 157 std::vector<DifferenceEstimator::Base*> bases; 158 159 base::Time start_bases_time = base::Time::Now(); 160 for (size_t i = 0; i < old_elements.size(); ++i) { 161 bases.push_back( 162 difference_estimator.MakeBase(old_elements[i]->region())); 163 } 164 VLOG(1) << "done make bases " 165 << (base::Time::Now() - start_bases_time).InSecondsF() << "s"; 166 167 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) { 168 Element* new_element = new_elements[new_index]; 169 DifferenceEstimator::Subject* new_subject = 170 difference_estimator.MakeSubject(new_element->region()); 171 172 // Search through old elements to find the best match. 173 // 174 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and 175 // new_ensemble probably have a very similar structure. We can make the 176 // search faster by making the comparison provided by DifferenceEstimator 177 // more nuanced, returning early if the measured difference is greater than 178 // the current best. This will be most effective if we can arrange that the 179 // first elements we try to match are likely the 'right' ones. We could 180 // prioritize elements that are of a similar size or similar position in the 181 // sequence of elements. 182 // 183 Element* best_old_element = NULL; 184 size_t best_difference = std::numeric_limits<size_t>::max(); 185 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) { 186 Element* old_element = old_elements[old_index]; 187 // Elements of different kinds are incompatible. 188 if (old_element->kind() != new_element->kind()) 189 continue; 190 191 if (UnsafeDifference(old_element, new_element)) 192 continue; 193 194 base::Time start_compare = base::Time::Now(); 195 DifferenceEstimator::Base* old_base = bases[old_index]; 196 size_t difference = difference_estimator.Measure(old_base, new_subject); 197 198 VLOG(1) << "Compare " << old_element->Name() 199 << " to " << new_element->Name() 200 << " --> " << difference 201 << " in " << (base::Time::Now() - start_compare).InSecondsF() 202 << "s"; 203 if (difference == 0) { 204 VLOG(1) << "Skip " << new_element->Name() 205 << " - identical to " << old_element->Name(); 206 best_difference = 0; 207 best_old_element = NULL; 208 break; 209 } 210 if (difference < best_difference) { 211 best_difference = difference; 212 best_old_element = old_element; 213 } 214 } 215 216 if (best_old_element) { 217 VLOG(1) << "Matched " << best_old_element->Name() 218 << " to " << new_element->Name() 219 << " --> " << best_difference; 220 TransformationPatchGenerator* generator = 221 MakeGenerator(best_old_element, new_element); 222 if (generator) 223 generators->push_back(generator); 224 } 225 } 226 227 VLOG(1) << "done FindGenerators found " << generators->size() 228 << " in " << (base::Time::Now() - start_find_time).InSecondsF() 229 << "s"; 230 231 return C_OK; 232 } 233 234 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) { 235 for (size_t i = 0; i < generators->size(); ++i) { 236 delete (*generators)[i]; 237 } 238 generators->clear(); 239 } 240 241 //////////////////////////////////////////////////////////////////////////////// 242 243 Status GenerateEnsemblePatch(SourceStream* base, 244 SourceStream* update, 245 SinkStream* final_patch) { 246 VLOG(1) << "start GenerateEnsemblePatch"; 247 base::Time start_time = base::Time::Now(); 248 249 Region old_region(base->Buffer(), base->Remaining()); 250 Region new_region(update->Buffer(), update->Remaining()); 251 Ensemble old_ensemble(old_region, "old"); 252 Ensemble new_ensemble(new_region, "new"); 253 std::vector<TransformationPatchGenerator*> generators; 254 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble, 255 &generators); 256 if (generators_status != C_OK) 257 return generators_status; 258 259 SinkStreamSet patch_streams; 260 261 SinkStream* tranformation_descriptions = patch_streams.stream(0); 262 SinkStream* parameter_correction = patch_streams.stream(1); 263 SinkStream* transformed_elements_correction = patch_streams.stream(2); 264 SinkStream* ensemble_correction = patch_streams.stream(3); 265 266 size_t number_of_transformations = generators.size(); 267 if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations)) 268 return C_STREAM_ERROR; 269 270 for (size_t i = 0; i < number_of_transformations; ++i) { 271 ExecutableType kind = generators[i]->Kind(); 272 if (!tranformation_descriptions->WriteVarint32(kind)) 273 return C_STREAM_ERROR; 274 } 275 276 for (size_t i = 0; i < number_of_transformations; ++i) { 277 Status status = 278 generators[i]->WriteInitialParameters(tranformation_descriptions); 279 if (status != C_OK) 280 return status; 281 } 282 283 // 284 // Generate sub-patch for parameters. 285 // 286 SinkStreamSet predicted_parameters_sink; 287 SinkStreamSet corrected_parameters_sink; 288 289 for (size_t i = 0; i < number_of_transformations; ++i) { 290 SinkStreamSet single_predicted_parameters; 291 Status status; 292 status = generators[i]->PredictTransformParameters( 293 &single_predicted_parameters); 294 if (status != C_OK) 295 return status; 296 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters)) 297 return C_STREAM_ERROR; 298 299 SinkStreamSet single_corrected_parameters; 300 status = generators[i]->CorrectedTransformParameters( 301 &single_corrected_parameters); 302 if (status != C_OK) 303 return status; 304 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters)) 305 return C_STREAM_ERROR; 306 } 307 308 SinkStream linearized_predicted_parameters; 309 SinkStream linearized_corrected_parameters; 310 311 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters)) 312 return C_STREAM_ERROR; 313 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters)) 314 return C_STREAM_ERROR; 315 316 SourceStream predicted_parameters_source; 317 SourceStream corrected_parameters_source; 318 predicted_parameters_source.Init(linearized_predicted_parameters); 319 corrected_parameters_source.Init(linearized_corrected_parameters); 320 321 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source, 322 &corrected_parameters_source, 323 parameter_correction); 324 if (delta1_status != C_OK) 325 return delta1_status; 326 327 // 328 // Generate sub-patch for elements. 329 // 330 corrected_parameters_source.Init(linearized_corrected_parameters); 331 SourceStreamSet corrected_parameters_source_set; 332 if (!corrected_parameters_source_set.Init(&corrected_parameters_source)) 333 return C_STREAM_ERROR; 334 335 SinkStreamSet predicted_transformed_elements; 336 SinkStreamSet corrected_transformed_elements; 337 338 for (size_t i = 0; i < number_of_transformations; ++i) { 339 SourceStreamSet single_parameters; 340 if (!corrected_parameters_source_set.ReadSet(&single_parameters)) 341 return C_STREAM_ERROR; 342 SinkStreamSet single_predicted_transformed_element; 343 SinkStreamSet single_corrected_transformed_element; 344 Status status = generators[i]->Transform( 345 &single_parameters, 346 &single_predicted_transformed_element, 347 &single_corrected_transformed_element); 348 if (status != C_OK) 349 return status; 350 if (!single_parameters.Empty()) 351 return C_STREAM_NOT_CONSUMED; 352 if (!predicted_transformed_elements.WriteSet( 353 &single_predicted_transformed_element)) 354 return C_STREAM_ERROR; 355 if (!corrected_transformed_elements.WriteSet( 356 &single_corrected_transformed_element)) 357 return C_STREAM_ERROR; 358 } 359 360 if (!corrected_parameters_source_set.Empty()) 361 return C_STREAM_NOT_CONSUMED; 362 363 SinkStream linearized_predicted_transformed_elements; 364 SinkStream linearized_corrected_transformed_elements; 365 366 if (!predicted_transformed_elements.CopyTo( 367 &linearized_predicted_transformed_elements)) 368 return C_STREAM_ERROR; 369 if (!corrected_transformed_elements.CopyTo( 370 &linearized_corrected_transformed_elements)) 371 return C_STREAM_ERROR; 372 373 SourceStream predicted_transformed_elements_source; 374 SourceStream corrected_transformed_elements_source; 375 predicted_transformed_elements_source 376 .Init(linearized_predicted_transformed_elements); 377 corrected_transformed_elements_source 378 .Init(linearized_corrected_transformed_elements); 379 380 Status delta2_status = 381 GenerateSimpleDelta(&predicted_transformed_elements_source, 382 &corrected_transformed_elements_source, 383 transformed_elements_correction); 384 if (delta2_status != C_OK) 385 return delta2_status; 386 387 // Last use, free storage. 388 linearized_predicted_transformed_elements.Retire(); 389 390 // 391 // Generate sub-patch for whole enchilada. 392 // 393 SinkStream predicted_ensemble; 394 395 if (!predicted_ensemble.Write(base->Buffer(), base->Remaining())) 396 return C_STREAM_ERROR; 397 398 SourceStreamSet corrected_transformed_elements_source_set; 399 corrected_transformed_elements_source 400 .Init(linearized_corrected_transformed_elements); 401 if (!corrected_transformed_elements_source_set 402 .Init(&corrected_transformed_elements_source)) 403 return C_STREAM_ERROR; 404 405 for (size_t i = 0; i < number_of_transformations; ++i) { 406 SourceStreamSet single_corrected_transformed_element; 407 if (!corrected_transformed_elements_source_set.ReadSet( 408 &single_corrected_transformed_element)) 409 return C_STREAM_ERROR; 410 Status status = generators[i]->Reform(&single_corrected_transformed_element, 411 &predicted_ensemble); 412 if (status != C_OK) 413 return status; 414 if (!single_corrected_transformed_element.Empty()) 415 return C_STREAM_NOT_CONSUMED; 416 } 417 418 if (!corrected_transformed_elements_source_set.Empty()) 419 return C_STREAM_NOT_CONSUMED; 420 421 // No more references to this stream's buffer. 422 linearized_corrected_transformed_elements.Retire(); 423 424 FreeGenerators(&generators); 425 426 size_t final_patch_input_size = predicted_ensemble.Length(); 427 SourceStream predicted_ensemble_source; 428 predicted_ensemble_source.Init(predicted_ensemble); 429 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source, 430 update, 431 ensemble_correction); 432 if (delta3_status != C_OK) 433 return delta3_status; 434 435 // 436 // Final output stream has a header followed by a StreamSet. 437 // 438 if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) || 439 !final_patch->WriteVarint32(CourgettePatchFile::kVersion) || 440 !final_patch->WriteVarint32(CalculateCrc(old_region.start(), 441 old_region.length())) || 442 !final_patch->WriteVarint32(CalculateCrc(new_region.start(), 443 new_region.length())) || 444 !final_patch->WriteSizeVarint32(final_patch_input_size) || 445 !patch_streams.CopyTo(final_patch)) { 446 return C_STREAM_ERROR; 447 } 448 449 VLOG(1) << "done GenerateEnsemblePatch " 450 << (base::Time::Now() - start_time).InSecondsF() << "s"; 451 452 return C_OK; 453 } 454 455 } // namespace 456