1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // Implements a Decoder for the format described in 17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. 18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html 19 // 20 // The RFC describes the possibility of using a secondary compressor 21 // to further reduce the size of each section of the VCDIFF output. 22 // That feature is not supported in this implementation of the encoder 23 // and decoder. 24 // No secondary compressor types have been publicly registered with 25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids 26 // in the more than five years since the registry was created, so there 27 // is no standard set of compressor IDs which would be generated by other 28 // encoders or accepted by other decoders. 
29 30 #include <config.h> 31 #include "google/vcdecoder.h" 32 #include <stddef.h> // size_t, ptrdiff_t 33 #include <stdint.h> // int32_t 34 #include <string.h> // memcpy, memset 35 #include <memory> // auto_ptr 36 #include <string> 37 #include "addrcache.h" 38 #include "checksum.h" 39 #include "codetable.h" 40 #include "decodetable.h" 41 #include "headerparser.h" 42 #include "logging.h" 43 #include "google/output_string.h" 44 #include "varint_bigendian.h" 45 #include "vcdiff_defs.h" 46 47 namespace open_vcdiff { 48 49 // This class is used to parse delta file windows as described 50 // in RFC sections 4.2 and 4.3. Its methods are not thread-safe. 51 // 52 // Here is the window format copied from the RFC: 53 // 54 // Window1 55 // Win_Indicator - byte 56 // [Source segment size] - integer 57 // [Source segment position] - integer 58 // The delta encoding of the target window 59 // Length of the delta encoding - integer 60 // The delta encoding 61 // Size of the target window - integer 62 // Delta_Indicator - byte 63 // Length of data for ADDs and RUNs - integer 64 // Length of instructions and sizes - integer 65 // Length of addresses for COPYs - integer 66 // Data section for ADDs and RUNs - array of bytes 67 // Instructions and sizes section - array of bytes 68 // Addresses section for COPYs - array of bytes 69 // Window2 70 // ... 71 // 72 // Sample usage: 73 // 74 // VCDiffDeltaFileWindow delta_window_; 75 // delta_window_.Init(parent); 76 // ParseableChunk parseable_chunk(input_buffer, 77 // input_size, 78 // leftover_unencoded_bytes); 79 // switch (delta_window_.DecodeWindows(&parseable_chunk)) { 80 // case RESULT_END_OF_DATA: 81 // <Read more input and retry DecodeWindows later.> 82 // case RESULT_ERROR: 83 // <Handle error case. An error log message has already been generated.> 84 // } 85 // 86 // DecodeWindows consumes as many windows from the input as it can. 
// It only needs to be placed within a loop if the loop is used to obtain more
// input (delta file) data.
//
class VCDiffDeltaFileWindow {
 public:
  VCDiffDeltaFileWindow();
  ~VCDiffDeltaFileWindow();

  // Init() should be called immediately after constructing the
  // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be
  // invoked, or an error will occur.
  void Init(VCDiffStreamingDecoderImpl* parent);

  // Resets the pointers to the data sections in the current window.
  void Reset();

  // Forwards the code table selection to the code table reader (reader_)
  // and returns whatever the reader returns.
  bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
                    unsigned char max_mode) {
    return reader_.UseCodeTable(code_table_data, max_mode);
  }

  // Decodes as many delta windows as possible using the input data from
  // *parseable_chunk. Appends the decoded target windows to
  // parent_->decoded_target(). Returns RESULT_SUCCESS on success, or
  // RESULT_END_OF_DATA if the end of input was reached before the entire window
  // could be decoded and more input is expected (only possible if
  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
  // decoding. In the RESULT_ERROR case, the parse position of
  // *parseable_chunk is undefined; otherwise, parseable_chunk->Advance() is
  // called to point to the input data position just after the data that has
  // been decoded.
  //
  // If the parent's planned target file size is not set to kUnlimitedBytes,
  // then the decoder expects *exactly* this number of target bytes to be
  // decoded from one or more delta file windows. If this number is met exactly
  // after finishing a delta window, this function will return RESULT_SUCCESS
  // without processing any more bytes from *parseable_chunk. If this number is
  // exceeded while decoding a window, but was not met before starting that
  // window, then RESULT_ERROR will be returned.
  //
  VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);

  // Returns the current value of found_header_ (see the description of that
  // member below).
  bool FoundWindowHeader() const {
    return found_header_;
  }

  bool MoreDataExpected() const {
    // When parsing an interleaved-format delta file,
    // every time DecodeBody() exits, interleaved_bytes_expected_
    // will be decremented by the number of bytes parsed. If it
    // reaches zero, then there is no more data expected because
    // the size of the interleaved section (given in the window
    // header) has been reached.
    return IsInterleaved() && (interleaved_bytes_expected_ > 0);
  }

  // Accessors for target_window_start_pos_, the index in decoded_target at
  // which the first byte of the current target window was/will be written.
  size_t target_window_start_pos() const { return target_window_start_pos_; }

  void set_target_window_start_pos(size_t new_start_pos) {
    target_window_start_pos_ = new_start_pos;
  }

  // Returns the number of bytes remaining to be decoded in the target window.
  // If not in the process of decoding a window, returns 0.
  size_t TargetBytesRemaining();

 private:
  // Reads the header of the window section as described in RFC sections 4.2 and
  // 4.3, up to and including the value "Length of addresses for COPYs". If the
  // entire header is found, this function sets up the DeltaWindowSections
  // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
  // that the decoder can begin decoding the opcodes in these sections. Returns
  // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
  // available data was reached before the entire header could be read. (The
  // latter may be an error condition if there is no more data available.)
  // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
  // parsed header.
  //
  VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);

  // After the window header has been parsed as far as the Delta_Indicator,
  // this function is called to parse the following delta window header fields:
  //
  //     Length of data for ADDs and RUNs - integer (VarintBE format)
  //     Length of instructions and sizes - integer (VarintBE format)
  //     Length of addresses for COPYs    - integer (VarintBE format)
  //
  // If has_checksum_ is true, it also looks for the following element:
  //
  //     Adler32 checksum - unsigned 32-bit integer (VarintBE format)
  //
  // It sets up the DeltaWindowSections instructions_and_sizes_,
  // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
  // is being used, all three sections will include the entire window body; if
  // the standard format is used, three non-overlapping window sections will be
  // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
  // if standard format is being used and there is not enough input data to read
  // the entire window body. Otherwise, returns RESULT_SUCCESS.
  VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);

  // Decodes the body of the window section as described in RFC sections 4.3,
  // including the sections "Data section for ADDs and RUNs", "Instructions
  // and sizes section", and "Addresses section for COPYs". These sections
  // must already have been set up by ReadHeader(). Returns a
  // non-negative value on success, or RESULT_END_OF_DATA if the end of input
  // was reached before the entire window could be decoded (only possible if
  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
  // decoding. Appends as much of the decoded target window as possible to
  // parent->decoded_target().
  //
  int DecodeBody(ParseableChunk* parseable_chunk);

  // Returns the number of bytes already decoded into the target window.
  size_t TargetBytesDecoded();

  // Decodes a single ADD instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeAdd(size_t size);

  // Decodes a single RUN instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeRun(size_t size);

  // Decodes a single COPY instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeCopy(size_t size, unsigned char mode);

  // When using the interleaved format, this function is called both on parsing
  // the header and on resuming after a RESULT_END_OF_DATA was returned from a
  // previous call to DecodeBody(). It sets up all three section pointers to
  // reference the same interleaved stream of instructions, sizes, addresses,
  // and data. These pointers must be reset every time that work resumes on a
  // delta window, because the input data string may have been changed or
  // resized since DecodeBody() last returned.
  //
  // data_pos and data_end delimit the input data that is currently available.
  void UpdateInterleavedSectionPointers(const char* data_pos,
                                        const char* data_end) {
    const ptrdiff_t available_data = data_end - data_pos;
    // Don't read past the end of currently-available data: the section length
    // is the smaller of the bytes available and the bytes still expected.
    if (available_data > interleaved_bytes_expected_) {
      instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
    } else {
      instructions_and_sizes_.Init(data_pos, available_data);
    }
    // The other two sections are initialized from instructions_and_sizes_
    // itself rather than from their own data range; IsInterleaved() relies on
    // this arrangement.
    data_for_add_and_run_.Init(&instructions_and_sizes_);
    addresses_for_copy_.Init(&instructions_and_sizes_);
  }

  // If true, the interleaved format described in AllowInterleaved() is used
  // for the current delta file. Only valid after ReadHeader() has been
  // called and returned RESULT_SUCCESS (i.e., the whole header was parsed),
  // but before the window has finished decoding.
  //
  bool IsInterleaved() const {
    // If the sections are interleaved, both addresses_for_copy_ and
    // data_for_add_and_run_ should point at instructions_and_sizes_.
    return !addresses_for_copy_.IsOwned();
  }

  // Executes a single COPY or ADD instruction, appending data to
  // parent_->decoded_target().
  void CopyBytes(const char* data, size_t size);

  // Executes a single RUN instruction, appending data to
  // parent_->decoded_target().
  void RunByte(unsigned char byte, size_t size);

  // Advance *parseable_chunk to point to the current position in the
  // instructions/sizes section. If interleaved format is used, then
  // decrement the number of expected bytes in the instructions/sizes section
  // by the number of instruction/size bytes parsed.
  void UpdateInstructionPointer(ParseableChunk* parseable_chunk);

  // The parent object which was passed to Init().
  VCDiffStreamingDecoderImpl* parent_;

  // This value will be true if VCDiffDeltaFileWindow::ReadHeader()
  // has been called and succeeded in parsing the delta window header, but the
  // entire window has not yet been decoded.
  bool found_header_;

  // Contents and length of the current source window. source_segment_ptr_
  // will be non-NULL if (a) the window section header for the current window
  // has been read, but the window has not yet finished decoding; or
  // (b) the window did not specify a source segment.
  const char* source_segment_ptr_;
  size_t source_segment_length_;

  // The delta encoding window sections as defined in RFC section 4.3.
  // The pointer for each section will be incremented as data is consumed and
  // decoded from that section. If the interleaved format is used,
  // data_for_add_and_run_ and addresses_for_copy_ will both point to
  // instructions_and_sizes_; otherwise, they will be separate data sections.
  //
  DeltaWindowSection instructions_and_sizes_;
  DeltaWindowSection data_for_add_and_run_;
  DeltaWindowSection addresses_for_copy_;

  // The expected bytes left to decode in instructions_and_sizes_. Only used
  // for the interleaved format.
  int interleaved_bytes_expected_;

  // The expected length of the target window once it has been decoded.
  size_t target_window_length_;

  // The index in decoded_target at which the first byte of the current
  // target window was/will be written.
  size_t target_window_start_pos_;

  // If has_checksum_ is true, then expected_checksum_ contains an Adler32
  // checksum of the target window data. This is an extension included in the
  // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
  bool has_checksum_;
  VCDChecksum expected_checksum_;

  // Code table reader; UseCodeTable() forwards to it.
  VCDiffCodeTableReader reader_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
  void operator=(const VCDiffDeltaFileWindow&);
};

// *** Inline methods for VCDiffDeltaFileWindow

// Starts with no parent; the remaining members are set by Reset().
inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
  Reset();
}

inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }

// Records the parent decoder; no other state is touched.
inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
  parent_ = parent;
}

// Holds the decoder state shared across calls to DecodeChunk(): the
// dictionary, buffered input, decoded target data, size limits, and the
// delta window parser (delta_window_).
class VCDiffStreamingDecoderImpl {
 public:
  typedef std::string string;

  // The default maximum target file size (and target window size) if
  // SetMaximumTargetFileSize() is not called.
  static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB

  // The largest value that can be passed to SetMaximumTargetWindowSize().
  // Using a larger value will result in an error.
  static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX

  // A constant that is the default value for planned_target_file_size_,
  // indicating that the decoder does not have an expected length
  // for the target data.
  static const size_t kUnlimitedBytes = static_cast<size_t>(-3);

  VCDiffStreamingDecoderImpl();
  ~VCDiffStreamingDecoderImpl();

  // Resets all member variables to their initial states.
  void Reset();

  // These functions are identical to their counterparts
  // in VCDiffStreamingDecoder.
  //
  void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);

  bool DecodeChunk(const char* data,
                   size_t len,
                   OutputStringInterface* output_string);

  bool FinishDecoding();

  // If true, the version of VCDIFF used in the current delta file allows
  // for the interleaved format, in which instructions, addresses and data
  // are all sent interleaved in the instructions section of each window
  // rather than being sent in separate sections. This is not part of
  // the VCDIFF draft standard, so we've defined a special version code
  // 'S' which implies that this feature is available. Even if interleaving
  // is supported, it is not mandatory; interleaved format will be implied
  // if the address and data sections are both zero-length.
  //
  bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }

  // If true, the version of VCDIFF used in the current delta file allows
  // each delta window to contain an Adler32 checksum of the target window data.
  // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
  // this checksum will appear as a variable-length integer, just after the
  // "length of addresses for COPYs" value and before the window data sections.
  // It is possible for some windows in a delta file to use the checksum feature
  // and for others not to use it (and leave the flag bit set to 0.)
  // Just as with AllowInterleaved(), this extension is not part of the draft
  // standard and is only available when the version code 'S' is specified.
  //
  bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }

  // Stores the new maximum target file size. Unlike
  // SetMaximumTargetWindowSize(), no upper limit is checked here, so this
  // always returns true.
  bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
    maximum_target_file_size_ = new_maximum_target_file_size;
    return true;
  }

  // Stores the new maximum target window size and returns true, unless the
  // value exceeds kTargetSizeLimit, in which case an error is logged, the
  // current setting is left unchanged, and false is returned.
  bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
    if (new_maximum_target_window_size > kTargetSizeLimit) {
      LOG(ERROR) << "Specified maximum target window size "
                 << new_maximum_target_window_size << " exceeds limit of "
                 << kTargetSizeLimit << " bytes" << LOG_ENDL;
      return false;
    }
    maximum_target_window_size_ = new_maximum_target_window_size;
    return true;
  }

  // See description of planned_target_file_size_, below.
  bool HasPlannedTargetFileSize() const {
    return planned_target_file_size_ != kUnlimitedBytes;
  }

  void SetPlannedTargetFileSize(size_t planned_target_file_size) {
    planned_target_file_size_ = planned_target_file_size;
  }

  // Adds window_size to the running total described at
  // total_of_target_window_sizes_, below.
  void AddToTotalTargetWindowSize(size_t window_size) {
    total_of_target_window_sizes_ += window_size;
  }

  // Checks to see whether the decoded target data has reached its planned size.
  bool ReachedPlannedTargetFileSize() const {
    if (!HasPlannedTargetFileSize()) {
      return false;
    }
    // The planned target file size should not have been exceeded.
    // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
    // each target window would not make the target file exceed that limit, and
    // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
    // exceeds the advertised target window size.
    if (total_of_target_window_sizes_ > planned_target_file_size_) {
      LOG(DFATAL) << "Internal error: Decoded data size "
                  << total_of_target_window_sizes_
                  << " exceeds planned target file size "
                  << planned_target_file_size_ << LOG_ENDL;
      // Report the planned size as reached even though it was overshot.
      return true;
    }
    return total_of_target_window_sizes_ == planned_target_file_size_;
  }

  // Checks to see whether adding a new target window of the specified size
  // would exceed the planned target file size, the maximum target file size,
  // or the maximum target window size. If so, logs an error and returns true;
  // otherwise, returns false.
  bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;

  // Returns the amount of input data passed to the last DecodeChunk()
  // that was not consumed by the decoder. This is essential if
  // SetPlannedTargetFileSize() is being used, in order to preserve the
  // remaining input data stream once the planned target file has been decoded.
  size_t GetUnconsumedDataSize() const {
    return unparsed_bytes_.size();
  }

  // This function will return true if the decoder has parsed a complete delta
  // file header plus zero or more delta file windows, with no data left over.
  // It will also return true if no delta data at all was decoded. If these
  // conditions are not met, then FinishDecoding() should not be called.
  bool IsDecodingComplete() const {
    if (!FoundFileHeader()) {
      // No complete delta file header has been parsed yet. DecodeChunk()
      // may have received some data that it hasn't yet parsed, in which case
      // decoding is incomplete.
      return unparsed_bytes_.empty();
    } else if (custom_code_table_decoder_.get()) {
      // The decoder is in the middle of parsing a custom code table.
      return false;
    } else if (delta_window_.FoundWindowHeader()) {
      // The decoder is in the middle of parsing an interleaved format delta
      // window.
      return false;
    } else if (ReachedPlannedTargetFileSize()) {
      // The decoder found exactly the planned number of bytes. In this case
      // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
      // data after the end of the delta file.
      return true;
    } else {
      // No complete delta file window has been parsed yet. DecodeChunk()
      // may have received some data that it hasn't yet parsed, in which case
      // decoding is incomplete.
      return unparsed_bytes_.empty();
    }
  }

  const char* dictionary_ptr() const { return dictionary_ptr_; }

  size_t dictionary_size() const { return dictionary_size_; }

  // Returns the address cache, or NULL if none has been allocated yet.
  VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }

  string* decoded_target() { return &decoded_target_; }

  bool allow_vcd_target() const { return allow_vcd_target_; }

  // Changes the allow_vcd_target_ setting. Calling this after
  // StartDecoding() is a usage error: it is logged and the setting is left
  // unchanged.
  void SetAllowVcdTarget(bool allow_vcd_target) {
    if (start_decoding_was_called_) {
      LOG(DFATAL) << "SetAllowVcdTarget() called after StartDecoding()"
                  << LOG_ENDL;
      return;
    }
    allow_vcd_target_ = allow_vcd_target;
  }

  // Removes the contents of decoded_target_ that precede the beginning of the
  // current window.
  void TruncateToBeginningOfWindow();

 private:
  // Reads the VCDiff delta file header section as described in RFC section 4.1,
  // except the custom code table data. Returns RESULT_ERROR if an error
  // occurred, or RESULT_END_OF_DATA if the end of available data was reached
  // before the entire header could be read. (The latter may be an error
  // condition if there is no more data available.) Otherwise, advances
  // *data past the header and returns RESULT_SUCCESS.
  //
  VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);

  // Indicates whether or not the header has already been read. The address
  // cache is allocated when the header is parsed, so its presence is used as
  // the marker.
  bool FoundFileHeader() const { return addr_cache_.get() != NULL; }

  // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
  // file header, this function parses the custom cache sizes and initializes
  // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
  // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
  // error occurred, or RESULT_END_OF_DATA if the end of available data was
  // reached before the custom cache sizes could be read. Otherwise, returns
  // the number of bytes read.
  //
  int InitCustomCodeTable(const char* data_start, const char* data_end);

  // If a custom code table was specified in the header section that was parsed
  // by ReadDeltaFileHeader(), this function makes a recursive call to another
  // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
  // custom code table is expected to be supplied as an embedded VCDIFF
  // encoding that uses the standard code table. Returns RESULT_ERROR if an
  // error occurs, or RESULT_END_OF_DATA if the end of available data was
  // reached before the entire custom code table could be read. Otherwise,
  // returns RESULT_SUCCESS with *data positioned after the encoded
  // custom code table. If the function returns RESULT_SUCCESS or
  // RESULT_END_OF_DATA, it advances *data past the parsed bytes.
  //
  VCDiffResult ReadCustomCodeTable(ParseableChunk* data);

  // Contents and length of the source (dictionary) data.
  const char* dictionary_ptr_;
  size_t dictionary_size_;

  // This string will be used to store any unparsed bytes left over when
  // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
  // It will also be used to concatenate those unparsed bytes with the data
  // supplied to the next call to DecodeChunk(), so that they appear in
  // contiguous memory.
  string unparsed_bytes_;

  // The portion of the target file that has been decoded so far. This will be
  // used to fill the output string for DecodeChunk(), and will also be used to
  // execute COPY instructions that reference target data. Since the source
  // window can come from a range of addresses in the previously decoded target
  // data, the entire target file needs to be available to the decoder, not just
  // the current target window.
  string decoded_target_;

  // The VCDIFF version byte (also known as "header4") from the
  // delta file header.
  unsigned char vcdiff_version_code_;

  VCDiffDeltaFileWindow delta_window_;

  // NULL until the delta file header has been parsed; see FoundFileHeader().
  std::auto_ptr<VCDiffAddressCache> addr_cache_;

  // Will be NULL unless a custom code table has been defined.
  std::auto_ptr<VCDiffCodeTableData> custom_code_table_;

  // Used to receive the decoded custom code table.
  string custom_code_table_string_;

  // If a custom code table is specified, it will be expressed
  // as an embedded VCDIFF delta file which uses the default code table
  // as the source file (dictionary). Use a child decoder object
  // to decode that delta file.
  std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;

  // If set, then the decoder is expecting *exactly* this number of
  // target bytes to be decoded from one or more delta file windows.
  // If this number is exceeded while decoding a window, but was not met
  // before starting on that window, an error will be reported.
  // If FinishDecoding() is called before this number is met, an error
  // will also be reported. This feature is used for decoding the
  // embedded code table data within a VCDIFF delta file; we want to
  // stop processing the embedded data once the entire code table has
  // been decoded, and treat the rest of the available data as part
  // of the enclosing delta file.
  size_t planned_target_file_size_;

  // Upper bound on the target file size; defaults to
  // kDefaultMaximumTargetFileSize.
  size_t maximum_target_file_size_;

  // Upper bound on the size of a single target window; defaults to
  // kDefaultMaximumTargetFileSize and can never exceed kTargetSizeLimit.
  size_t maximum_target_window_size_;

  // Contains the sum of the decoded sizes of all target windows seen so far,
  // including the expected total size of the current target window in progress
  // (even if some of the current target window has not yet been decoded.)
  size_t total_of_target_window_sizes_;

  // This value is used to ensure the correct order of calls to the interface
  // functions, i.e., a single call to StartDecoding(), followed by zero or
  // more calls to DecodeChunk(), followed by a single call to
  // FinishDecoding().
  bool start_decoding_was_called_;

  // If this value is true then the VCD_TARGET flag can be specified to allow
  // the source segment to be chosen from the previously-decoded target data.
  // (This is the default behavior.) If it is false, then specifying the
  // VCD_TARGET flag is considered an error, and the decoder does not need to
  // keep in memory any decoded target data prior to the current window.
  bool allow_vcd_target_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
  void operator=(const VCDiffStreamingDecoderImpl&);
};

// *** Methods for VCDiffStreamingDecoderImpl

// Out-of-class definitions for static constants initialized in the class body.
// NOTE(review): kTargetSizeLimit has no matching definition in this part of
// the file -- confirm whether one is needed.
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;

// Both size limits start at kDefaultMaximumTargetFileSize and VCD_TARGET is
// allowed by default. The delta window is given this object as its parent,
// and Reset() then assigns the remaining per-decode state.
VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
    : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
      maximum_target_window_size_(kDefaultMaximumTargetFileSize),
      allow_vcd_target_(true) {
  delta_window_.Init(this);
  Reset();
}

// Reset() will delete the component objects without reallocating them.
617 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } 618 619 void VCDiffStreamingDecoderImpl::Reset() { 620 start_decoding_was_called_ = false; 621 dictionary_ptr_ = NULL; 622 dictionary_size_ = 0; 623 vcdiff_version_code_ = '\0'; 624 planned_target_file_size_ = kUnlimitedBytes; 625 total_of_target_window_sizes_ = 0; 626 addr_cache_.reset(); 627 custom_code_table_.reset(); 628 custom_code_table_decoder_.reset(); 629 delta_window_.Reset(); 630 } 631 632 void VCDiffStreamingDecoderImpl::TruncateToBeginningOfWindow() { 633 // Conserve the data for the current window that has been partially decoded. 634 decoded_target_.erase(0, delta_window_.target_window_start_pos()); 635 delta_window_.set_target_window_start_pos(0); 636 } 637 638 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, 639 size_t dictionary_size) { 640 if (start_decoding_was_called_) { 641 LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()" 642 << LOG_ENDL; 643 return; 644 } 645 unparsed_bytes_.clear(); 646 decoded_target_.clear(); // delta_window_.Reset() depends on this 647 Reset(); 648 dictionary_ptr_ = dictionary_ptr; 649 dictionary_size_ = dictionary_size; 650 start_decoding_was_called_ = true; 651 } 652 653 // Reads the VCDiff delta file header section as described in RFC section 4.1: 654 // 655 // Header1 - byte = 0xD6 (ASCII 'V' | 0x80) 656 // Header2 - byte = 0xC3 (ASCII 'C' | 0x80) 657 // Header3 - byte = 0xC4 (ASCII 'D' | 0x80) 658 // Header4 - byte 659 // Hdr_Indicator - byte 660 // [Secondary compressor ID] - byte 661 // [Length of code table data] - integer 662 // [Code table data] 663 // 664 // Initializes the code table and address cache objects. Returns RESULT_ERROR 665 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was 666 // reached before the entire header could be read. (The latter may be an error 667 // condition if there is no more data available.) 
// Otherwise, returns
// RESULT_SUCCESS, and removes the header bytes from the data string.
//
// It's relatively inefficient to expect this function to parse any number of
// input bytes available, down to 1 byte, but it is necessary in case the input
// is not a properly formatted VCDIFF delta file. If the entire input consists
// of two bytes "12", then we should recognize that it does not match the
// initial VCDIFF magic number "VCD" and report an error, rather than waiting
// indefinitely for more input that will never arrive.
//
VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
    ParseableChunk* data) {
  // Nothing to do if the header was already parsed by an earlier call.
  if (FoundFileHeader()) {
    return RESULT_SUCCESS;
  }
  size_t data_size = data->UnparsedSize();
  // View the unparsed input as a header. Each field is dereferenced below
  // only when data_size shows that its byte is available.
  const DeltaFileHeader* header =
      reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
  bool wrong_magic_number = false;
  // The cases deliberately fall through from the largest size to the
  // smallest, so that every available header byte gets checked.
  switch (data_size) {
    // Verify only the bytes that are available.
    default:
      // Reached for any size other than 0 through 3, i.e. at least 4 bytes.
      // Found header contents up to and including VCDIFF version
      vcdiff_version_code_ = header->header4;
      // NOTE(review): the version byte is validated before the magic number,
      // so input of 4+ bytes that is not VCDIFF at all reports a version
      // error rather than the "not a VCDIFF delta file" error -- confirm
      // whether that is intended.
      if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
          (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
        LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
        return RESULT_ERROR;
      }
      // fall through
    case 3:
      if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 2:
      if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 1:
      if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 0:
      if (wrong_magic_number) {
        LOG(ERROR) << "Did not find VCDIFF header bytes; "
                      "input is not a VCDIFF delta file" << LOG_ENDL;
        return RESULT_ERROR;
      }
      // Everything seen so far is valid, but the fixed-size header is not
      // complete yet; the caller must supply more input.
      if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
  }
  // The whole fixed-size header is available from here on, so hdr_indicator
  // may be read.
  // Secondary compressor not supported.
  if (header->hdr_indicator & VCD_DECOMPRESS) {
    LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
    return RESULT_ERROR;
  }
  if (header->hdr_indicator & VCD_CODETABLE) {
    // The custom code table data starts right after the fixed-size header.
    // InitCustomCodeTable() returns either RESULT_ERROR, RESULT_END_OF_DATA,
    // or the number of bytes it parsed.
    int bytes_parsed = InitCustomCodeTable(
        data->UnparsedData() + sizeof(DeltaFileHeader),
        data->End());
    switch (bytes_parsed) {
      case RESULT_ERROR:
        return RESULT_ERROR;
      case RESULT_END_OF_DATA:
        // Nothing is consumed; *data has not been advanced.
        return RESULT_END_OF_DATA;
      default:
        // Consume the fixed header plus the bytes parsed after it.
        data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
    }
  } else {
    // Allocating the address cache is what makes FoundFileHeader() return
    // true from now on.
    addr_cache_.reset(new VCDiffAddressCache);
    // addr_cache_->Init() will be called
    // from VCDiffStreamingDecoderImpl::DecodeChunk()
    data->Advance(sizeof(DeltaFileHeader));
  }
  return RESULT_SUCCESS;
}

int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
                                                    const char* data_end) {
  // A custom code table is being specified. Parse the variable-length
  // cache sizes and begin parsing the encoded custom code table.
  int32_t near_cache_size = 0, same_cache_size = 0;
  VCDiffHeaderParser header_parser(data_start, data_end);
  if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
    return header_parser.GetResult();
  }
  if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
    return header_parser.GetResult();
  }
  custom_code_table_.reset(new struct VCDiffCodeTableData);
  memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
  custom_code_table_string_.clear();
  addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
  // addr_cache_->Init() will be called
  // from VCDiffStreamingDecoderImpl::DecodeChunk()

  // If we reach this point (the start of the custom code table)
  // without encountering a RESULT_END_OF_DATA condition, then we won't call
  // ReadDeltaFileHeader() again for this delta file.
  //
  // Instantiate a recursive decoder to interpret the custom code table
  // as a VCDIFF encoding of the default code table.
771 custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); 772 custom_code_table_decoder_->StartDecoding( 773 reinterpret_cast<const char*>( 774 &VCDiffCodeTableData::kDefaultCodeTableData), 775 sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); 776 custom_code_table_decoder_->SetPlannedTargetFileSize( 777 sizeof(*custom_code_table_)); 778 return static_cast<int>(header_parser.ParsedSize()); 779 } 780 781 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( 782 ParseableChunk* data) { 783 if (!custom_code_table_decoder_.get()) { 784 return RESULT_SUCCESS; 785 } 786 if (!custom_code_table_.get()) { 787 LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set," 788 " but custom_code_table_ is NULL" << LOG_ENDL; 789 return RESULT_ERROR; 790 } 791 OutputString<string> output_string(&custom_code_table_string_); 792 if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), 793 data->UnparsedSize(), 794 &output_string)) { 795 return RESULT_ERROR; 796 } 797 if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { 798 // Skip over the consumed data. 799 data->Finish(); 800 return RESULT_END_OF_DATA; 801 } 802 if (!custom_code_table_decoder_->FinishDecoding()) { 803 return RESULT_ERROR; 804 } 805 if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { 806 LOG(DFATAL) << "Decoded custom code table size (" 807 << custom_code_table_string_.length() 808 << ") does not match size of a code table (" 809 << sizeof(*custom_code_table_) << ")" << LOG_ENDL; 810 return RESULT_ERROR; 811 } 812 memcpy(custom_code_table_.get(), 813 custom_code_table_string_.data(), 814 sizeof(*custom_code_table_)); 815 custom_code_table_string_.clear(); 816 // Skip over the consumed data. 
817 data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); 818 custom_code_table_decoder_.reset(); 819 delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); 820 return RESULT_SUCCESS; 821 } 822 823 namespace { 824 825 class TrackNewOutputText { 826 public: 827 typedef std::string string; 828 829 explicit TrackNewOutputText(const string& decoded_target) 830 : decoded_target_(decoded_target), 831 initial_decoded_target_size_(decoded_target.size()) { } 832 833 void AppendNewOutputText(size_t target_bytes_remaining, 834 OutputStringInterface* output_string) { 835 const size_t bytes_decoded_this_chunk = 836 decoded_target_.size() - initial_decoded_target_size_; 837 if (bytes_decoded_this_chunk > 0) { 838 if (target_bytes_remaining > 0) { 839 // The decoder is midway through decoding a target window. Resize 840 // output_string to match the expected length. The interface guarantees 841 // not to resize the output_string more than once per target window 842 // decoded. 
843 output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk 844 + target_bytes_remaining); 845 } 846 output_string->append( 847 decoded_target_.data() + initial_decoded_target_size_, 848 bytes_decoded_this_chunk); 849 } 850 } 851 852 private: 853 const string& decoded_target_; 854 size_t initial_decoded_target_size_; 855 }; 856 857 } // anonymous namespace 858 859 bool VCDiffStreamingDecoderImpl::DecodeChunk( 860 const char* data, 861 size_t len, 862 OutputStringInterface* output_string) { 863 if (!start_decoding_was_called_) { 864 LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL; 865 Reset(); 866 return false; 867 } 868 ParseableChunk parseable_chunk(data, len); 869 if (!unparsed_bytes_.empty()) { 870 unparsed_bytes_.append(data, len); 871 parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), 872 unparsed_bytes_.size()); 873 } 874 TrackNewOutputText output_tracker(decoded_target_); 875 VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); 876 if (RESULT_SUCCESS == result) { 877 result = ReadCustomCodeTable(&parseable_chunk); 878 } 879 if (RESULT_SUCCESS == result) { 880 result = delta_window_.DecodeWindows(&parseable_chunk); 881 } 882 if (RESULT_ERROR == result) { 883 Reset(); // Don't allow further DecodeChunk calls 884 return false; 885 } 886 unparsed_bytes_.assign(parseable_chunk.UnparsedData(), 887 parseable_chunk.UnparsedSize()); 888 output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(), 889 output_string); 890 if (!allow_vcd_target()) { 891 // VCD_TARGET will never be used to reference target data beyond the start 892 // of the current window, so throw away any earlier target data. 893 TruncateToBeginningOfWindow(); 894 } 895 return true; 896 } 897 898 // Finishes decoding after all data has been received. Returns true 899 // if decoding of the entire stream was successful. 
900 bool VCDiffStreamingDecoderImpl::FinishDecoding() { 901 bool success = true; 902 if (!start_decoding_was_called_) { 903 LOG(WARNING) << "FinishDecoding() called before StartDecoding()," 904 " or called after DecodeChunk() returned false" 905 << LOG_ENDL; 906 success = false; 907 } else if (!IsDecodingComplete()) { 908 LOG(ERROR) << "FinishDecoding() called before parsing entire" 909 " delta file window" << LOG_ENDL; 910 success = false; 911 } 912 // Reset the object state for the next decode operation 913 Reset(); 914 return success; 915 } 916 917 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits( 918 size_t window_size) const { 919 if (window_size > maximum_target_window_size_) { 920 LOG(ERROR) << "Length of target window (" << window_size 921 << ") exceeds limit of " << maximum_target_window_size_ 922 << " bytes" << LOG_ENDL; 923 return true; 924 } 925 if (HasPlannedTargetFileSize()) { 926 // The logical expression to check would be: 927 // 928 // total_of_target_window_sizes_ + window_size > planned_target_file_size_ 929 // 930 // but the addition might cause an integer overflow if target_bytes_to_add 931 // is very large. So it is better to check target_bytes_to_add against 932 // the remaining planned target bytes. 
933 size_t remaining_planned_target_file_size = 934 planned_target_file_size_ - total_of_target_window_sizes_; 935 if (window_size > remaining_planned_target_file_size) { 936 LOG(ERROR) << "Length of target window (" << window_size 937 << " bytes) plus previous windows (" 938 << total_of_target_window_sizes_ 939 << " bytes) would exceed planned size of " 940 << planned_target_file_size_ << " bytes" << LOG_ENDL; 941 return true; 942 } 943 } 944 size_t remaining_maximum_target_bytes = 945 maximum_target_file_size_ - total_of_target_window_sizes_; 946 if (window_size > remaining_maximum_target_bytes) { 947 LOG(ERROR) << "Length of target window (" << window_size 948 << " bytes) plus previous windows (" 949 << total_of_target_window_sizes_ 950 << " bytes) would exceed maximum target file size of " 951 << maximum_target_file_size_ << " bytes" << LOG_ENDL; 952 return true; 953 } 954 return false; 955 } 956 957 // *** Methods for VCDiffDeltaFileWindow 958 959 void VCDiffDeltaFileWindow::Reset() { 960 found_header_ = false; 961 962 // Mark the start of the current target window. 963 target_window_start_pos_ = parent_ ? 
parent_->decoded_target()->size() : 0U; 964 target_window_length_ = 0; 965 966 source_segment_ptr_ = NULL; 967 source_segment_length_ = 0; 968 969 instructions_and_sizes_.Invalidate(); 970 data_for_add_and_run_.Invalidate(); 971 addresses_for_copy_.Invalidate(); 972 973 interleaved_bytes_expected_ = 0; 974 975 has_checksum_ = false; 976 expected_checksum_ = 0; 977 } 978 979 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( 980 VCDiffHeaderParser* header_parser) { 981 size_t add_and_run_data_length = 0; 982 size_t instructions_and_sizes_length = 0; 983 size_t addresses_length = 0; 984 if (!header_parser->ParseSectionLengths(has_checksum_, 985 &add_and_run_data_length, 986 &instructions_and_sizes_length, 987 &addresses_length, 988 &expected_checksum_)) { 989 return header_parser->GetResult(); 990 } 991 if (parent_->AllowInterleaved() && 992 (add_and_run_data_length == 0) && 993 (addresses_length == 0)) { 994 // The interleaved format is being used. 995 interleaved_bytes_expected_ = 996 static_cast<int>(instructions_and_sizes_length); 997 UpdateInterleavedSectionPointers(header_parser->UnparsedData(), 998 header_parser->End()); 999 } else { 1000 // If interleaved format is not used, then the whole window contents 1001 // must be available before decoding can begin. If only part of 1002 // the current window is available, then report end of data 1003 // and re-parse the whole header when DecodeChunk() is called again. 
1004 if (header_parser->UnparsedSize() < (add_and_run_data_length + 1005 instructions_and_sizes_length + 1006 addresses_length)) { 1007 return RESULT_END_OF_DATA; 1008 } 1009 data_for_add_and_run_.Init(header_parser->UnparsedData(), 1010 add_and_run_data_length); 1011 instructions_and_sizes_.Init(data_for_add_and_run_.End(), 1012 instructions_and_sizes_length); 1013 addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); 1014 if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { 1015 LOG(ERROR) << "The end of the instructions section " 1016 "does not match the end of the delta window" << LOG_ENDL; 1017 return RESULT_ERROR; 1018 } 1019 } 1020 reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), 1021 instructions_and_sizes_.End()); 1022 return RESULT_SUCCESS; 1023 } 1024 1025 // Here are the elements of the delta window header to be parsed, 1026 // from section 4 of the RFC: 1027 // 1028 // Window1 1029 // Win_Indicator - byte 1030 // [Source segment size] - integer 1031 // [Source segment position] - integer 1032 // The delta encoding of the target window 1033 // Length of the delta encoding - integer 1034 // The delta encoding 1035 // Size of the target window - integer 1036 // Delta_Indicator - byte 1037 // Length of data for ADDs and RUNs - integer 1038 // Length of instructions and sizes - integer 1039 // Length of addresses for COPYs - integer 1040 // Data section for ADDs and RUNs - array of bytes 1041 // Instructions and sizes section - array of bytes 1042 // Addresses section for COPYs - array of bytes 1043 // 1044 VCDiffResult VCDiffDeltaFileWindow::ReadHeader( 1045 ParseableChunk* parseable_chunk) { 1046 std::string* decoded_target = parent_->decoded_target(); 1047 VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), 1048 parseable_chunk->End()); 1049 size_t source_segment_position = 0; 1050 unsigned char win_indicator = 0; 1051 if (!header_parser.ParseWinIndicatorAndSourceSegment( 1052 
parent_->dictionary_size(), 1053 decoded_target->size(), 1054 parent_->allow_vcd_target(), 1055 &win_indicator, 1056 &source_segment_length_, 1057 &source_segment_position)) { 1058 return header_parser.GetResult(); 1059 } 1060 has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); 1061 if (!header_parser.ParseWindowLengths(&target_window_length_)) { 1062 return header_parser.GetResult(); 1063 } 1064 if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) { 1065 // An error has been logged by TargetWindowWouldExceedSizeLimits(). 1066 return RESULT_ERROR; 1067 } 1068 header_parser.ParseDeltaIndicator(); 1069 VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); 1070 if (RESULT_SUCCESS != setup_return_code) { 1071 return setup_return_code; 1072 } 1073 // Reserve enough space in the output string for the current target window. 1074 decoded_target->reserve(target_window_start_pos_ + target_window_length_); 1075 // Get a pointer to the start of the source segment. 1076 if (win_indicator & VCD_SOURCE) { 1077 source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; 1078 } else if (win_indicator & VCD_TARGET) { 1079 // This assignment must happen after the reserve(). 1080 // decoded_target should not be resized again while processing this window, 1081 // so source_segment_ptr_ should remain valid. 1082 source_segment_ptr_ = decoded_target->data() + source_segment_position; 1083 } 1084 // The whole window header was found and parsed successfully. 
1085 found_header_ = true; 1086 parseable_chunk->Advance(header_parser.ParsedSize()); 1087 parent_->AddToTotalTargetWindowSize(target_window_length_); 1088 return RESULT_SUCCESS; 1089 } 1090 1091 void VCDiffDeltaFileWindow::UpdateInstructionPointer( 1092 ParseableChunk* parseable_chunk) { 1093 if (IsInterleaved()) { 1094 size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); 1095 // Reduce expected instruction segment length by bytes parsed 1096 interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); 1097 parseable_chunk->Advance(bytes_parsed); 1098 } 1099 } 1100 1101 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { 1102 return parent_->decoded_target()->size() - target_window_start_pos_; 1103 } 1104 1105 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { 1106 if (target_window_length_ == 0) { 1107 // There is no window being decoded at present 1108 return 0; 1109 } else { 1110 return target_window_length_ - TargetBytesDecoded(); 1111 } 1112 } 1113 1114 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) { 1115 parent_->decoded_target()->append(data, size); 1116 } 1117 1118 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { 1119 parent_->decoded_target()->append(size, byte); 1120 } 1121 1122 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { 1123 if (size > data_for_add_and_run_.UnparsedSize()) { 1124 return RESULT_END_OF_DATA; 1125 } 1126 // Write the next "size" data bytes 1127 CopyBytes(data_for_add_and_run_.UnparsedData(), size); 1128 data_for_add_and_run_.Advance(size); 1129 return RESULT_SUCCESS; 1130 } 1131 1132 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { 1133 if (data_for_add_and_run_.Empty()) { 1134 return RESULT_END_OF_DATA; 1135 } 1136 // Write "size" copies of the next data byte 1137 RunByte(*data_for_add_and_run_.UnparsedData(), size); 1138 data_for_add_and_run_.Advance(1); 1139 return RESULT_SUCCESS; 1140 } 1141 1142 VCDiffResult 
VCDiffDeltaFileWindow::DecodeCopy(size_t size, 1143 unsigned char mode) { 1144 // Keep track of the number of target bytes decoded as a local variable 1145 // to avoid recalculating it each time it is needed. 1146 size_t target_bytes_decoded = TargetBytesDecoded(); 1147 const VCDAddress here_address = 1148 static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); 1149 const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( 1150 here_address, 1151 mode, 1152 addresses_for_copy_.UnparsedDataAddr(), 1153 addresses_for_copy_.End()); 1154 switch (decoded_address) { 1155 case RESULT_ERROR: 1156 LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL; 1157 return RESULT_ERROR; 1158 case RESULT_END_OF_DATA: 1159 return RESULT_END_OF_DATA; 1160 default: 1161 if ((decoded_address < 0) || (decoded_address > here_address)) { 1162 LOG(DFATAL) << "Internal error: unexpected address " << decoded_address 1163 << " returned from DecodeAddress, with here_address = " 1164 << here_address << LOG_ENDL; 1165 return RESULT_ERROR; 1166 } 1167 break; 1168 } 1169 size_t address = static_cast<size_t>(decoded_address); 1170 if ((address + size) <= source_segment_length_) { 1171 // Copy all data from source segment 1172 CopyBytes(&source_segment_ptr_[address], size); 1173 return RESULT_SUCCESS; 1174 } 1175 // Copy some data from target window... 1176 if (address < source_segment_length_) { 1177 // ... 
plus some data from source segment 1178 const size_t partial_copy_size = source_segment_length_ - address; 1179 CopyBytes(&source_segment_ptr_[address], partial_copy_size); 1180 target_bytes_decoded += partial_copy_size; 1181 address += partial_copy_size; 1182 size -= partial_copy_size; 1183 } 1184 address -= source_segment_length_; 1185 // address is now based at start of target window 1186 const char* const target_segment_ptr = parent_->decoded_target()->data() + 1187 target_window_start_pos_; 1188 while (size > (target_bytes_decoded - address)) { 1189 // Recursive copy that extends into the yet-to-be-copied target data 1190 const size_t partial_copy_size = target_bytes_decoded - address; 1191 CopyBytes(&target_segment_ptr[address], partial_copy_size); 1192 target_bytes_decoded += partial_copy_size; 1193 address += partial_copy_size; 1194 size -= partial_copy_size; 1195 } 1196 CopyBytes(&target_segment_ptr[address], size); 1197 return RESULT_SUCCESS; 1198 } 1199 1200 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { 1201 if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() 1202 != parseable_chunk->UnparsedData())) { 1203 LOG(DFATAL) << "Internal error: interleaved format is used, but the" 1204 " input pointer does not point to the instructions section" 1205 << LOG_ENDL; 1206 return RESULT_ERROR; 1207 } 1208 while (TargetBytesDecoded() < target_window_length_) { 1209 int32_t decoded_size = VCD_INSTRUCTION_ERROR; 1210 unsigned char mode = 0; 1211 VCDiffInstructionType instruction = 1212 reader_.GetNextInstruction(&decoded_size, &mode); 1213 switch (instruction) { 1214 case VCD_INSTRUCTION_END_OF_DATA: 1215 UpdateInstructionPointer(parseable_chunk); 1216 return RESULT_END_OF_DATA; 1217 case VCD_INSTRUCTION_ERROR: 1218 return RESULT_ERROR; 1219 default: 1220 break; 1221 } 1222 const size_t size = static_cast<size_t>(decoded_size); 1223 // The value of "size" itself could be enormous (say, INT32_MAX) 1224 // so check it individually 
against the limit to protect against 1225 // overflow when adding it to something else. 1226 if ((size > target_window_length_) || 1227 ((size + TargetBytesDecoded()) > target_window_length_)) { 1228 LOG(ERROR) << VCDiffInstructionName(instruction) 1229 << " with size " << size 1230 << " plus existing " << TargetBytesDecoded() 1231 << " bytes of target data exceeds length of target" 1232 " window (" << target_window_length_ << " bytes)" 1233 << LOG_ENDL; 1234 return RESULT_ERROR; 1235 } 1236 VCDiffResult result = RESULT_SUCCESS; 1237 switch (instruction) { 1238 case VCD_ADD: 1239 result = DecodeAdd(size); 1240 break; 1241 case VCD_RUN: 1242 result = DecodeRun(size); 1243 break; 1244 case VCD_COPY: 1245 result = DecodeCopy(size, mode); 1246 break; 1247 default: 1248 LOG(DFATAL) << "Unexpected instruction type " << instruction 1249 << "in opcode stream" << LOG_ENDL; 1250 return RESULT_ERROR; 1251 } 1252 switch (result) { 1253 case RESULT_END_OF_DATA: 1254 reader_.UnGetInstruction(); 1255 UpdateInstructionPointer(parseable_chunk); 1256 return RESULT_END_OF_DATA; 1257 case RESULT_ERROR: 1258 return RESULT_ERROR; 1259 case RESULT_SUCCESS: 1260 break; 1261 } 1262 } 1263 if (TargetBytesDecoded() != target_window_length_) { 1264 LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded() 1265 << " bytes) does not match expected size (" 1266 << target_window_length_ << " bytes)" << LOG_ENDL; 1267 return RESULT_ERROR; 1268 } 1269 const char* const target_window_start = 1270 parent_->decoded_target()->data() + target_window_start_pos_; 1271 if (has_checksum_ && 1272 (ComputeAdler32(target_window_start, target_window_length_) 1273 != expected_checksum_)) { 1274 LOG(ERROR) << "Target data does not match checksum; this could mean " 1275 "that the wrong dictionary was used" << LOG_ENDL; 1276 return RESULT_ERROR; 1277 } 1278 if (!instructions_and_sizes_.Empty()) { 1279 LOG(ERROR) << "Excess instructions and sizes left over " 1280 "after decoding target window" << 
LOG_ENDL; 1281 return RESULT_ERROR; 1282 } 1283 if (!IsInterleaved()) { 1284 // Standard format is being used, with three separate sections for the 1285 // instructions, data, and addresses. 1286 if (!data_for_add_and_run_.Empty()) { 1287 LOG(ERROR) << "Excess ADD/RUN data left over " 1288 "after decoding target window" << LOG_ENDL; 1289 return RESULT_ERROR; 1290 } 1291 if (!addresses_for_copy_.Empty()) { 1292 LOG(ERROR) << "Excess COPY addresses left over " 1293 "after decoding target window" << LOG_ENDL; 1294 return RESULT_ERROR; 1295 } 1296 // Reached the end of the window. Update the ParseableChunk to point to the 1297 // end of the addresses section, which is the last section in the window. 1298 parseable_chunk->SetPosition(addresses_for_copy_.End()); 1299 } else { 1300 // Interleaved format is being used. 1301 UpdateInstructionPointer(parseable_chunk); 1302 } 1303 return RESULT_SUCCESS; 1304 } 1305 1306 VCDiffResult VCDiffDeltaFileWindow::DecodeWindows( 1307 ParseableChunk* parseable_chunk) { 1308 if (!parent_) { 1309 LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() " 1310 "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL; 1311 return RESULT_ERROR; 1312 } 1313 while (!parseable_chunk->Empty()) { 1314 if (!found_header_) { 1315 switch (ReadHeader(parseable_chunk)) { 1316 case RESULT_END_OF_DATA: 1317 return RESULT_END_OF_DATA; 1318 case RESULT_ERROR: 1319 return RESULT_ERROR; 1320 default: 1321 // Reset address cache between windows (RFC section 5.1) 1322 if (!parent_->addr_cache()->Init()) { 1323 LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL; 1324 return RESULT_ERROR; 1325 } 1326 } 1327 } else { 1328 // We are resuming a window that was partially decoded before a 1329 // RESULT_END_OF_DATA was returned. This can only happen on the first 1330 // loop iteration, and only if the interleaved format is enabled and used. 
1331 if (!IsInterleaved()) { 1332 LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window" 1333 " when interleaved format is not being used" << LOG_ENDL; 1334 return RESULT_ERROR; 1335 } 1336 UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), 1337 parseable_chunk->End()); 1338 reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), 1339 instructions_and_sizes_.End()); 1340 } 1341 switch (DecodeBody(parseable_chunk)) { 1342 case RESULT_END_OF_DATA: 1343 if (MoreDataExpected()) { 1344 return RESULT_END_OF_DATA; 1345 } else { 1346 LOG(ERROR) << "End of data reached while decoding VCDIFF delta file" 1347 << LOG_ENDL; 1348 // fall through to RESULT_ERROR case 1349 } 1350 case RESULT_ERROR: 1351 return RESULT_ERROR; 1352 default: 1353 break; // DecodeBody succeeded 1354 } 1355 // Get ready to read a new delta window 1356 Reset(); 1357 if (parent_->ReachedPlannedTargetFileSize()) { 1358 // Found exactly the length we expected. Stop decoding. 1359 return RESULT_SUCCESS; 1360 } 1361 } 1362 return RESULT_SUCCESS; 1363 } 1364 1365 // *** Methods for VCDiffStreamingDecoder 1366 1367 VCDiffStreamingDecoder::VCDiffStreamingDecoder() 1368 : impl_(new VCDiffStreamingDecoderImpl) { } 1369 1370 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } 1371 1372 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { 1373 impl_->StartDecoding(source, len); 1374 } 1375 1376 bool VCDiffStreamingDecoder::DecodeChunkToInterface( 1377 const char* data, 1378 size_t len, 1379 OutputStringInterface* output_string) { 1380 return impl_->DecodeChunk(data, len, output_string); 1381 } 1382 1383 bool VCDiffStreamingDecoder::FinishDecoding() { 1384 return impl_->FinishDecoding(); 1385 } 1386 1387 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize( 1388 size_t new_maximum_target_file_size) { 1389 return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size); 1390 } 1391 1392 bool 
VCDiffStreamingDecoder::SetMaximumTargetWindowSize( 1393 size_t new_maximum_target_window_size) { 1394 return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size); 1395 } 1396 1397 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) { 1398 impl_->SetAllowVcdTarget(allow_vcd_target); 1399 } 1400 1401 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, 1402 size_t dictionary_size, 1403 const string& encoding, 1404 OutputStringInterface* target) { 1405 target->clear(); 1406 decoder_.StartDecoding(dictionary_ptr, dictionary_size); 1407 if (!decoder_.DecodeChunkToInterface(encoding.data(), 1408 encoding.size(), 1409 target)) { 1410 return false; 1411 } 1412 return decoder_.FinishDecoding(); 1413 } 1414 1415 } // namespace open_vcdiff 1416