// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implements a Decoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
#include <config.h>
#include "google/vcdecoder.h"
#include <stddef.h>  // size_t, ptrdiff_t
#include <stdint.h>  // int32_t
#include <string.h>  // memcpy, memset
#include <memory>  // auto_ptr
#include <string>
#include "addrcache.h"
#include "checksum.h"
#include "codetable.h"
#include "decodetable.h"
#include "headerparser.h"
#include "logging.h"
#include "google/output_string.h"
#include "varint_bigendian.h"
#include "vcdiff_defs.h"

namespace open_vcdiff {

// This class is used to parse delta file windows as described
// in RFC sections 4.2 and 4.3. Its methods are not thread-safe.
//
// Here is the window format copied from the RFC:
//
// Window1
//     Win_Indicator                            - byte
//     [Source segment size]                    - integer
//     [Source segment position]                - integer
//     The delta encoding of the target window
//         Length of the delta encoding         - integer
//         The delta encoding
//             Size of the target window        - integer
//             Delta_Indicator                  - byte
//             Length of data for ADDs and RUNs - integer
//             Length of instructions and sizes - integer
//             Length of addresses for COPYs    - integer
//             Data section for ADDs and RUNs   - array of bytes
//             Instructions and sizes section   - array of bytes
//             Addresses section for COPYs      - array of bytes
// Window2
// ...
//
// Sample usage:
//
// VCDiffDeltaFileWindow delta_window_;
// delta_window_.Init(parent);
// ParseableChunk parseable_chunk(input_buffer,
//                                input_size,
//                                leftover_unencoded_bytes);
// while (!parseable_chunk.Empty()) {
//   switch (delta_window_.DecodeWindow(&parseable_chunk)) {
//     case RESULT_END_OF_DATA:
//       <Read more input and retry DecodeWindow later.>
//     case RESULT_ERROR:
//       <Handle error case. An error log message has already been generated.>
//   }
// }
//
// DecodeWindow consumes only a single window, and needs to be placed within
// a loop if multiple windows are to be processed.
//
class VCDiffDeltaFileWindow {
 public:
  VCDiffDeltaFileWindow();
  ~VCDiffDeltaFileWindow();

  // Init() should be called immediately after constructing the
  // VCDiffDeltaFileWindow(). It must be called before DecodeWindow() can be
  // invoked, or an error will occur.
  void Init(VCDiffStreamingDecoderImpl* parent);

  // Resets the pointers to the data sections in the current window.
  void Reset();

  // Forwards the code table data and the maximum address mode to the
  // code table reader (reader_) and returns the reader's result.
  bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
                    unsigned char max_mode) {
    return reader_.UseCodeTable(code_table_data, max_mode);
  }

  // Decodes a single delta window using the input data from *parseable_chunk.
  // Appends the decoded target window to parent_->decoded_target(). Returns
  // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
  // the end of input was reached before the entire window could be decoded and
  // more input is expected (only possible if IsInterleaved() is true), or
  // RESULT_ERROR if an error occurred during decoding. In the RESULT_ERROR
  // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
  // parseable_chunk->Advance() is called to point to the input data position
  // just after the data that has been decoded.
  //
  VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);

  // Returns the current value of found_header_ (see its description in the
  // private section of this class).
  bool FoundWindowHeader() const {
    return found_header_;
  }

  bool MoreDataExpected() const {
    // When parsing an interleaved-format delta file,
    // every time DecodeBody() exits, interleaved_bytes_expected_
    // will be decremented by the number of bytes parsed. If it
    // reaches zero, then there is no more data expected because
    // the size of the interleaved section (given in the window
    // header) has been reached.
    return IsInterleaved() && (interleaved_bytes_expected_ > 0);
  }

  // Accessor for the position in the decoded target at which the current
  // target window starts.
  size_t target_window_start_pos() const { return target_window_start_pos_; }

  // Overrides the start position of the current target window.
  void set_target_window_start_pos(size_t new_start_pos) {
    target_window_start_pos_ = new_start_pos;
  }

  // Returns the number of bytes remaining to be decoded in the target window.
  // If not in the process of decoding a window, returns 0.
  size_t TargetBytesRemaining();

 private:
  // Reads the header of the window section as described in RFC sections 4.2 and
  // 4.3, up to and including the value "Length of addresses for COPYs". If the
  // entire header is found, this function sets up the DeltaWindowSections
  // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
  // that the decoder can begin decoding the opcodes in these sections. Returns
  // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
  // available data was reached before the entire header could be read. (The
  // latter may be an error condition if there is no more data available.)
  // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
  // parsed header.
  //
  VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);

  // After the window header has been parsed as far as the Delta_Indicator,
  // this function is called to parse the following delta window header fields:
  //
  //     Length of data for ADDs and RUNs - integer (VarintBE format)
  //     Length of instructions and sizes - integer (VarintBE format)
  //     Length of addresses for COPYs    - integer (VarintBE format)
  //
  // If has_checksum_ is true, it also looks for the following element:
  //
  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
  //
  // It sets up the DeltaWindowSections instructions_and_sizes_,
  // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
  // is being used, all three sections will include the entire window body; if
  // the standard format is used, three non-overlapping window sections will be
  // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
  // if standard format is being used and there is not enough input data to read
  // the entire window body. Otherwise, returns RESULT_SUCCESS.
  VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);

  // Decodes the body of the window section as described in RFC section 4.3,
  // including the sections "Data section for ADDs and RUNs", "Instructions
  // and sizes section", and "Addresses section for COPYs". These sections
  // must already have been set up by ReadHeader(). Returns a
  // non-negative value on success, or RESULT_END_OF_DATA if the end of input
  // was reached before the entire window could be decoded (only possible if
  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
  // decoding. Appends as much of the decoded target window as possible to
  // parent->decoded_target().
  //
  int DecodeBody(ParseableChunk* parseable_chunk);

  // Returns the number of bytes already decoded into the target window.
  size_t TargetBytesDecoded();

  // Decodes a single ADD instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeAdd(size_t size);

  // Decodes a single RUN instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeRun(size_t size);

  // Decodes a single COPY instruction, updating parent_->decoded_target_.
  VCDiffResult DecodeCopy(size_t size, unsigned char mode);

  // When using the interleaved format, this function is called both on parsing
  // the header and on resuming after a RESULT_END_OF_DATA was returned from a
  // previous call to DecodeBody(). It sets up all three section pointers to
  // reference the same interleaved stream of instructions, sizes, addresses,
  // and data. These pointers must be reset every time that work resumes on a
  // delta window, because the input data string may have been changed or
  // resized since DecodeBody() last returned.
  void UpdateInterleavedSectionPointers(const char* data_pos,
                                        const char* data_end) {
    const ptrdiff_t available_data = data_end - data_pos;
    // Don't read past the end of currently-available data
    if (available_data > interleaved_bytes_expected_) {
      instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
    } else {
      instructions_and_sizes_.Init(data_pos, available_data);
    }
    // The other two sections are initialized from instructions_and_sizes_
    // itself rather than from their own data ranges.
    data_for_add_and_run_.Init(&instructions_and_sizes_);
    addresses_for_copy_.Init(&instructions_and_sizes_);
  }

  // If true, the interleaved format described in AllowInterleaved() is used
  // for the current delta file. Only valid after ReadHeader() has been
  // called and returned RESULT_SUCCESS (i.e., the whole header was parsed),
  // but before the window has finished decoding.
  //
  bool IsInterleaved() const {
    // If the sections are interleaved, both addresses_for_copy_ and
    // data_for_add_and_run_ should point at instructions_and_sizes_.
    return !addresses_for_copy_.IsOwned();
  }

  // Executes a single COPY or ADD instruction, appending data to
  // parent_->decoded_target().
  void CopyBytes(const char* data, size_t size);

  // Executes a single RUN instruction, appending data to
  // parent_->decoded_target().
  void RunByte(unsigned char byte, size_t size);

  // Advance *parseable_chunk to point to the current position in the
  // instructions/sizes section. If interleaved format is used, then
  // decrement the number of expected bytes in the instructions/sizes section
  // by the number of instruction/size bytes parsed.
  void UpdateInstructionPointer(ParseableChunk* parseable_chunk);

  // The parent object which was passed to Init().
  VCDiffStreamingDecoderImpl* parent_;

  // This value will be true if VCDiffDeltaFileWindow::ReadHeader()
  // has been called and succeeded in parsing the delta window header, but the
  // entire window has not yet been decoded.
  bool found_header_;

  // Contents and length of the current source window. source_segment_ptr_
  // will be non-NULL if (a) the window section header for the current window
  // has been read, but the window has not yet finished decoding; or
  // (b) the window did not specify a source segment.
  const char* source_segment_ptr_;
  size_t source_segment_length_;

  // The delta encoding window sections as defined in RFC section 4.3.
  // The pointer for each section will be incremented as data is consumed and
  // decoded from that section. If the interleaved format is used,
  // data_for_add_and_run_ and addresses_for_copy_ will both point to
  // instructions_and_sizes_; otherwise, they will be separate data sections.
  //
  DeltaWindowSection instructions_and_sizes_;
  DeltaWindowSection data_for_add_and_run_;
  DeltaWindowSection addresses_for_copy_;

  // The expected bytes left to decode in instructions_and_sizes_. Only used
  // for the interleaved format.
  int interleaved_bytes_expected_;

  // The expected length of the target window once it has been decoded.
  size_t target_window_length_;

  // The index in decoded_target at which the first byte of the current
  // target window was/will be written.
  size_t target_window_start_pos_;

  // If has_checksum_ is true, then expected_checksum_ contains an Adler32
  // checksum of the target window data. This is an extension included in the
  // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
  bool has_checksum_;
  VCDChecksum expected_checksum_;

  // The code table reader that UseCodeTable() configures.
  VCDiffCodeTableReader reader_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
  void operator=(const VCDiffDeltaFileWindow&);
};

// *** Inline methods for VCDiffDeltaFileWindow

// Starts with no parent (Init() supplies it later) and puts the remaining
// state into its initial condition through Reset().
inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
  Reset();
}

inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }

// Records the parent decoder; no other state is touched.
inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
  parent_ = parent;
}

// Implementation class holding the state of a streaming VCDIFF decode:
// the dictionary, the unparsed input carried between chunks, the decoded
// target, the current delta window, and the size limits that apply.
class VCDiffStreamingDecoderImpl {
 public:
  // Lets the class body refer to std::string as plain "string".
  typedef std::string string;

  // The default maximum target file size (and target window size) if
  // SetMaximumTargetFileSize() is not called.
  static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB

  // The largest value that can be passed to SetMaximumTargetWindowSize().
  // Using a larger value will result in an error.
  static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX

  // A constant that is the default value for planned_target_file_size_,
  // indicating that the decoder does not have an expected length
  // for the target data.
  static const size_t kUnlimitedBytes = static_cast<size_t>(-3);

  VCDiffStreamingDecoderImpl();
  ~VCDiffStreamingDecoderImpl();

  // Resets all member variables to their initial states.
  void Reset();

  // These functions are identical to their counterparts
  // in VCDiffStreamingDecoder.
  //
  void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);

  bool DecodeChunk(const char* data,
                   size_t len,
                   OutputStringInterface* output_string);

  bool FinishDecoding();

  // If true, the version of VCDIFF used in the current delta file allows
  // for the interleaved format, in which instructions, addresses and data
  // are all sent interleaved in the instructions section of each window
  // rather than being sent in separate sections. This is not part of
  // the VCDIFF draft standard, so we've defined a special version code
  // 'S' which implies that this feature is available. Even if interleaving
  // is supported, it is not mandatory; interleaved format will be implied
  // if the address and data sections are both zero-length.
  //
  bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }

  // If true, the version of VCDIFF used in the current delta file allows
  // each delta window to contain an Adler32 checksum of the target window data.
  // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
  // this checksum will appear as a variable-length integer, just after the
  // "length of addresses for COPYs" value and before the window data sections.
  // It is possible for some windows in a delta file to use the checksum feature
  // and for others not to use it (and leave the flag bit set to 0.)
  // Just as with AllowInterleaved(), this extension is not part of the draft
  // standard and is only available when the version code 'S' is specified.
  //
  bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }

  // Stores the new maximum target file size. Always returns true: unlike
  // SetMaximumTargetWindowSize(), no upper limit is checked here.
  bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
    maximum_target_file_size_ = new_maximum_target_file_size;
    return true;
  }

  // Stores the new maximum target window size and returns true, unless the
  // value exceeds kTargetSizeLimit; in that case an error is logged, the
  // current setting is kept, and false is returned.
  bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
    if (new_maximum_target_window_size > kTargetSizeLimit) {
      VCD_ERROR << "Specified maximum target window size "
                << new_maximum_target_window_size << " exceeds limit of "
                << kTargetSizeLimit << " bytes" << VCD_ENDL;
      return false;
    }
    maximum_target_window_size_ = new_maximum_target_window_size;
    return true;
  }

  // See description of planned_target_file_size_, below.
  bool HasPlannedTargetFileSize() const {
    return planned_target_file_size_ != kUnlimitedBytes;
  }

  // See description of planned_target_file_size_, below.
  void SetPlannedTargetFileSize(size_t planned_target_file_size) {
    planned_target_file_size_ = planned_target_file_size;
  }

  // Adds window_size to the running total described at
  // total_of_target_window_sizes_, below. No limit is checked here.
  void AddToTotalTargetWindowSize(size_t window_size) {
    total_of_target_window_sizes_ += window_size;
  }

  // Checks to see whether the decoded target data has reached its planned size.
  bool ReachedPlannedTargetFileSize() const {
    if (!HasPlannedTargetFileSize()) {
      return false;
    }
    // The planned target file size should not have been exceeded.
    // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
    // each target window would not make the target file exceed that limit, and
    // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
    // exceeds the advertised target window size.
    if (total_of_target_window_sizes_ > planned_target_file_size_) {
      VCD_DFATAL << "Internal error: Decoded data size "
                 << total_of_target_window_sizes_
                 << " exceeds planned target file size "
                 << planned_target_file_size_ << VCD_ENDL;
      return true;
    }
    return total_of_target_window_sizes_ == planned_target_file_size_;
  }

  // Checks to see whether adding a new target window of the specified size
  // would exceed the planned target file size, the maximum target file size,
  // or the maximum target window size. If so, logs an error and returns true;
  // otherwise, returns false.
  bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;

  // Returns the amount of input data passed to the last DecodeChunk()
  // that was not consumed by the decoder. This is essential if
  // SetPlannedTargetFileSize() is being used, in order to preserve the
  // remaining input data stream once the planned target file has been decoded.
  size_t GetUnconsumedDataSize() const {
    return unparsed_bytes_.size();
  }

  // This function will return true if the decoder has parsed a complete delta
  // file header plus zero or more delta file windows, with no data left over.
  // It will also return true if no delta data at all was decoded. If these
  // conditions are not met, then FinishDecoding() should not be called.
  bool IsDecodingComplete() const {
    if (!FoundFileHeader()) {
      // No complete delta file header has been parsed yet. DecodeChunk()
      // may have received some data that it hasn't yet parsed, in which case
      // decoding is incomplete.
      return unparsed_bytes_.empty();
    } else if (custom_code_table_decoder_.get()) {
      // The decoder is in the middle of parsing a custom code table.
      return false;
    } else if (delta_window_.FoundWindowHeader()) {
      // The decoder is in the middle of parsing an interleaved format delta
      // window.
      return false;
    } else if (ReachedPlannedTargetFileSize()) {
      // The decoder found exactly the planned number of bytes. In this case
      // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
      // data after the end of the delta file.
      return true;
    } else {
      // No complete delta file window has been parsed yet. DecodeChunk()
      // may have received some data that it hasn't yet parsed, in which case
      // decoding is incomplete.
      return unparsed_bytes_.empty();
    }
  }

  // Accessors for the dictionary (source) data.
  const char* dictionary_ptr() const { return dictionary_ptr_; }

  size_t dictionary_size() const { return dictionary_size_; }

  // Returns the address cache, or NULL if none has been allocated yet.
  VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }

  // Returns a mutable pointer to the decoded target data.
  string* decoded_target() { return &decoded_target_; }

  // See description of allow_vcd_target_, below.
  bool allow_vcd_target() const { return allow_vcd_target_; }

  // Changes allow_vcd_target_. If StartDecoding() has already been called,
  // the misuse is reported through VCD_DFATAL and the function returns
  // without changing the setting.
  void SetAllowVcdTarget(bool allow_vcd_target) {
    if (start_decoding_was_called_) {
      VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
                 << VCD_ENDL;
      return;
    }
    allow_vcd_target_ = allow_vcd_target;
  }

 private:
  // Reads the VCDiff delta file header section as described in RFC section 4.1,
  // except the custom code table data. Returns RESULT_ERROR if an error
  // occurred, or RESULT_END_OF_DATA if the end of available data was reached
  // before the entire header could be read. (The latter may be an error
  // condition if there is no more data available.) Otherwise, advances
  // data->position_ past the header and returns RESULT_SUCCESS.
  //
  VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);

  // Indicates whether or not the header has already been read.
  bool FoundFileHeader() const { return addr_cache_.get() != NULL; }

  // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
  // file header, this function parses the custom cache sizes and initializes
  // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
  // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
  // error occurred, or RESULT_END_OF_DATA if the end of available data was
  // reached before the custom cache sizes could be read. Otherwise, returns
  // the number of bytes read.
  //
  int InitCustomCodeTable(const char* data_start, const char* data_end);

  // If a custom code table was specified in the header section that was parsed
  // by ReadDeltaFileHeader(), this function makes a recursive call to another
  // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
  // custom code table is expected to be supplied as an embedded VCDIFF
  // encoding that uses the standard code table. Returns RESULT_ERROR if an
  // error occurs, or RESULT_END_OF_DATA if the end of available data was
  // reached before the entire custom code table could be read. Otherwise,
  // returns RESULT_SUCCESS. If the function returns RESULT_SUCCESS or
  // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
  //
  VCDiffResult ReadCustomCodeTable(ParseableChunk* data);

  // Called after the decoder exhausts all input data. This function
  // copies from decoded_target_ into output_string all the data that
  // has not yet been output. It sets decoded_target_output_position_
  // to mark the start of the next data that needs to be output.
  void AppendNewOutputText(OutputStringInterface* output_string);

  // Appends to output_string the portion of decoded_target_ that has
  // not yet been output, then clears decoded_target_. This function is
  // called after each complete target window has been decoded if
  // allow_vcd_target is false. In that case, there is no need to retain
  // target data from any window except the current window.
  void FlushDecodedTarget(OutputStringInterface* output_string);

  // Contents and length of the source (dictionary) data.
  const char* dictionary_ptr_;
  size_t dictionary_size_;

  // This string will be used to store any unparsed bytes left over when
  // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
  // It will also be used to concatenate those unparsed bytes with the data
  // supplied to the next call to DecodeChunk(), so that they appear in
  // contiguous memory.
  string unparsed_bytes_;

  // The portion of the target file that has been decoded so far. This will be
  // used to fill the output string for DecodeChunk(), and will also be used to
  // execute COPY instructions that reference target data. Since the source
  // window can come from a range of addresses in the previously decoded target
  // data, the entire target file needs to be available to the decoder, not just
  // the current target window.
  string decoded_target_;

  // The VCDIFF version byte (also known as "header4") from the
  // delta file header.
  unsigned char vcdiff_version_code_;

  // The window decoder; its parent pointer is set to this object in the
  // constructor.
  VCDiffDeltaFileWindow delta_window_;

  // Non-NULL once the delta file header has been read (see FoundFileHeader()).
  // NOTE(review): std::auto_ptr was deprecated in C++11 and removed in C++17;
  // the auto_ptr members of this class need a replacement before the file can
  // be built as C++17 or later.
  std::auto_ptr<VCDiffAddressCache> addr_cache_;

  // Will be NULL unless a custom code table has been defined.
  std::auto_ptr<VCDiffCodeTableData> custom_code_table_;

  // Used to receive the decoded custom code table.
  string custom_code_table_string_;

  // If a custom code table is specified, it will be expressed
  // as an embedded VCDIFF delta file which uses the default code table
  // as the source file (dictionary). Use a child decoder object
  // to decode that delta file.
  std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;

  // If set, then the decoder is expecting *exactly* this number of
  // target bytes to be decoded from one or more delta file windows.
  // If this number is exceeded while decoding a window, but was not met
  // before starting on that window, an error will be reported.
  // If FinishDecoding() is called before this number is met, an error
  // will also be reported. This feature is used for decoding the
  // embedded code table data within a VCDIFF delta file; we want to
  // stop processing the embedded data once the entire code table has
  // been decoded, and treat the rest of the available data as part
  // of the enclosing delta file.
  size_t planned_target_file_size_;

  // Limit set by SetMaximumTargetFileSize(); initialized to
  // kDefaultMaximumTargetFileSize by the constructor.
  size_t maximum_target_file_size_;

  // Limit set by SetMaximumTargetWindowSize(); initialized to
  // kDefaultMaximumTargetFileSize by the constructor.
  size_t maximum_target_window_size_;

  // Contains the sum of the decoded sizes of all target windows seen so far,
  // including the expected total size of the current target window in progress
  // (even if some of the current target window has not yet been decoded.)
  size_t total_of_target_window_sizes_;

  // Contains the byte position within decoded_target_ of the first data that
  // has not yet been output by AppendNewOutputText().
  size_t decoded_target_output_position_;

  // This value is used to ensure the correct order of calls to the interface
  // functions, i.e., a single call to StartDecoding(), followed by zero or
  // more calls to DecodeChunk(), followed by a single call to
  // FinishDecoding().
  bool start_decoding_was_called_;

  // If this value is true then the VCD_TARGET flag can be specified to allow
  // the source segment to be chosen from the previously-decoded target data.
  // (This is the default behavior.) If it is false, then specifying the
  // VCD_TARGET flag is considered an error, and the decoder does not need to
  // keep in memory any decoded target data prior to the current window.
  bool allow_vcd_target_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
  void operator=(const VCDiffStreamingDecoderImpl&);
};

// *** Methods for VCDiffStreamingDecoderImpl

// Namespace-scope definitions of two of the static constants that are
// declared and initialized inside the class body.
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;

// Both size limits start at kDefaultMaximumTargetFileSize and VCD_TARGET is
// allowed. These three members are set only here; Reset() does not touch
// them. The delta window is given its parent pointer before Reset() runs,
// because Reset() calls delta_window_.Reset().
VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
    : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
      maximum_target_window_size_(kDefaultMaximumTargetFileSize),
      allow_vcd_target_(true) {
  delta_window_.Init(this);
  Reset();
}

// Reset() will delete the component objects without reallocating them.
623 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } 624 625 void VCDiffStreamingDecoderImpl::Reset() { 626 start_decoding_was_called_ = false; 627 dictionary_ptr_ = NULL; 628 dictionary_size_ = 0; 629 vcdiff_version_code_ = '\0'; 630 planned_target_file_size_ = kUnlimitedBytes; 631 total_of_target_window_sizes_ = 0; 632 addr_cache_.reset(); 633 custom_code_table_.reset(); 634 custom_code_table_decoder_.reset(); 635 delta_window_.Reset(); 636 decoded_target_output_position_ = 0; 637 } 638 639 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, 640 size_t dictionary_size) { 641 if (start_decoding_was_called_) { 642 VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()" 643 << VCD_ENDL; 644 return; 645 } 646 unparsed_bytes_.clear(); 647 decoded_target_.clear(); // delta_window_.Reset() depends on this 648 Reset(); 649 dictionary_ptr_ = dictionary_ptr; 650 dictionary_size_ = dictionary_size; 651 start_decoding_was_called_ = true; 652 } 653 654 // Reads the VCDiff delta file header section as described in RFC section 4.1: 655 // 656 // Header1 - byte = 0xD6 (ASCII 'V' | 0x80) 657 // Header2 - byte = 0xC3 (ASCII 'C' | 0x80) 658 // Header3 - byte = 0xC4 (ASCII 'D' | 0x80) 659 // Header4 - byte 660 // Hdr_Indicator - byte 661 // [Secondary compressor ID] - byte 662 // [Length of code table data] - integer 663 // [Code table data] 664 // 665 // Initializes the code table and address cache objects. Returns RESULT_ERROR 666 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was 667 // reached before the entire header could be read. (The latter may be an error 668 // condition if there is no more data available.) Otherwise, returns 669 // RESULT_SUCCESS, and removes the header bytes from the data string. 
//
// It's relatively inefficient to expect this function to parse any number of
// input bytes available, down to 1 byte, but it is necessary in case the input
// is not a properly formatted VCDIFF delta file. If the entire input consists
// of two bytes "12", then we should recognize that it does not match the
// initial VCDIFF magic number "VCD" and report an error, rather than waiting
// indefinitely for more input that will never arrive.
//
VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
    ParseableChunk* data) {
  // A header has already been parsed for this delta file; nothing to do.
  if (FoundFileHeader()) {
    return RESULT_SUCCESS;
  }
  size_t data_size = data->UnparsedSize();
  // The pointer is only dereferenced for the fields that data_size shows to
  // be present: the switch below enters at the case matching the number of
  // available bytes and falls through towards case 0.
  const DeltaFileHeader* header =
      reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
  bool wrong_magic_number = false;
  switch (data_size) {
    // Verify only the bytes that are available.
    default:
      // Found header contents up to and including VCDIFF version
      // (this label is reached when at least four bytes are available).
      // NOTE(review): this version check runs before the magic-number checks
      // below, so an input of four or more bytes that is not VCDIFF at all
      // can be reported as "Unrecognized VCDIFF format version" instead of
      // as a missing VCDIFF header. Confirm whether that order is intended.
      vcdiff_version_code_ = header->header4;
      if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
          (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
        VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
        return RESULT_ERROR;
      }
      // fall through
    case 3:
      if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 2:
      if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 1:
      if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
        wrong_magic_number = true;
      }
      // fall through
    case 0:
      if (wrong_magic_number) {
        VCD_ERROR << "Did not find VCDIFF header bytes; "
                     "input is not a VCDIFF delta file" << VCD_ENDL;
        return RESULT_ERROR;
      }
      // Every byte seen so far is acceptable, but the fixed-size part of the
      // header is not complete yet: ask for more input.
      if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
  }
  // From here on, at least sizeof(DeltaFileHeader) bytes are available, so
  // hdr_indicator can be read.
  // Secondary compressor not supported.
  if (header->hdr_indicator & VCD_DECOMPRESS) {
    VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
    return RESULT_ERROR;
  }
  if (header->hdr_indicator & VCD_CODETABLE) {
    // The custom cache sizes start right after the fixed-size header.
    int bytes_parsed = InitCustomCodeTable(
        data->UnparsedData() + sizeof(DeltaFileHeader),
        data->End());
    // bytes_parsed is either one of the two VCDiffResult failure codes or
    // the number of bytes read (presumably the failure codes are negative,
    // so they cannot collide with a byte count -- confirm in vcdiff_defs.h).
    switch (bytes_parsed) {
      case RESULT_ERROR:
        return RESULT_ERROR;
      case RESULT_END_OF_DATA:
        return RESULT_END_OF_DATA;
      default:
        data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
    }
  } else {
    addr_cache_.reset(new VCDiffAddressCache);
    // addr_cache_->Init() will be called
    // from VCDiffStreamingDecoderImpl::DecodeChunk()
    data->Advance(sizeof(DeltaFileHeader));
  }
  return RESULT_SUCCESS;
}

// Parses the two cache sizes that precede a custom code table, then
// allocates the zero-filled custom code table, the address cache built from
// those sizes, and a child decoder that will decode the table using the
// default code table data as its dictionary. Returns the number of bytes
// parsed on success; if either cache size cannot be parsed, returns the
// header parser's result and allocates nothing.
int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
                                                    const char* data_end) {
  // A custom code table is being specified. Parse the variable-length
  // cache sizes and begin parsing the encoded custom code table.
  int32_t near_cache_size = 0, same_cache_size = 0;
  VCDiffHeaderParser header_parser(data_start, data_end);
  if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
    return header_parser.GetResult();
  }
  if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
    return header_parser.GetResult();
  }
  custom_code_table_.reset(new struct VCDiffCodeTableData);
  memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
  custom_code_table_string_.clear();
  // Allocating addr_cache_ is what makes FoundFileHeader() return true.
  addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
  // addr_cache_->Init() will be called
  // from VCDiffStreamingDecoderImpl::DecodeChunk()

  // If we reach this point (the start of the custom code table)
  // without encountering a RESULT_END_OF_DATA condition, then we won't call
  // ReadDeltaFileHeader() again for this delta file.
  //
  // Instantiate a recursive decoder to interpret the custom code table
  // as a VCDIFF encoding of the default code table.
  custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
  custom_code_table_decoder_->StartDecoding(
      reinterpret_cast<const char*>(
          &VCDiffCodeTableData::kDefaultCodeTableData),
      sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
  // The child decoder must produce exactly one code table's worth of bytes.
  custom_code_table_decoder_->SetPlannedTargetFileSize(
      sizeof(*custom_code_table_));
  return static_cast<int>(header_parser.ParsedSize());
}

VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
    ParseableChunk* data) {
  if (!custom_code_table_decoder_.get()) {
    return RESULT_SUCCESS;
  }
  if (!custom_code_table_.get()) {
    VCD_DFATAL << "Internal error: custom_code_table_decoder_ is set,"
                  " but custom_code_table_ is NULL" << VCD_ENDL;
    return RESULT_ERROR;
  }
  OutputString<string> output_string(&custom_code_table_string_);
  if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
                                               data->UnparsedSize(),
                                               &output_string)) {
    return RESULT_ERROR;
  }
  if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
    // Skip over the consumed data.
    data->Finish();
    return RESULT_END_OF_DATA;
  }
  if (!custom_code_table_decoder_->FinishDecoding()) {
    return RESULT_ERROR;
  }
  if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
    VCD_DFATAL << "Decoded custom code table size ("
               << custom_code_table_string_.length()
               << ") does not match size of a code table ("
               << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
    return RESULT_ERROR;
  }
  memcpy(custom_code_table_.get(),
         custom_code_table_string_.data(),
         sizeof(*custom_code_table_));
  custom_code_table_string_.clear();
  // Skip over the consumed data.
818 data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); 819 custom_code_table_decoder_.reset(); 820 delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); 821 return RESULT_SUCCESS; 822 } 823 824 void VCDiffStreamingDecoderImpl::FlushDecodedTarget( 825 OutputStringInterface* output_string) { 826 output_string->append( 827 decoded_target_.data() + decoded_target_output_position_, 828 decoded_target_.size() - decoded_target_output_position_); 829 decoded_target_.clear(); 830 delta_window_.set_target_window_start_pos(0); 831 decoded_target_output_position_ = 0; 832 } 833 834 void VCDiffStreamingDecoderImpl::AppendNewOutputText( 835 OutputStringInterface* output_string) { 836 const size_t bytes_decoded_this_chunk = 837 decoded_target_.size() - decoded_target_output_position_; 838 if (bytes_decoded_this_chunk > 0) { 839 size_t target_bytes_remaining = delta_window_.TargetBytesRemaining(); 840 if (target_bytes_remaining > 0) { 841 // The decoder is midway through decoding a target window. Resize 842 // output_string to match the expected length. The interface guarantees 843 // not to resize output_string more than once per target window decoded. 
844 output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk 845 + target_bytes_remaining); 846 } 847 output_string->append( 848 decoded_target_.data() + decoded_target_output_position_, 849 bytes_decoded_this_chunk); 850 decoded_target_output_position_ = decoded_target_.size(); 851 } 852 } 853 854 bool VCDiffStreamingDecoderImpl::DecodeChunk( 855 const char* data, 856 size_t len, 857 OutputStringInterface* output_string) { 858 if (!start_decoding_was_called_) { 859 VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL; 860 Reset(); 861 return false; 862 } 863 ParseableChunk parseable_chunk(data, len); 864 if (!unparsed_bytes_.empty()) { 865 unparsed_bytes_.append(data, len); 866 parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), 867 unparsed_bytes_.size()); 868 } 869 VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); 870 if (RESULT_SUCCESS == result) { 871 result = ReadCustomCodeTable(&parseable_chunk); 872 } 873 if (RESULT_SUCCESS == result) { 874 while (!parseable_chunk.Empty()) { 875 result = delta_window_.DecodeWindow(&parseable_chunk); 876 if (RESULT_SUCCESS != result) { 877 break; 878 } 879 if (ReachedPlannedTargetFileSize()) { 880 // Found exactly the length we expected. Stop decoding. 881 break; 882 } 883 if (!allow_vcd_target()) { 884 // VCD_TARGET will never be used to reference target data before the 885 // start of the current window, so flush and clear the contents of 886 // decoded_target_. 887 FlushDecodedTarget(output_string); 888 } 889 } 890 } 891 if (RESULT_ERROR == result) { 892 Reset(); // Don't allow further DecodeChunk calls 893 return false; 894 } 895 unparsed_bytes_.assign(parseable_chunk.UnparsedData(), 896 parseable_chunk.UnparsedSize()); 897 AppendNewOutputText(output_string); 898 return true; 899 } 900 901 // Finishes decoding after all data has been received. Returns true 902 // if decoding of the entire stream was successful. 
903 bool VCDiffStreamingDecoderImpl::FinishDecoding() { 904 bool success = true; 905 if (!start_decoding_was_called_) { 906 VCD_WARNING << "FinishDecoding() called before StartDecoding()," 907 " or called after DecodeChunk() returned false" 908 << VCD_ENDL; 909 success = false; 910 } else if (!IsDecodingComplete()) { 911 VCD_ERROR << "FinishDecoding() called before parsing entire" 912 " delta file window" << VCD_ENDL; 913 success = false; 914 } 915 // Reset the object state for the next decode operation 916 Reset(); 917 return success; 918 } 919 920 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits( 921 size_t window_size) const { 922 if (window_size > maximum_target_window_size_) { 923 VCD_ERROR << "Length of target window (" << window_size 924 << ") exceeds limit of " << maximum_target_window_size_ 925 << " bytes" << VCD_ENDL; 926 return true; 927 } 928 if (HasPlannedTargetFileSize()) { 929 // The logical expression to check would be: 930 // 931 // total_of_target_window_sizes_ + window_size > planned_target_file_size_ 932 // 933 // but the addition might cause an integer overflow if target_bytes_to_add 934 // is very large. So it is better to check target_bytes_to_add against 935 // the remaining planned target bytes. 
936 size_t remaining_planned_target_file_size = 937 planned_target_file_size_ - total_of_target_window_sizes_; 938 if (window_size > remaining_planned_target_file_size) { 939 VCD_ERROR << "Length of target window (" << window_size 940 << " bytes) plus previous windows (" 941 << total_of_target_window_sizes_ 942 << " bytes) would exceed planned size of " 943 << planned_target_file_size_ << " bytes" << VCD_ENDL; 944 return true; 945 } 946 } 947 size_t remaining_maximum_target_bytes = 948 maximum_target_file_size_ - total_of_target_window_sizes_; 949 if (window_size > remaining_maximum_target_bytes) { 950 VCD_ERROR << "Length of target window (" << window_size 951 << " bytes) plus previous windows (" 952 << total_of_target_window_sizes_ 953 << " bytes) would exceed maximum target file size of " 954 << maximum_target_file_size_ << " bytes" << VCD_ENDL; 955 return true; 956 } 957 return false; 958 } 959 960 // *** Methods for VCDiffDeltaFileWindow 961 962 void VCDiffDeltaFileWindow::Reset() { 963 found_header_ = false; 964 965 // Mark the start of the current target window. 966 target_window_start_pos_ = parent_ ? 
parent_->decoded_target()->size() : 0U; 967 target_window_length_ = 0; 968 969 source_segment_ptr_ = NULL; 970 source_segment_length_ = 0; 971 972 instructions_and_sizes_.Invalidate(); 973 data_for_add_and_run_.Invalidate(); 974 addresses_for_copy_.Invalidate(); 975 976 interleaved_bytes_expected_ = 0; 977 978 has_checksum_ = false; 979 expected_checksum_ = 0; 980 } 981 982 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( 983 VCDiffHeaderParser* header_parser) { 984 size_t add_and_run_data_length = 0; 985 size_t instructions_and_sizes_length = 0; 986 size_t addresses_length = 0; 987 if (!header_parser->ParseSectionLengths(has_checksum_, 988 &add_and_run_data_length, 989 &instructions_and_sizes_length, 990 &addresses_length, 991 &expected_checksum_)) { 992 return header_parser->GetResult(); 993 } 994 if (parent_->AllowInterleaved() && 995 (add_and_run_data_length == 0) && 996 (addresses_length == 0)) { 997 // The interleaved format is being used. 998 interleaved_bytes_expected_ = 999 static_cast<int>(instructions_and_sizes_length); 1000 UpdateInterleavedSectionPointers(header_parser->UnparsedData(), 1001 header_parser->End()); 1002 } else { 1003 // If interleaved format is not used, then the whole window contents 1004 // must be available before decoding can begin. If only part of 1005 // the current window is available, then report end of data 1006 // and re-parse the whole header when DecodeChunk() is called again. 
1007 if (header_parser->UnparsedSize() < (add_and_run_data_length + 1008 instructions_and_sizes_length + 1009 addresses_length)) { 1010 return RESULT_END_OF_DATA; 1011 } 1012 data_for_add_and_run_.Init(header_parser->UnparsedData(), 1013 add_and_run_data_length); 1014 instructions_and_sizes_.Init(data_for_add_and_run_.End(), 1015 instructions_and_sizes_length); 1016 addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); 1017 if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { 1018 VCD_ERROR << "The end of the instructions section " 1019 "does not match the end of the delta window" << VCD_ENDL; 1020 return RESULT_ERROR; 1021 } 1022 } 1023 reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), 1024 instructions_and_sizes_.End()); 1025 return RESULT_SUCCESS; 1026 } 1027 1028 // Here are the elements of the delta window header to be parsed, 1029 // from section 4 of the RFC: 1030 // 1031 // Window1 1032 // Win_Indicator - byte 1033 // [Source segment size] - integer 1034 // [Source segment position] - integer 1035 // The delta encoding of the target window 1036 // Length of the delta encoding - integer 1037 // The delta encoding 1038 // Size of the target window - integer 1039 // Delta_Indicator - byte 1040 // Length of data for ADDs and RUNs - integer 1041 // Length of instructions and sizes - integer 1042 // Length of addresses for COPYs - integer 1043 // Data section for ADDs and RUNs - array of bytes 1044 // Instructions and sizes section - array of bytes 1045 // Addresses section for COPYs - array of bytes 1046 // 1047 VCDiffResult VCDiffDeltaFileWindow::ReadHeader( 1048 ParseableChunk* parseable_chunk) { 1049 std::string* decoded_target = parent_->decoded_target(); 1050 VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), 1051 parseable_chunk->End()); 1052 size_t source_segment_position = 0; 1053 unsigned char win_indicator = 0; 1054 if (!header_parser.ParseWinIndicatorAndSourceSegment( 1055 
parent_->dictionary_size(), 1056 decoded_target->size(), 1057 parent_->allow_vcd_target(), 1058 &win_indicator, 1059 &source_segment_length_, 1060 &source_segment_position)) { 1061 return header_parser.GetResult(); 1062 } 1063 has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); 1064 if (!header_parser.ParseWindowLengths(&target_window_length_)) { 1065 return header_parser.GetResult(); 1066 } 1067 if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) { 1068 // An error has been logged by TargetWindowWouldExceedSizeLimits(). 1069 return RESULT_ERROR; 1070 } 1071 header_parser.ParseDeltaIndicator(); 1072 VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); 1073 if (RESULT_SUCCESS != setup_return_code) { 1074 return setup_return_code; 1075 } 1076 // Reserve enough space in the output string for the current target window. 1077 const size_t wanted_capacity = 1078 target_window_start_pos_ + target_window_length_; 1079 if (decoded_target->capacity() < wanted_capacity) { 1080 decoded_target->reserve(wanted_capacity); 1081 } 1082 // Get a pointer to the start of the source segment. 1083 if (win_indicator & VCD_SOURCE) { 1084 source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; 1085 } else if (win_indicator & VCD_TARGET) { 1086 // This assignment must happen after the reserve(). 1087 // decoded_target should not be resized again while processing this window, 1088 // so source_segment_ptr_ should remain valid. 1089 source_segment_ptr_ = decoded_target->data() + source_segment_position; 1090 } 1091 // The whole window header was found and parsed successfully. 
1092 found_header_ = true; 1093 parseable_chunk->Advance(header_parser.ParsedSize()); 1094 parent_->AddToTotalTargetWindowSize(target_window_length_); 1095 return RESULT_SUCCESS; 1096 } 1097 1098 void VCDiffDeltaFileWindow::UpdateInstructionPointer( 1099 ParseableChunk* parseable_chunk) { 1100 if (IsInterleaved()) { 1101 size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); 1102 // Reduce expected instruction segment length by bytes parsed 1103 interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); 1104 parseable_chunk->Advance(bytes_parsed); 1105 } 1106 } 1107 1108 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { 1109 return parent_->decoded_target()->size() - target_window_start_pos_; 1110 } 1111 1112 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { 1113 if (target_window_length_ == 0) { 1114 // There is no window being decoded at present 1115 return 0; 1116 } else { 1117 return target_window_length_ - TargetBytesDecoded(); 1118 } 1119 } 1120 1121 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) { 1122 parent_->decoded_target()->append(data, size); 1123 } 1124 1125 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { 1126 parent_->decoded_target()->append(size, byte); 1127 } 1128 1129 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { 1130 if (size > data_for_add_and_run_.UnparsedSize()) { 1131 return RESULT_END_OF_DATA; 1132 } 1133 // Write the next "size" data bytes 1134 CopyBytes(data_for_add_and_run_.UnparsedData(), size); 1135 data_for_add_and_run_.Advance(size); 1136 return RESULT_SUCCESS; 1137 } 1138 1139 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { 1140 if (data_for_add_and_run_.Empty()) { 1141 return RESULT_END_OF_DATA; 1142 } 1143 // Write "size" copies of the next data byte 1144 RunByte(*data_for_add_and_run_.UnparsedData(), size); 1145 data_for_add_and_run_.Advance(1); 1146 return RESULT_SUCCESS; 1147 } 1148 1149 VCDiffResult 
VCDiffDeltaFileWindow::DecodeCopy(size_t size, 1150 unsigned char mode) { 1151 // Keep track of the number of target bytes decoded as a local variable 1152 // to avoid recalculating it each time it is needed. 1153 size_t target_bytes_decoded = TargetBytesDecoded(); 1154 const VCDAddress here_address = 1155 static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); 1156 const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( 1157 here_address, 1158 mode, 1159 addresses_for_copy_.UnparsedDataAddr(), 1160 addresses_for_copy_.End()); 1161 switch (decoded_address) { 1162 case RESULT_ERROR: 1163 VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL; 1164 return RESULT_ERROR; 1165 case RESULT_END_OF_DATA: 1166 return RESULT_END_OF_DATA; 1167 default: 1168 if ((decoded_address < 0) || (decoded_address > here_address)) { 1169 VCD_DFATAL << "Internal error: unexpected address " << decoded_address 1170 << " returned from DecodeAddress, with here_address = " 1171 << here_address << VCD_ENDL; 1172 return RESULT_ERROR; 1173 } 1174 break; 1175 } 1176 size_t address = static_cast<size_t>(decoded_address); 1177 if ((address + size) <= source_segment_length_) { 1178 // Copy all data from source segment 1179 CopyBytes(&source_segment_ptr_[address], size); 1180 return RESULT_SUCCESS; 1181 } 1182 // Copy some data from target window... 1183 if (address < source_segment_length_) { 1184 // ... 
plus some data from source segment 1185 const size_t partial_copy_size = source_segment_length_ - address; 1186 CopyBytes(&source_segment_ptr_[address], partial_copy_size); 1187 target_bytes_decoded += partial_copy_size; 1188 address += partial_copy_size; 1189 size -= partial_copy_size; 1190 } 1191 address -= source_segment_length_; 1192 // address is now based at start of target window 1193 const char* const target_segment_ptr = parent_->decoded_target()->data() + 1194 target_window_start_pos_; 1195 while (size > (target_bytes_decoded - address)) { 1196 // Recursive copy that extends into the yet-to-be-copied target data 1197 const size_t partial_copy_size = target_bytes_decoded - address; 1198 CopyBytes(&target_segment_ptr[address], partial_copy_size); 1199 target_bytes_decoded += partial_copy_size; 1200 address += partial_copy_size; 1201 size -= partial_copy_size; 1202 } 1203 CopyBytes(&target_segment_ptr[address], size); 1204 return RESULT_SUCCESS; 1205 } 1206 1207 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { 1208 if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() 1209 != parseable_chunk->UnparsedData())) { 1210 VCD_DFATAL << "Internal error: interleaved format is used, but the" 1211 " input pointer does not point to the instructions section" 1212 << VCD_ENDL; 1213 return RESULT_ERROR; 1214 } 1215 while (TargetBytesDecoded() < target_window_length_) { 1216 int32_t decoded_size = VCD_INSTRUCTION_ERROR; 1217 unsigned char mode = 0; 1218 VCDiffInstructionType instruction = 1219 reader_.GetNextInstruction(&decoded_size, &mode); 1220 switch (instruction) { 1221 case VCD_INSTRUCTION_END_OF_DATA: 1222 UpdateInstructionPointer(parseable_chunk); 1223 return RESULT_END_OF_DATA; 1224 case VCD_INSTRUCTION_ERROR: 1225 return RESULT_ERROR; 1226 default: 1227 break; 1228 } 1229 const size_t size = static_cast<size_t>(decoded_size); 1230 // The value of "size" itself could be enormous (say, INT32_MAX) 1231 // so check it individually 
against the limit to protect against 1232 // overflow when adding it to something else. 1233 if ((size > target_window_length_) || 1234 ((size + TargetBytesDecoded()) > target_window_length_)) { 1235 VCD_ERROR << VCDiffInstructionName(instruction) 1236 << " with size " << size 1237 << " plus existing " << TargetBytesDecoded() 1238 << " bytes of target data exceeds length of target" 1239 " window (" << target_window_length_ << " bytes)" 1240 << VCD_ENDL; 1241 return RESULT_ERROR; 1242 } 1243 VCDiffResult result = RESULT_SUCCESS; 1244 switch (instruction) { 1245 case VCD_ADD: 1246 result = DecodeAdd(size); 1247 break; 1248 case VCD_RUN: 1249 result = DecodeRun(size); 1250 break; 1251 case VCD_COPY: 1252 result = DecodeCopy(size, mode); 1253 break; 1254 default: 1255 VCD_DFATAL << "Unexpected instruction type " << instruction 1256 << "in opcode stream" << VCD_ENDL; 1257 return RESULT_ERROR; 1258 } 1259 switch (result) { 1260 case RESULT_END_OF_DATA: 1261 reader_.UnGetInstruction(); 1262 UpdateInstructionPointer(parseable_chunk); 1263 return RESULT_END_OF_DATA; 1264 case RESULT_ERROR: 1265 return RESULT_ERROR; 1266 case RESULT_SUCCESS: 1267 break; 1268 } 1269 } 1270 if (TargetBytesDecoded() != target_window_length_) { 1271 VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded() 1272 << " bytes) does not match expected size (" 1273 << target_window_length_ << " bytes)" << VCD_ENDL; 1274 return RESULT_ERROR; 1275 } 1276 const char* const target_window_start = 1277 parent_->decoded_target()->data() + target_window_start_pos_; 1278 if (has_checksum_ && 1279 (ComputeAdler32(target_window_start, target_window_length_) 1280 != expected_checksum_)) { 1281 VCD_ERROR << "Target data does not match checksum; this could mean " 1282 "that the wrong dictionary was used" << VCD_ENDL; 1283 return RESULT_ERROR; 1284 } 1285 if (!instructions_and_sizes_.Empty()) { 1286 VCD_ERROR << "Excess instructions and sizes left over " 1287 "after decoding target window" << VCD_ENDL; 
1288 return RESULT_ERROR; 1289 } 1290 if (!IsInterleaved()) { 1291 // Standard format is being used, with three separate sections for the 1292 // instructions, data, and addresses. 1293 if (!data_for_add_and_run_.Empty()) { 1294 VCD_ERROR << "Excess ADD/RUN data left over " 1295 "after decoding target window" << VCD_ENDL; 1296 return RESULT_ERROR; 1297 } 1298 if (!addresses_for_copy_.Empty()) { 1299 VCD_ERROR << "Excess COPY addresses left over " 1300 "after decoding target window" << VCD_ENDL; 1301 return RESULT_ERROR; 1302 } 1303 // Reached the end of the window. Update the ParseableChunk to point to the 1304 // end of the addresses section, which is the last section in the window. 1305 parseable_chunk->SetPosition(addresses_for_copy_.End()); 1306 } else { 1307 // Interleaved format is being used. 1308 UpdateInstructionPointer(parseable_chunk); 1309 } 1310 return RESULT_SUCCESS; 1311 } 1312 1313 VCDiffResult VCDiffDeltaFileWindow::DecodeWindow( 1314 ParseableChunk* parseable_chunk) { 1315 if (!parent_) { 1316 VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() " 1317 "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL; 1318 return RESULT_ERROR; 1319 } 1320 if (!found_header_) { 1321 switch (ReadHeader(parseable_chunk)) { 1322 case RESULT_END_OF_DATA: 1323 return RESULT_END_OF_DATA; 1324 case RESULT_ERROR: 1325 return RESULT_ERROR; 1326 default: 1327 // Reset address cache between windows (RFC section 5.1) 1328 if (!parent_->addr_cache()->Init()) { 1329 VCD_DFATAL << "Error initializing address cache" << VCD_ENDL; 1330 return RESULT_ERROR; 1331 } 1332 } 1333 } else { 1334 // We are resuming a window that was partially decoded before a 1335 // RESULT_END_OF_DATA was returned. This can only happen on the first 1336 // loop iteration, and only if the interleaved format is enabled and used. 
1337 if (!IsInterleaved()) { 1338 VCD_DFATAL << "Internal error: Resumed decoding of a delta file window" 1339 " when interleaved format is not being used" << VCD_ENDL; 1340 return RESULT_ERROR; 1341 } 1342 UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), 1343 parseable_chunk->End()); 1344 reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), 1345 instructions_and_sizes_.End()); 1346 } 1347 switch (DecodeBody(parseable_chunk)) { 1348 case RESULT_END_OF_DATA: 1349 if (MoreDataExpected()) { 1350 return RESULT_END_OF_DATA; 1351 } else { 1352 VCD_ERROR << "End of data reached while decoding VCDIFF delta file" 1353 << VCD_ENDL; 1354 // fall through to RESULT_ERROR case 1355 } 1356 case RESULT_ERROR: 1357 return RESULT_ERROR; 1358 default: 1359 break; // DecodeBody succeeded 1360 } 1361 // Get ready to read a new delta window 1362 Reset(); 1363 return RESULT_SUCCESS; 1364 } 1365 1366 // *** Methods for VCDiffStreamingDecoder 1367 1368 VCDiffStreamingDecoder::VCDiffStreamingDecoder() 1369 : impl_(new VCDiffStreamingDecoderImpl) { } 1370 1371 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } 1372 1373 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { 1374 impl_->StartDecoding(source, len); 1375 } 1376 1377 bool VCDiffStreamingDecoder::DecodeChunkToInterface( 1378 const char* data, 1379 size_t len, 1380 OutputStringInterface* output_string) { 1381 return impl_->DecodeChunk(data, len, output_string); 1382 } 1383 1384 bool VCDiffStreamingDecoder::FinishDecoding() { 1385 return impl_->FinishDecoding(); 1386 } 1387 1388 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize( 1389 size_t new_maximum_target_file_size) { 1390 return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size); 1391 } 1392 1393 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize( 1394 size_t new_maximum_target_window_size) { 1395 return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size); 1396 } 1397 1398 
void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) { 1399 impl_->SetAllowVcdTarget(allow_vcd_target); 1400 } 1401 1402 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, 1403 size_t dictionary_size, 1404 const string& encoding, 1405 OutputStringInterface* target) { 1406 target->clear(); 1407 decoder_.StartDecoding(dictionary_ptr, dictionary_size); 1408 if (!decoder_.DecodeChunkToInterface(encoding.data(), 1409 encoding.size(), 1410 target)) { 1411 return false; 1412 } 1413 return decoder_.FinishDecoding(); 1414 } 1415 1416 } // namespace open_vcdiff 1417