Home | History | Annotate | Download | only in src
      1 // Copyright 2007 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // Classes to implement the Address Cache and Address Encoding
     17 // algorithms described in sections 5.1 - 5.4 of RFC 3284 -
     18 // The VCDIFF Generic Differencing and Compression Data Format.
     19 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
     20 
     21 #ifndef OPEN_VCDIFF_ADDRCACHE_H_
     22 #define OPEN_VCDIFF_ADDRCACHE_H_
     23 
     24 #include <config.h>
     25 #include <vector>
     26 #include "vcdiff_defs.h"  // VCDAddress
     27 
     28 namespace open_vcdiff {
     29 
     30 // Implements the "same" and "near" caches
     31 // as described in RFC 3284, section 5.  The "near" cache allows
     32 // efficient reuse of one of the last four referenced addresses
     33 // plus a small offset, and the "same" cache allows efficient reuse
     34 // of an exact recent address distinguished by its lowest-order bits.
     35 //
     36 // NOT threadsafe.
     37 //
     38 class VCDiffAddressCache {
     39  public:
     40   // The default cache sizes specified in the RFC
     41   static const int kDefaultNearCacheSize = 4;
     42   static const int kDefaultSameCacheSize = 3;
     43 
     44   VCDiffAddressCache(int near_cache_size, int same_cache_size);
     45 
     46   // This version of the constructor uses the default values
     47   // kDefaultNearCacheSize and kDefaultSameCacheSize.
     48   VCDiffAddressCache();
     49 
     50   // Initializes the object before use.  This method must be called after
     51   // constructing a VCDiffAddressCache/ object, before any other method may be
     52   // called.  This is because Init() validates near_cache_size_ and
     53   // same_cache_size_ before initializing the same and near caches.  After the
     54   // object has been initialized and used, Init() can be called again to reset
     55   // it to its initial state.
     56   //
     57   bool Init();
     58 
     59   int near_cache_size() const { return near_cache_size_; }
     60 
     61   int same_cache_size() const { return same_cache_size_; }
     62 
     63   // Returns the first mode number that represents one of the NEAR modes.
     64   // The number of NEAR modes is near_cache_size.  Each NEAR mode refers to
     65   // an element of the near_addresses_ array, where a recently-referenced
     66   // address is stored.
     67   //
     68   static unsigned char FirstNearMode() {
     69     return VCD_FIRST_NEAR_MODE;
     70   }
     71 
     72   // Returns the first mode number that represents one of the SAME modes.
     73   // The number of SAME modes is same_cache_size.  Each SAME mode refers to
     74   // a block of 256 elements of the same_addresses_ array; the lowest-order
     75   // 8 bits of the address are used to find the element of this block that
     76   // may match the desired address value.
     77   //
     78   unsigned char FirstSameMode() const {
     79     return VCD_FIRST_NEAR_MODE + near_cache_size();
     80   }
     81 
     82   // Returns the maximum valid mode number, which happens to be
     83   // the last SAME mode.
     84   //
     85   unsigned char LastMode() const {
     86     return FirstSameMode() + same_cache_size() - 1;
     87   }
     88 
     89   static unsigned char DefaultLastMode() {
     90     return VCD_FIRST_NEAR_MODE
     91         + kDefaultNearCacheSize + kDefaultSameCacheSize - 1;
     92   }
     93 
     94   // See the definition of enum VCDiffModes in vcdiff_defs.h,
     95   // as well as section 5.3 of the RFC, for a description of
     96   // each address mode type (SELF, HERE, NEAR, and SAME).
     97   static bool IsSelfMode(unsigned char mode) {
     98     return mode == VCD_SELF_MODE;
     99   }
    100 
    101   static bool IsHereMode(unsigned char mode) {
    102     return mode == VCD_HERE_MODE;
    103   }
    104 
    105   bool IsNearMode(unsigned char mode) const {
    106     return (mode >= FirstNearMode()) && (mode < FirstSameMode());
    107   }
    108 
    109   bool IsSameMode(unsigned char mode) const {
    110     return (mode >= FirstSameMode()) && (mode <= LastMode());
    111   }
    112 
    113   static VCDAddress DecodeSelfAddress(int32_t encoded_address) {
    114     return encoded_address;
    115   }
    116 
    117   static VCDAddress DecodeHereAddress(int32_t encoded_address,
    118                                       VCDAddress here_address) {
    119     return here_address - encoded_address;
    120   }
    121 
    122   VCDAddress DecodeNearAddress(unsigned char mode,
    123                                int32_t encoded_address) const {
    124     return NearAddress(mode - FirstNearMode()) + encoded_address;
    125   }
    126 
    127   VCDAddress DecodeSameAddress(unsigned char mode,
    128                                unsigned char encoded_address) const {
    129     return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address);
    130   }
    131 
    132   // Returns true if, when using the given mode, an encoded address
    133   // should be written to the delta file as a variable-length integer;
    134   // returns false if the encoded address should be written
    135   // as a byte value (unsigned char).
    136   bool WriteAddressAsVarintForMode(unsigned char mode) const {
    137     return !IsSameMode(mode);
    138   }
    139 
    140   // An accessor for an element of the near_addresses_ array.
    141   // No bounds checking is performed; the caller must ensure that
    142   // Init() has already been called, and that
    143   //     0 <= pos < near_cache_size_
    144   //
    145   VCDAddress NearAddress(int pos) const {
    146     return near_addresses_[pos];
    147   }
    148 
    149   // An accessor for an element of the same_addresses_ array.
    150   // No bounds checking is performed; the caller must ensure that
    151   // Init() has already been called, and that
    152   //     0 <= pos < (same_cache_size_ * 256)
    153   //
    154   VCDAddress SameAddress(int pos) const {
    155     return same_addresses_[pos];
    156   }
    157 
    158   // This method will be called whenever an address is calculated for an
    159   // encoded or decoded COPY instruction, and will update the contents
    160   // of the SAME and NEAR caches.
    161   //
    162   void UpdateCache(VCDAddress address);
    163 
    164   // Determines the address mode that yields the most compact encoding
    165   // of the given address value.  The most compact encoding
    166   // is found by looking for the numerically lowest encoded address.
    167   // Sets *encoded_addr to the encoded representation of the address
    168   // and returns the mode used.
    169   //
    170   // The caller should pass the return value to the method
    171   // WriteAddressAsVarintForMode() to determine whether encoded_addr
    172   // should be written to the delta file as a variable-length integer
    173   // or as a byte (unsigned char).
    174   //
    175   unsigned char EncodeAddress(VCDAddress address,
    176                               VCDAddress here_address,
    177                               VCDAddress* encoded_addr);
    178 
    179   // Interprets the next value in the address_stream using the provided mode,
    180   // which may need to access the SAME or NEAR address cache.  Returns the
    181   // decoded address, or one of the following values:
    182   //   RESULT_ERROR: An invalid address value was found in address_stream.
    183   //   RESULT_END_OF_DATA: The limit address_stream_end was reached before
    184   //       the address could be decoded.  If more streamed data is expected,
    185   //       this means that the consumer should block and wait for more data
    186   //       before continuing to decode.  If no more data is expected, this
    187   //       return value signals an error condition.
    188   //
    189   // If successful, *address_stream will be incremented past the decoded address
    190   // position.  If RESULT_ERROR or RESULT_END_OF_DATA is returned,
    191   // then the value of *address_stream will not have changed.
    192   //
    193   VCDAddress DecodeAddress(VCDAddress here_address,
    194                            unsigned char mode,
    195                            const char** address_stream,
    196                            const char* address_stream_end);
    197 
    198  private:
    199   // The number of addresses to be kept in the NEAR cache.
    200   const int near_cache_size_;
    201   // The number of 256-byte blocks to store in the SAME cache.
    202   const int same_cache_size_;
    203   // The next position in the NEAR cache to which an address will be written.
    204   int       next_slot_;
    205   // NEAR cache contents
    206   std::vector<VCDAddress> near_addresses_;
    207   // SAME cache contents
    208   std::vector<VCDAddress> same_addresses_;
    209 
    210   // Making these private avoids implicit copy constructor & assignment operator
    211   VCDiffAddressCache(const VCDiffAddressCache&);  // NOLINT
    212   void operator=(const VCDiffAddressCache&);
    213 };
    214 
    215 }  // namespace open_vcdiff
    216 
    217 #endif  // OPEN_VCDIFF_ADDRCACHE_H_
    218