1 // Copyright 2007 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // Classes to implement the Address Cache and Address Encoding 17 // algorithms described in sections 5.1 - 5.4 of RFC 3284 - 18 // The VCDIFF Generic Differencing and Compression Data Format. 19 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html 20 21 #ifndef OPEN_VCDIFF_ADDRCACHE_H_ 22 #define OPEN_VCDIFF_ADDRCACHE_H_ 23 24 #include <config.h> 25 #include <vector> 26 #include "vcdiff_defs.h" // VCDAddress 27 28 namespace open_vcdiff { 29 30 // Implements the "same" and "near" caches 31 // as described in RFC 3284, section 5. The "near" cache allows 32 // efficient reuse of one of the last four referenced addresses 33 // plus a small offset, and the "same" cache allows efficient reuse 34 // of an exact recent address distinguished by its lowest-order bits. 35 // 36 // NOT threadsafe. 37 // 38 class VCDiffAddressCache { 39 public: 40 // The default cache sizes specified in the RFC 41 static const int kDefaultNearCacheSize = 4; 42 static const int kDefaultSameCacheSize = 3; 43 44 VCDiffAddressCache(int near_cache_size, int same_cache_size); 45 46 // This version of the constructor uses the default values 47 // kDefaultNearCacheSize and kDefaultSameCacheSize. 48 VCDiffAddressCache(); 49 50 // Initializes the object before use. This method must be called after 51 // constructing a VCDiffAddressCache/ object, before any other method may be 52 // called. This is because Init() validates near_cache_size_ and 53 // same_cache_size_ before initializing the same and near caches. After the 54 // object has been initialized and used, Init() can be called again to reset 55 // it to its initial state. 56 // 57 bool Init(); 58 59 int near_cache_size() const { return near_cache_size_; } 60 61 int same_cache_size() const { return same_cache_size_; } 62 63 // Returns the first mode number that represents one of the NEAR modes. 64 // The number of NEAR modes is near_cache_size. Each NEAR mode refers to 65 // an element of the near_addresses_ array, where a recently-referenced 66 // address is stored. 67 // 68 static unsigned char FirstNearMode() { 69 return VCD_FIRST_NEAR_MODE; 70 } 71 72 // Returns the first mode number that represents one of the SAME modes. 73 // The number of SAME modes is same_cache_size. Each SAME mode refers to 74 // a block of 256 elements of the same_addresses_ array; the lowest-order 75 // 8 bits of the address are used to find the element of this block that 76 // may match the desired address value. 77 // 78 unsigned char FirstSameMode() const { 79 return VCD_FIRST_NEAR_MODE + near_cache_size(); 80 } 81 82 // Returns the maximum valid mode number, which happens to be 83 // the last SAME mode. 84 // 85 unsigned char LastMode() const { 86 return FirstSameMode() + same_cache_size() - 1; 87 } 88 89 static unsigned char DefaultLastMode() { 90 return VCD_FIRST_NEAR_MODE 91 + kDefaultNearCacheSize + kDefaultSameCacheSize - 1; 92 } 93 94 // See the definition of enum VCDiffModes in vcdiff_defs.h, 95 // as well as section 5.3 of the RFC, for a description of 96 // each address mode type (SELF, HERE, NEAR, and SAME). 97 static bool IsSelfMode(unsigned char mode) { 98 return mode == VCD_SELF_MODE; 99 } 100 101 static bool IsHereMode(unsigned char mode) { 102 return mode == VCD_HERE_MODE; 103 } 104 105 bool IsNearMode(unsigned char mode) const { 106 return (mode >= FirstNearMode()) && (mode < FirstSameMode()); 107 } 108 109 bool IsSameMode(unsigned char mode) const { 110 return (mode >= FirstSameMode()) && (mode <= LastMode()); 111 } 112 113 static VCDAddress DecodeSelfAddress(int32_t encoded_address) { 114 return encoded_address; 115 } 116 117 static VCDAddress DecodeHereAddress(int32_t encoded_address, 118 VCDAddress here_address) { 119 return here_address - encoded_address; 120 } 121 122 VCDAddress DecodeNearAddress(unsigned char mode, 123 int32_t encoded_address) const { 124 return NearAddress(mode - FirstNearMode()) + encoded_address; 125 } 126 127 VCDAddress DecodeSameAddress(unsigned char mode, 128 unsigned char encoded_address) const { 129 return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address); 130 } 131 132 // Returns true if, when using the given mode, an encoded address 133 // should be written to the delta file as a variable-length integer; 134 // returns false if the encoded address should be written 135 // as a byte value (unsigned char). 136 bool WriteAddressAsVarintForMode(unsigned char mode) const { 137 return !IsSameMode(mode); 138 } 139 140 // An accessor for an element of the near_addresses_ array. 141 // No bounds checking is performed; the caller must ensure that 142 // Init() has already been called, and that 143 // 0 <= pos < near_cache_size_ 144 // 145 VCDAddress NearAddress(int pos) const { 146 return near_addresses_[pos]; 147 } 148 149 // An accessor for an element of the same_addresses_ array. 150 // No bounds checking is performed; the caller must ensure that 151 // Init() has already been called, and that 152 // 0 <= pos < (same_cache_size_ * 256) 153 // 154 VCDAddress SameAddress(int pos) const { 155 return same_addresses_[pos]; 156 } 157 158 // This method will be called whenever an address is calculated for an 159 // encoded or decoded COPY instruction, and will update the contents 160 // of the SAME and NEAR caches. 161 // 162 void UpdateCache(VCDAddress address); 163 164 // Determines the address mode that yields the most compact encoding 165 // of the given address value. The most compact encoding 166 // is found by looking for the numerically lowest encoded address. 167 // Sets *encoded_addr to the encoded representation of the address 168 // and returns the mode used. 169 // 170 // The caller should pass the return value to the method 171 // WriteAddressAsVarintForMode() to determine whether encoded_addr 172 // should be written to the delta file as a variable-length integer 173 // or as a byte (unsigned char). 174 // 175 unsigned char EncodeAddress(VCDAddress address, 176 VCDAddress here_address, 177 VCDAddress* encoded_addr); 178 179 // Interprets the next value in the address_stream using the provided mode, 180 // which may need to access the SAME or NEAR address cache. Returns the 181 // decoded address, or one of the following values: 182 // RESULT_ERROR: An invalid address value was found in address_stream. 183 // RESULT_END_OF_DATA: The limit address_stream_end was reached before 184 // the address could be decoded. If more streamed data is expected, 185 // this means that the consumer should block and wait for more data 186 // before continuing to decode. If no more data is expected, this 187 // return value signals an error condition. 188 // 189 // If successful, *address_stream will be incremented past the decoded address 190 // position. If RESULT_ERROR or RESULT_END_OF_DATA is returned, 191 // then the value of *address_stream will not have changed. 192 // 193 VCDAddress DecodeAddress(VCDAddress here_address, 194 unsigned char mode, 195 const char** address_stream, 196 const char* address_stream_end); 197 198 private: 199 // The number of addresses to be kept in the NEAR cache. 200 const int near_cache_size_; 201 // The number of 256-byte blocks to store in the SAME cache. 202 const int same_cache_size_; 203 // The next position in the NEAR cache to which an address will be written. 204 int next_slot_; 205 // NEAR cache contents 206 std::vector<VCDAddress> near_addresses_; 207 // SAME cache contents 208 std::vector<VCDAddress> same_addresses_; 209 210 // Making these private avoids implicit copy constructor & assignment operator 211 VCDiffAddressCache(const VCDiffAddressCache&); // NOLINT 212 void operator=(const VCDiffAddressCache&); 213 }; 214 215 } // namespace open_vcdiff 216 217 #endif // OPEN_VCDIFF_ADDRCACHE_H_ 218