Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sync/internal_api/public/base/unique_position.h"
      6 
      7 #include "base/basictypes.h"
      8 #include "base/logging.h"
      9 #include "base/rand_util.h"
     10 #include "base/stl_util.h"
     11 #include "base/strings/string_number_conversions.h"
     12 #include "sync/protocol/unique_position.pb.h"
     13 #include "third_party/zlib/zlib.h"
     14 
     15 namespace syncer {
     16 
     17 const size_t UniquePosition::kSuffixLength = 28;
     18 const size_t UniquePosition::kCompressBytesThreshold = 128;
     19 
     20 // static.
     21 bool UniquePosition::IsValidSuffix(const std::string& suffix) {
     22   // The suffix must be exactly the specified length, otherwise unique suffixes
     23   // are not sufficient to guarantee unique positions (because prefix + suffix
     24   // == p + refixsuffix).
     25   return suffix.length() == kSuffixLength
     26       && suffix[kSuffixLength-1] != 0;
     27 }
     28 
     29 // static.
     30 bool UniquePosition::IsValidBytes(const std::string& bytes) {
     31   // The first condition ensures that our suffix uniqueness is sufficient to
     32   // guarantee position uniqueness.  Otherwise, it's possible the end of some
     33   // prefix + some short suffix == some long suffix.
     34   // The second condition ensures that FindSmallerWithSuffix can always return a
     35   // result.
     36   return bytes.length() >= kSuffixLength
     37       && bytes[bytes.length()-1] != 0;
     38 }
     39 
     40 // static.
     41 std::string UniquePosition::RandomSuffix() {
     42   // Users random data for all but the last byte.  The last byte must not be
     43   // zero.  We arbitrarily set it to 0x7f.
     44   return base::RandBytesAsString(kSuffixLength - 1) + "\x7f";
     45 }
     46 
     47 // static.
     48 UniquePosition UniquePosition::CreateInvalid() {
     49   UniquePosition pos;
     50   DCHECK(!pos.IsValid());
     51   return pos;
     52 }
     53 
     54 // static.
     55 UniquePosition UniquePosition::FromProto(const sync_pb::UniquePosition& proto) {
     56   if (proto.has_custom_compressed_v1()) {
     57     return UniquePosition(proto.custom_compressed_v1());
     58   } else if (proto.has_value() && !proto.value().empty()) {
     59     return UniquePosition(Compress(proto.value()));
     60   } else if (proto.has_compressed_value() && proto.has_uncompressed_length()) {
     61     uLongf uncompressed_len = proto.uncompressed_length();
     62     std::string un_gzipped;
     63 
     64     un_gzipped.resize(uncompressed_len);
     65     int result = uncompress(
     66         reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)),
     67         &uncompressed_len,
     68         reinterpret_cast<const Bytef*>(proto.compressed_value().data()),
     69         proto.compressed_value().size());
     70     if (result != Z_OK) {
     71       DLOG(ERROR) << "Unzip failed " << result;
     72       return UniquePosition::CreateInvalid();
     73     }
     74     if (uncompressed_len != proto.uncompressed_length()) {
     75       DLOG(ERROR)
     76           << "Uncompressed length " << uncompressed_len
     77           << " did not match specified length " << proto.uncompressed_length();
     78       return UniquePosition::CreateInvalid();
     79     }
     80     return UniquePosition(Compress(un_gzipped));
     81   } else {
     82     return UniquePosition::CreateInvalid();
     83   }
     84 }
     85 
     86 // static.
     87 UniquePosition UniquePosition::FromInt64(
     88     int64 x, const std::string& suffix) {
     89   uint64 y = static_cast<uint64>(x);
     90   y ^= 0x8000000000000000ULL; // Make it non-negative.
     91   std::string bytes(8, 0);
     92   for (int i = 7; i >= 0; --i) {
     93     bytes[i] = static_cast<uint8>(y);
     94     y >>= 8;
     95   }
     96   return UniquePosition(bytes + suffix, suffix);
     97 }
     98 
     99 // static.
    100 UniquePosition UniquePosition::InitialPosition(
    101     const std::string& suffix) {
    102   DCHECK(IsValidSuffix(suffix));
    103   return UniquePosition(suffix, suffix);
    104 }
    105 
    106 // static.
    107 UniquePosition UniquePosition::Before(
    108     const UniquePosition& x,
    109     const std::string& suffix) {
    110   DCHECK(IsValidSuffix(suffix));
    111   DCHECK(x.IsValid());
    112   const std::string& before = FindSmallerWithSuffix(
    113       Uncompress(x.compressed_), suffix);
    114   return UniquePosition(before + suffix, suffix);
    115 }
    116 
    117 // static.
    118 UniquePosition UniquePosition::After(
    119     const UniquePosition& x,
    120     const std::string& suffix) {
    121   DCHECK(IsValidSuffix(suffix));
    122   DCHECK(x.IsValid());
    123   const std::string& after = FindGreaterWithSuffix(
    124       Uncompress(x.compressed_), suffix);
    125   return UniquePosition(after + suffix, suffix);
    126 }
    127 
    128 // static.
    129 UniquePosition UniquePosition::Between(
    130     const UniquePosition& before,
    131     const UniquePosition& after,
    132     const std::string& suffix) {
    133   DCHECK(before.IsValid());
    134   DCHECK(after.IsValid());
    135   DCHECK(before.LessThan(after));
    136   DCHECK(IsValidSuffix(suffix));
    137   const std::string& mid = FindBetweenWithSuffix(
    138       Uncompress(before.compressed_),
    139       Uncompress(after.compressed_),
    140       suffix);
    141   return UniquePosition(mid + suffix, suffix);
    142 }
    143 
    144 UniquePosition::UniquePosition() : is_valid_(false) {}
    145 
    146 bool UniquePosition::LessThan(const UniquePosition& other) const {
    147   DCHECK(this->IsValid());
    148   DCHECK(other.IsValid());
    149 
    150   return compressed_ < other.compressed_;
    151 }
    152 
    153 bool UniquePosition::Equals(const UniquePosition& other) const {
    154   if (!this->IsValid() && !other.IsValid())
    155     return true;
    156 
    157   return compressed_ == other.compressed_;
    158 }
    159 
    160 void UniquePosition::ToProto(sync_pb::UniquePosition* proto) const {
    161   proto->Clear();
    162 
    163   // This is the current preferred foramt.
    164   proto->set_custom_compressed_v1(compressed_);
    165 
    166   // Older clients used to write other formats.  We don't bother doing that
    167   // anymore because that form of backwards compatibility is expensive.  We no
    168   // longer want to pay that price just too support clients that have been
    169   // obsolete for a long time.  See the proto definition for details.
    170 }
    171 
    172 void UniquePosition::SerializeToString(std::string* blob) const {
    173   DCHECK(blob);
    174   sync_pb::UniquePosition proto;
    175   ToProto(&proto);
    176   proto.SerializeToString(blob);
    177 }
    178 
    179 int64 UniquePosition::ToInt64() const {
    180   uint64 y = 0;
    181   const std::string& s = Uncompress(compressed_);
    182   size_t l = sizeof(int64);
    183   if (s.length() < l) {
    184     NOTREACHED();
    185     l = s.length();
    186   }
    187   for (size_t i = 0; i < l; ++i) {
    188     const uint8 byte = s[l - i - 1];
    189     y |= static_cast<uint64>(byte) << (i * 8);
    190   }
    191   y ^= 0x8000000000000000ULL;
    192   // This is technically implementation-defined if y > INT64_MAX, so
    193   // we're assuming that we're on a twos-complement machine.
    194   return static_cast<int64>(y);
    195 }
    196 
    197 bool UniquePosition::IsValid() const {
    198   return is_valid_;
    199 }
    200 
    201 std::string UniquePosition::ToDebugString() const {
    202   const std::string bytes = Uncompress(compressed_);
    203   if (bytes.empty())
    204     return std::string("INVALID[]");
    205 
    206   std::string debug_string = base::HexEncode(bytes.data(), bytes.length());
    207   if (!IsValid()) {
    208     debug_string = "INVALID[" + debug_string + "]";
    209   }
    210 
    211   std::string compressed_string =
    212       base::HexEncode(compressed_.data(), compressed_.length());
    213   debug_string.append(", compressed: " + compressed_string);
    214   return debug_string;
    215 }
    216 
    217 std::string UniquePosition::GetSuffixForTest() const {
    218   const std::string bytes = Uncompress(compressed_);
    219   const size_t prefix_len = bytes.length() - kSuffixLength;
    220   return bytes.substr(prefix_len, std::string::npos);
    221 }
    222 
    223 std::string UniquePosition::FindSmallerWithSuffix(
    224     const std::string& reference,
    225     const std::string& suffix) {
    226   size_t ref_zeroes = reference.find_first_not_of('\0');
    227   size_t suffix_zeroes = suffix.find_first_not_of('\0');
    228 
    229   // Neither of our inputs are allowed to have trailing zeroes, so the following
    230   // must be true.
    231   DCHECK_NE(ref_zeroes, std::string::npos);
    232   DCHECK_NE(suffix_zeroes, std::string::npos);
    233 
    234   if (suffix_zeroes > ref_zeroes) {
    235     // Implies suffix < ref.
    236     return std::string();
    237   }
    238 
    239   if (suffix.substr(suffix_zeroes) < reference.substr(ref_zeroes)) {
    240     // Prepend zeroes so the result has as many zero digits as |reference|.
    241     return std::string(ref_zeroes - suffix_zeroes, '\0');
    242   } else if (suffix_zeroes > 1) {
    243     // Prepend zeroes so the result has one more zero digit than |reference|.
    244     // We could also take the "else" branch below, but taking this branch will
    245     // give us a smaller result.
    246     return std::string(ref_zeroes - suffix_zeroes + 1, '\0');
    247   } else {
    248     // Prepend zeroes to match those in the |reference|, then something smaller
    249     // than the first non-zero digit in |reference|.
    250     char lt_digit = static_cast<uint8>(reference[ref_zeroes])/2;
    251     return std::string(ref_zeroes, '\0') + lt_digit;
    252   }
    253 }
    254 
    255 // static
    256 std::string UniquePosition::FindGreaterWithSuffix(
    257     const std::string& reference,
    258     const std::string& suffix) {
    259   size_t ref_FFs = reference.find_first_not_of(kuint8max);
    260   size_t suffix_FFs = suffix.find_first_not_of(kuint8max);
    261 
    262   if (ref_FFs == std::string::npos) {
    263     ref_FFs = reference.length();
    264   }
    265   if (suffix_FFs == std::string::npos) {
    266     suffix_FFs = suffix.length();
    267   }
    268 
    269   if (suffix_FFs > ref_FFs) {
    270     // Implies suffix > reference.
    271     return std::string();
    272   }
    273 
    274   if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) {
    275     // Prepend FF digits to match those in |reference|.
    276     return std::string(ref_FFs - suffix_FFs, kuint8max);
    277   } else if (suffix_FFs > 1) {
    278     // Prepend enough leading FF digits so result has one more of them than
    279     // |reference| does.  We could also take the "else" branch below, but this
    280     // gives us a smaller result.
    281     return std::string(ref_FFs - suffix_FFs + 1, kuint8max);
    282   } else {
    283     // Prepend FF digits to match those in |reference|, then something larger
    284     // than the first non-FF digit in |reference|.
    285     char gt_digit = static_cast<uint8>(reference[ref_FFs]) +
    286         (kuint8max - static_cast<uint8>(reference[ref_FFs]) + 1) / 2;
    287     return std::string(ref_FFs, kuint8max) + gt_digit;
    288   }
    289 }
    290 
    291 // static
    292 std::string UniquePosition::FindBetweenWithSuffix(
    293     const std::string& before,
    294     const std::string& after,
    295     const std::string& suffix) {
    296   DCHECK(IsValidSuffix(suffix));
    297   DCHECK_NE(before, after);
    298   DCHECK_LT(before, after);
    299 
    300   std::string mid;
    301 
    302   // Sometimes our suffix puts us where we want to be.
    303   if (before < suffix && suffix < after) {
    304     return std::string();
    305   }
    306 
    307   size_t i = 0;
    308   for ( ; i < std::min(before.length(), after.length()); ++i) {
    309     uint8 a_digit = before[i];
    310     uint8 b_digit = after[i];
    311 
    312     if (b_digit - a_digit >= 2) {
    313       mid.push_back(a_digit + (b_digit - a_digit)/2);
    314       return mid;
    315     } else if (a_digit == b_digit) {
    316       mid.push_back(a_digit);
    317 
    318       // Both strings are equal so far.  Will appending the suffix at this point
    319       // give us the comparison we're looking for?
    320       if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) {
    321         return mid;
    322       }
    323     } else {
    324       DCHECK_EQ(b_digit - a_digit, 1);  // Implied by above if branches.
    325 
    326       // The two options are off by one digit.  The choice of whether to round
    327       // up or down here will have consequences on what we do with the remaining
    328       // digits.  Exploring both options is an optimization and is not required
    329       // for the correctness of this algorithm.
    330 
    331       // Option A: Round down the current digit.  This makes our |mid| <
    332       // |after|, no matter what we append afterwards.  We then focus on
    333       // appending digits until |mid| > |before|.
    334       std::string mid_a = mid;
    335       mid_a.push_back(a_digit);
    336       mid_a.append(FindGreaterWithSuffix(before.substr(i+1), suffix));
    337 
    338       // Option B: Round up the current digit.  This makes our |mid| > |before|,
    339       // no matter what we append afterwards.  We then focus on appending digits
    340       // until |mid| < |after|.  Note that this option may not be viable if the
    341       // current digit is the last one in |after|, so we skip the option in that
    342       // case.
    343       if (after.length() > i+1) {
    344         std::string mid_b = mid;
    345         mid_b.push_back(b_digit);
    346         mid_b.append(FindSmallerWithSuffix(after.substr(i+1), suffix));
    347 
    348         // Does this give us a shorter position value?  If so, use it.
    349         if (mid_b.length() < mid_a.length()) {
    350           return mid_b;
    351         }
    352       }
    353       return mid_a;
    354     }
    355   }
    356 
    357   // If we haven't found a midpoint yet, the following must be true:
    358   DCHECK_EQ(before.substr(0, i), after.substr(0, i));
    359   DCHECK_EQ(before, mid);
    360   DCHECK_LT(before.length(), after.length());
    361 
    362   // We know that we'll need to append at least one more byte to |mid| in the
    363   // process of making it < |after|.  Appending any digit, regardless of the
    364   // value, will make |before| < |mid|.  Therefore, the following will get us a
    365   // valid position.
    366 
    367   mid.append(FindSmallerWithSuffix(after.substr(i), suffix));
    368   return mid;
    369 }
    370 
    371 UniquePosition::UniquePosition(const std::string& internal_rep)
    372     : compressed_(internal_rep),
    373       is_valid_(IsValidBytes(Uncompress(internal_rep))) {
    374 }
    375 
    376 UniquePosition::UniquePosition(
    377     const std::string& uncompressed,
    378     const std::string& suffix)
    379   : compressed_(Compress(uncompressed)),
    380     is_valid_(IsValidBytes(uncompressed)) {
    381   DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length());
    382   DCHECK(IsValidSuffix(suffix));
    383   DCHECK(IsValid());
    384 }
    385 
    386 // On custom compression:
    387 //
    388 // Let C(x) be the compression function and U(x) be the uncompression function.
    389 //
    390 // This compression scheme has a few special properties.  For one, it is
    391 // order-preserving.  For any two valid position strings x and y:
    392 //   x < y <=> C(x) < C(y)
    393 // This allows us keep the position strings compressed as we sort them.
    394 //
    395 // The compressed format and the decode algorithm:
    396 //
    397 // The compressed string is a series of blocks, almost all of which are 8 bytes
    398 // in length.  The only exception is the last block in the compressed string,
    399 // which may be a remainder block, which has length no greater than 7.  The
    400 // full-length blocks are either repeated character blocks or plain data blocks.
    401 // All blocks are entirely self-contained.  Their decoded values are independent
    402 // from that of their neighbours.
    403 //
    404 // A repeated character block is encoded into eight bytes and represents between
    405 // 4 and 2^31 repeated instances of a given character in the unencoded stream.
    406 // The encoding consists of a single character repeated four times, followed by
    407 // an encoded count.  The encoded count is stored as a big-endian 32 bit
    408 // integer.  There are 2^31 possible count values, and two encodings for each.
    409 // The high encoding is 'enc = kuint32max - count'; the low encoding is 'enc =
    410 // count'.  At compression time, the algorithm will choose between the two
    411 // encodings based on which of the two will maintain the appropriate sort
    412 // ordering (by a process which will be described below).  The decompression
    413 // algorithm need not concern itself with which encoding was used; it needs only
    414 // to decode it.  The decoded value of this block is "count" instances of the
    415 // character that was repeated four times in the first half of this block.
    416 //
    417 // A plain data block is encoded into eight bytes and represents exactly eight
    418 // bytes of data in the unencoded stream.  The plain data block must not begin
    419 // with the same character repeated four times.  It is allowed to contain such a
    420 // four-character sequence, just not at the start of the block.  The decoded
    421 // value of a plain data block is identical to its encoded value.
    422 //
    423 // A remainder block has length of at most seven.  It is a shorter version of
    424 // the plain data block.  It occurs only at the end of the encoded stream and
    425 // represents exactly as many bytes of unencoded data as its own length.  Like a
    426 // plain data block, the remainder block never begins with the same character
    427 // repeated four times.  The decoded value of this block is identical to its
    428 // encoded value.
    429 //
    430 // The encode algorithm:
    431 //
    432 // From the above description, it can be seen that there may be more than one
    433 // way to encode a given input string.  The encoder must be careful to choose
    434 // the encoding that guarantees sort ordering.
    435 //
    436 // The rules for the encoder are as follows:
    437 // 1. Iterate through the input string and produce output blocks one at a time.
    438 // 2. Where possible (ie. where the next four bytes of input consist of the
    439 //    same character repeated four times), produce a repeated data block of
    440 //    maximum possible length.
    441 // 3. If there is at least 8 bytes of data remaining and it is not possible
    442 //    to produce a repeated character block, produce a plain data block.
    443 // 4. If there are less than 8 bytes of data remaining and it is not possible
    444 //    to produce a repeated character block, produce a remainder block.
    445 // 5. When producing a repeated character block, the count encoding must be
    446 //    chosen in such a way that the sort ordering is maintained.  The choice is
    447 //    best illustrated by way of example:
    448 //
    449 //      When comparing two strings, the first of which begins with of 8
    450 //      instances of the letter 'B' and the second with 10 instances of the
    451 //      letter 'B', which of the two should compare lower?  The result depends
    452 //      on the 9th character of the first string, since it will be compared
    453 //      against the 9th 'B' in the second string.  If that character is an 'A',
    454 //      then the first string will compare lower.  If it is a 'C', then the
    455 //      first string will compare higher.
    456 //
    457 //    The key insight is that the comparison value of a repeated character block
    458 //    depends on the value of the character that follows it.  If the character
    459 //    follows the repeated character has a value greater than the repeated
    460 //    character itself, then a shorter run length should translate to a higher
    461 //    comparison value.  Therefore, we encode its count using the low encoding.
    462 //    Similarly, if the following character is lower, we use the high encoding.
    463 
    464 namespace {
    465 
    466 // Appends an encoded run length to |output_str|.
    467 static void WriteEncodedRunLength(uint32 length,
    468                                   bool high_encoding,
    469                                   std::string* output_str) {
    470   CHECK_GE(length, 4U);
    471   CHECK_LT(length, 0x80000000);
    472 
    473   // Step 1: Invert the count, if necessary, to account for the following digit.
    474   uint32 encoded_length;
    475   if (high_encoding) {
    476     encoded_length = 0xffffffff - length;
    477   } else {
    478     encoded_length = length;
    479   }
    480 
    481   // Step 2: Write it as big-endian so it compares correctly with memcmp(3).
    482   output_str->append(1, 0xff & (encoded_length >> 24U));
    483   output_str->append(1, 0xff & (encoded_length >> 16U));
    484   output_str->append(1, 0xff & (encoded_length >> 8U));
    485   output_str->append(1, 0xff & (encoded_length >> 0U));
    486 }
    487 
    488 // Reads an encoded run length for |str| at position |i|.
    489 static uint32 ReadEncodedRunLength(const std::string& str, size_t i) {
    490   DCHECK_LE(i + 4, str.length());
    491 
    492   // Step 1: Extract the big-endian count.
    493   uint32 encoded_length =
    494       ((uint8)(str[i+3]) << 0)  |
    495       ((uint8)(str[i+2]) << 8)  |
    496       ((uint8)(str[i+1]) << 16) |
    497       ((uint8)(str[i+0]) << 24);
    498 
    499   // Step 2: If this was an inverted count, un-invert it.
    500   uint32 length;
    501   if (encoded_length & 0x80000000) {
    502     length = 0xffffffff - encoded_length;
    503   } else {
    504     length = encoded_length;
    505   }
    506 
    507   return length;
    508 }
    509 
    510 // A series of four identical chars at the beginning of a block indicates
    511 // the beginning of a repeated character block.
    512 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) {
    513   return chars[start_index] == chars[start_index+1]
    514       && chars[start_index] == chars[start_index+2]
    515       && chars[start_index] == chars[start_index+3];
    516 }
    517 
    518 }  // namespace
    519 
    520 // static
    521 // Wraps the CompressImpl function with a bunch of DCHECKs.
    522 std::string UniquePosition::Compress(const std::string& str) {
    523   DCHECK(IsValidBytes(str));
    524   std::string compressed = CompressImpl(str);
    525   DCHECK(IsValidCompressed(compressed));
    526   DCHECK_EQ(str, Uncompress(compressed));
    527   return compressed;
    528 }
    529 
    530 // static
    531 // Performs the order preserving run length compression of a given input string.
    532 std::string UniquePosition::CompressImpl(const std::string& str) {
    533   std::string output;
    534 
    535   // The compressed length will usually be at least as long as the suffix (28),
    536   // since the suffix bytes are mostly random.  Most are a few bytes longer; a
    537   // small few are tens of bytes longer.  Some early tests indicated that
    538   // roughly 99% had length 40 or smaller.  We guess that pre-sizing for 48 is a
    539   // good trade-off, but that has not been confirmed through profiling.
    540   output.reserve(48);
    541 
    542   // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the
    543   // length of a string of identical digits starting at i.
    544   for (size_t i = 0; i < str.length(); ) {
    545     if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) {
    546       // Four identical bytes in a row at this position means that we must start
    547       // a repeated character block.  Begin by outputting those four bytes.
    548       output.append(str, i, 4);
    549 
    550       // Determine the size of the run.
    551       const char rep_digit = str[i];
    552       const size_t runs_until = str.find_first_not_of(rep_digit, i+4);
    553 
    554       // Handle the 'runs until end' special case specially.
    555       size_t run_length;
    556       bool encode_high;  // True if the next byte is greater than |rep_digit|.
    557       if (runs_until == std::string::npos) {
    558         run_length = str.length() - i;
    559         encode_high = false;
    560       } else {
    561         run_length = runs_until - i;
    562         encode_high = static_cast<uint8>(str[runs_until]) >
    563             static_cast<uint8>(rep_digit);
    564       }
    565       DCHECK_LT(run_length, static_cast<size_t>(kint32max))
    566           << "This implementation can't encode run-lengths greater than 2^31.";
    567 
    568       WriteEncodedRunLength(run_length, encode_high, &output);
    569       i += run_length;  // Jump forward by the size of the run length.
    570     } else {
    571       // Output up to eight bytes without any encoding.
    572       const size_t len = std::min(static_cast<size_t>(8), str.length() - i);
    573       output.append(str, i, len);
    574       i += len;  // Jump forward by the amount of input consumed (usually 8).
    575     }
    576   }
    577 
    578   return output;
    579 }
    580 
    581 // static
    582 // Uncompresses strings that were compresed with UniquePosition::Compress.
    583 std::string UniquePosition::Uncompress(const std::string& str) {
    584   std::string output;
    585   size_t i = 0;
    586   // Iterate through the compressed string one block at a time.
    587   for (i = 0; i + 8 <= str.length(); i += 8) {
    588     if (IsRepeatedCharPrefix(str, i)) {
    589       // Found a repeated character block.  Expand it.
    590       const char rep_digit = str[i];
    591       uint32 length = ReadEncodedRunLength(str, i+4);
    592       output.append(length, rep_digit);
    593     } else {
    594       // Found a regular block.  Copy it.
    595       output.append(str, i, 8);
    596     }
    597   }
    598   // Copy the remaining bytes that were too small to form a block.
    599   output.append(str, i, std::string::npos);
    600   return output;
    601 }
    602 
    603 bool UniquePosition::IsValidCompressed(const std::string& str) {
    604   for (size_t i = 0; i + 8 <= str.length(); i += 8) {
    605     if (IsRepeatedCharPrefix(str, i)) {
    606       uint32 count = ReadEncodedRunLength(str, i+4);
    607       if (count < 4) {
    608         // A repeated character block should at least represent the four
    609         // characters that started it.
    610         return false;
    611       }
    612       if (str[i] == str[i+4]) {
    613         // Does the next digit after a count match the repeated character?  Then
    614         // this is not the highest possible count.
    615         return false;
    616       }
    617     }
    618   }
    619   // We don't bother looking for the existence or checking the validity of
    620   // any partial blocks.  There's no way they could be invalid anyway.
    621   return true;
    622 }
    623 
    624 }  // namespace syncer
    625