Home | History | Annotate | Download | only in strings
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // This module provides routines for encoding a sequence of typed
     17 // entities into a string.  The resulting strings can be
     18 // lexicographically compared to yield the same comparison value that
     19 // would have been generated if the encoded items had been compared
     20 // one by one according to their type.
     21 //
     22 // More precisely, suppose:
     23 //  1. string A is generated by encoding the sequence of items [A_1..A_n]
     24 //  2. string B is generated by encoding the sequence of items [B_1..B_n]
     25 //  3. The types match; i.e., for all i: A_i was encoded using
     26 //     the same routine as B_i
     27 // Then:
     28 //    Comparing A vs. B lexicographically is the same as comparing
     29 //    the vectors [A_1..A_n] and [B_1..B_n] lexicographically.
     30 //
     31 // Furthermore, if n < m, the encoding of [A_1..A_n] is a strict prefix of
     32 // [A_1..A_m] (unless m = n+1 and A_m is the empty string encoded with
     33 // WriteTrailingString, in which case the encodings are equal).
     34 //
     35 // This module is often useful when generating multi-part sstable
     36 // keys that have to be ordered in a particular fashion.
     37 
     38 #ifndef TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
     39 #define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
     40 
     41 #include <string>
     42 #include "tensorflow/core/lib/core/stringpiece.h"
     43 #include "tensorflow/core/platform/macros.h"
     44 #include "tensorflow/core/platform/types.h"
     45 
     46 namespace tensorflow {
     47 
     48 namespace strings {
     49 
     50 class OrderedCode {
     51  public:
     52   // -------------------------------------------------------------------
     53   // Encoding routines: each one of the following routines append
     54   // one item to "*dest" in an encoding where larger values are
     55   // ordered lexicographically after smaller values.
     56   static void WriteString(string* dest, StringPiece str);
     57   static void WriteNumIncreasing(string* dest, uint64 num);
     58   static void WriteSignedNumIncreasing(string* dest, int64 num);
     59 
     60   // -------------------------------------------------------------------
     61   // Decoding routines: these extract an item earlier encoded using
     62   // the corresponding WriteXXX() routines above.  The item is read
     63   // from "*src"; "*src" is modified to point past the decoded item;
     64   // and if "result" is non-NULL, "*result" is modified to contain the
     65   // result.  In case of string result, the decoded string is appended to
     66   // "*result".  Returns true if the next item was read successfully, false
     67   // otherwise.
     68   static bool ReadString(StringPiece* src, string* result);
     69   static bool ReadNumIncreasing(StringPiece* src, uint64* result);
     70   static bool ReadSignedNumIncreasing(StringPiece* src, int64* result);
     71 
     72   // Helper for testing: corrupt "*str" by changing the kth item separator
     73   // in the string.
     74   static void TEST_Corrupt(string* str, int k);
     75 
     76   // Helper for testing.
     77   // SkipToNextSpecialByte is an internal routine defined in the .cc file
     78   // with the following semantics. Return a pointer to the first byte
     79   // in the range "[start..limit)" whose value is 0 or 255.  If no such
     80   // byte exists in the range, returns "limit".
     81   static const char* TEST_SkipToNextSpecialByte(const char* start,
     82                                                 const char* limit);
     83 
     84  private:
     85   // This has only static methods, so disallow construction entirely
     86   OrderedCode();
     87   TF_DISALLOW_COPY_AND_ASSIGN(OrderedCode);
     88 };
     89 
     90 }  // namespace strings
     91 }  // namespace tensorflow
     92 
     93 #endif  // TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
     94