Home | History | Annotate | Download | only in strings
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // #status: RECOMMENDED
     17 // #category: operations on strings
     18 // #summary: Merges strings or numbers with no delimiter.
     19 //
     20 #ifndef TENSORFLOW_LIB_STRINGS_STRCAT_H_
     21 #define TENSORFLOW_LIB_STRINGS_STRCAT_H_
     22 
     23 #include <string>
     24 
     25 #include "tensorflow/core/lib/core/stringpiece.h"
     26 #include "tensorflow/core/lib/strings/numbers.h"
     27 #include "tensorflow/core/platform/macros.h"
     28 #include "tensorflow/core/platform/types.h"
     29 
     30 namespace Eigen {
     31 struct half;
     32 }
     33 
     34 // The AlphaNum type was designed to be used as the parameter type for StrCat().
     35 // Any routine accepting either a string or a number may accept it.
     36 // The basic idea is that by accepting a "const AlphaNum &" as an argument
     37 // to your function, your callers will automatically convert bools, integers,
     38 // and floating point values to strings for you.
     39 //
     40 // NOTE: Use of AlphaNum outside of the //strings package is unsupported except
     41 // for the specific case of function parameters of type "AlphaNum" or "const
     42 // AlphaNum &". In particular, instantiating AlphaNum directly as a stack
     43 // variable is not supported.
     44 //
     45 // Conversion from 8-bit values is not accepted because if it were, then an
     46 // attempt to pass ':' instead of ":" might result in a 58 ending up in your
     47 // result.
     48 //
     49 // Bools convert to "0" or "1".
     50 //
     51 // Floating point values are converted to a string which, if passed to strtod(),
     52 // would produce the exact same original double (except in case of NaN; all NaNs
     53 // are considered the same value). We try to keep the string short but it's not
     54 // guaranteed to be as short as possible.
     55 //
     56 // You can convert to Hexadecimal output rather than Decimal output using Hex.
     57 // To do this, pass strings::Hex(my_int) as a parameter to StrCat. You may
     58 // specify a minimum field width using a separate parameter, so the equivalent
     59 // of Printf("%04x", my_int) is StrCat(Hex(my_int, strings::ZERO_PAD_4))
     60 //
     61 // This class has implicit constructors.
     62 namespace tensorflow {
     63 namespace strings {
     64 
     65 enum PadSpec {
     66   NO_PAD = 1,
     67   ZERO_PAD_2,
     68   ZERO_PAD_3,
     69   ZERO_PAD_4,
     70   ZERO_PAD_5,
     71   ZERO_PAD_6,
     72   ZERO_PAD_7,
     73   ZERO_PAD_8,
     74   ZERO_PAD_9,
     75   ZERO_PAD_10,
     76   ZERO_PAD_11,
     77   ZERO_PAD_12,
     78   ZERO_PAD_13,
     79   ZERO_PAD_14,
     80   ZERO_PAD_15,
     81   ZERO_PAD_16,
     82 };
     83 
     84 struct Hex {
     85   uint64 value;
     86   enum PadSpec spec;
     87   template <class Int>
     88   explicit Hex(Int v, PadSpec s = NO_PAD) : spec(s) {
     89     // Prevent sign-extension by casting integers to
     90     // their unsigned counterparts.
     91     static_assert(
     92         sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8,
     93         "Unknown integer type");
     94     value = sizeof(v) == 1
     95                 ? static_cast<uint8>(v)
     96                 : sizeof(v) == 2 ? static_cast<uint16>(v)
     97                                  : sizeof(v) == 4 ? static_cast<uint32>(v)
     98                                                   : static_cast<uint64>(v);
     99   }
    100 };
    101 
    102 class AlphaNum {
    103  public:
    104   // No bool ctor -- bools convert to an integral type.
    105   // A bool ctor would also convert incoming pointers (bletch).
    106 
    107   AlphaNum(int i32)  // NOLINT(runtime/explicit)
    108       : piece_(digits_, FastInt32ToBufferLeft(i32, digits_) - &digits_[0]) {}
    109   AlphaNum(unsigned int u32)  // NOLINT(runtime/explicit)
    110       : piece_(digits_, FastUInt32ToBufferLeft(u32, digits_) - &digits_[0]) {}
    111   AlphaNum(long x)  // NOLINT(runtime/explicit)
    112       : piece_(digits_, FastInt64ToBufferLeft(x, digits_) - &digits_[0]) {}
    113   AlphaNum(unsigned long x)  // NOLINT(runtime/explicit)
    114       : piece_(digits_, FastUInt64ToBufferLeft(x, digits_) - &digits_[0]) {}
    115   AlphaNum(long long int i64)  // NOLINT(runtime/explicit)
    116       : piece_(digits_, FastInt64ToBufferLeft(i64, digits_) - &digits_[0]) {}
    117   AlphaNum(unsigned long long int u64)  // NOLINT(runtime/explicit)
    118       : piece_(digits_, FastUInt64ToBufferLeft(u64, digits_) - &digits_[0]) {}
    119 
    120   AlphaNum(float f)  // NOLINT(runtime/explicit)
    121       : piece_(digits_, strlen(FloatToBuffer(f, digits_))) {}
    122   AlphaNum(bfloat16 f)  // NOLINT(runtime/explicit)
    123       : piece_(digits_, strlen(FloatToBuffer(static_cast<float>(f), digits_))) {
    124   }
    125   AlphaNum(double f)  // NOLINT(runtime/explicit)
    126       : piece_(digits_, strlen(DoubleToBuffer(f, digits_))) {}
    127 
    128   AlphaNum(const Eigen::half &f);  // NOLINT(runtime/explicit)
    129   AlphaNum(Hex hex);               // NOLINT(runtime/explicit)
    130 
    131   AlphaNum(const char *c_str) : piece_(c_str) {}   // NOLINT(runtime/explicit)
    132   AlphaNum(const StringPiece &pc) : piece_(pc) {}  // NOLINT(runtime/explicit)
    133   AlphaNum(const tensorflow::string &str)          // NOLINT(runtime/explicit)
    134       : piece_(str) {}
    135 
    136   StringPiece::size_type size() const { return piece_.size(); }
    137   const char *data() const { return piece_.data(); }
    138   StringPiece Piece() const { return piece_; }
    139 
    140  private:
    141   StringPiece piece_;
    142   char digits_[kFastToBufferSize];
    143 
    144   // Use ":" not ':'
    145   AlphaNum(char c);  // NOLINT(runtime/explicit)
    146 
    147   TF_DISALLOW_COPY_AND_ASSIGN(AlphaNum);
    148 };
    149 
    150 // ----------------------------------------------------------------------
    151 // StrCat()
    152 //    This merges the given strings or numbers, with no delimiter.  This
    153 //    is designed to be the fastest possible way to construct a string out
    154 //    of a mix of raw C strings, StringPieces, strings, bool values,
    155 //    and numeric values.
    156 //
    157 //    Don't use this for user-visible strings.  The localization process
    158 //    works poorly on strings built up out of fragments.
    159 //
    160 //    For clarity and performance, don't use StrCat when appending to a
    161 //    string.  In particular, avoid using any of these (anti-)patterns:
    162 //      str.append(StrCat(...))
    163 //      str += StrCat(...)
    164 //      str = StrCat(str, ...)
//    where the last is the worst, with the potential to change a loop
    166 //    from a linear time operation with O(1) dynamic allocations into a
    167 //    quadratic time operation with O(n) dynamic allocations.  StrAppend
    168 //    is a better choice than any of the above, subject to the restriction
    169 //    of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
    170 //    be a reference into str.
    171 // ----------------------------------------------------------------------
    172 
// For performance reasons, there are dedicated non-template overloads for one
// to four arguments.  Each returns a new string holding the text of its
// arguments merged in order with no delimiter.  Every argument is taken as
// "const AlphaNum &", so strings and numbers convert implicitly at the call
// site.
string StrCat(const AlphaNum &a) TF_MUST_USE_RESULT;
string StrCat(const AlphaNum &a, const AlphaNum &b) TF_MUST_USE_RESULT;
string StrCat(const AlphaNum &a, const AlphaNum &b,
              const AlphaNum &c) TF_MUST_USE_RESULT;
string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
              const AlphaNum &d) TF_MUST_USE_RESULT;
    180 
namespace internal {

// Do not call directly - this is not part of the public API.
// These are the back ends of the variadic StrCat and StrAppend templates in
// this header: CatPieces receives the pieces for a new string, AppendPieces
// receives the destination string and the pieces to add to it.  Their
// definitions are not in this header.
string CatPieces(std::initializer_list<StringPiece> pieces);
void AppendPieces(string *dest, std::initializer_list<StringPiece> pieces);

}  // namespace internal
    188 
// Support 5 or more arguments.
// The template is declared first with TF_MUST_USE_RESULT and defined right
// after; NOTE(review): presumably because the attribute macro cannot be
// attached to a function definition -- confirm against platform/macros.h.
template <typename... AV>
string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
              const AlphaNum &d, const AlphaNum &e,
              const AV &... args) TF_MUST_USE_RESULT;

// The first five arguments are already AlphaNum references; each remaining
// argument is converted with static_cast<const AlphaNum &>.  All Piece()
// calls sit inside the single CatPieces call expression on purpose: any
// AlphaNum temporaries created by the cast (which own the digit buffers the
// pieces point into) stay alive until that call returns.  Do not hoist the
// initializer list into a separate statement.
template <typename... AV>
string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
              const AlphaNum &d, const AlphaNum &e, const AV &... args) {
  return internal::CatPieces({a.Piece(), b.Piece(), c.Piece(), d.Piece(),
                              e.Piece(),
                              static_cast<const AlphaNum &>(args).Piece()...});
}
    202 
    203 // ----------------------------------------------------------------------
    204 // StrAppend()
    205 //    Same as above, but adds the output to the given string.
    206 //    WARNING: For speed, StrAppend does not try to check each of its input
    207 //    arguments to be sure that they are not a subset of the string being
    208 //    appended to.  That is, while this will work:
    209 //
    210 //    string s = "foo";
    211 //    s += s;
    212 //
    213 //    This will not (necessarily) work:
    214 //
    215 //    string s = "foo";
    216 //    StrAppend(&s, s);
    217 //
//    Note: like StrCat, StrAppend accepts any number of arguments; calls
//    with five or more go through the variadic template below.  Consecutive
//    calls to StrAppend are also quite efficient, so a long argument list
//    may be split across several calls.
    222 // ----------------------------------------------------------------------
    223 
// Dedicated non-template overloads for one to four arguments.  Each adds the
// text of its arguments, in order and with no delimiter, to *dest.  None of
// the arguments may refer to the contents of *dest.
void StrAppend(string *dest, const AlphaNum &a);
void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b);
void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
               const AlphaNum &c);
void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
               const AlphaNum &c, const AlphaNum &d);
    230 
// Support 5 or more arguments.
// The first five arguments are already AlphaNum references; each remaining
// argument is converted with static_cast<const AlphaNum &>.  All Piece()
// calls sit inside the single AppendPieces call expression on purpose: any
// AlphaNum temporaries created by the cast (which own the digit buffers the
// pieces point into) stay alive until that call returns.  Do not hoist the
// initializer list into a separate statement.
template <typename... AV>
inline void StrAppend(string *dest, const AlphaNum &a, const AlphaNum &b,
                      const AlphaNum &c, const AlphaNum &d, const AlphaNum &e,
                      const AV &... args) {
  internal::AppendPieces(dest,
                         {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
                          static_cast<const AlphaNum &>(args).Piece()...});
}
    240 
    241 }  // namespace strings
    242 }  // namespace tensorflow
    243 
    244 #endif  // TENSORFLOW_LIB_STRINGS_STRCAT_H_
    245