Home | History | Annotate | Download | only in strings
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_LIB_STRINGS_STR_UTIL_H_
     17 #define TENSORFLOW_CORE_LIB_STRINGS_STR_UTIL_H_
     18 
     19 #include <functional>
     20 #include <string>
     21 #include <vector>
     22 #include "tensorflow/core/lib/core/stringpiece.h"
     23 #include "tensorflow/core/lib/strings/strcat.h"
     24 #include "tensorflow/core/platform/types.h"
     25 
     26 // Basic string utility routines
     27 namespace tensorflow {
     28 namespace str_util {
     29 
     30 // Returns a version of 'src' where unprintable characters have been
     31 // escaped using C-style escape sequences.
     32 string CEscape(StringPiece src);
     33 
     34 // Copies "source" to "dest", rewriting C-style escape sequences --
     35 // '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
     36 //
     37 // Errors: Sets the description of the first encountered error in
     38 // 'error'. To disable error reporting, set 'error' to NULL.
     39 //
     40 // NOTE: Does not support \u or \U!
     41 bool CUnescape(StringPiece source, string* dest, string* error);
     42 
     43 // Removes any trailing whitespace from "*s".
     44 void StripTrailingWhitespace(string* s);
     45 
     46 // Removes leading ascii_isspace() characters.
     47 // Returns number of characters removed.
     48 size_t RemoveLeadingWhitespace(StringPiece* text);
     49 
     50 // Removes trailing ascii_isspace() characters.
     51 // Returns number of characters removed.
     52 size_t RemoveTrailingWhitespace(StringPiece* text);
     53 
     54 // Removes leading and trailing ascii_isspace() chars.
     55 // Returns number of chars removed.
     56 size_t RemoveWhitespaceContext(StringPiece* text);
     57 
     58 // Consume a leading positive integer value.  If any digits were
     59 // found, store the value of the leading unsigned number in "*val",
     60 // advance "*s" past the consumed number, and return true.  Returns
     61 // false if no leading digits were found or if overflow occurred.
     62 bool ConsumeLeadingDigits(StringPiece* s, uint64* val);
     63 
     64 // Consume a leading token composed of non-whitespace characters only.
     65 // If *s starts with a non-zero number of non-whitespace characters, store
     66 // them in *val, advance *s past them, and return true.  Else return false.
     67 bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
     68 
     69 // If "*s" starts with "expected", consume it and return true.
     70 // Otherwise, return false.
     71 bool ConsumePrefix(StringPiece* s, StringPiece expected);
     72 
     73 // If "*s" ends with "expected", remove it and return true.
     74 // Otherwise, return false.
     75 bool ConsumeSuffix(StringPiece* s, StringPiece expected);
     76 
     77 // Return lower-cased version of s.
     78 string Lowercase(StringPiece s);
     79 
     80 // Return upper-cased version of s.
     81 string Uppercase(StringPiece s);
     82 
     83 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
     84 // - converts all non-alphanumeric characters to underscores
     85 // - replaces each occurrence of a capital letter (except the very
     86 //   first character and if there is already an '_' before it) with '_'
     87 //   followed by this letter in lower case
     88 // - Skips leading non-alpha characters
     89 // This method is useful for producing strings matching "[a-z][a-z0-9_]*"
     90 // as required by OpDef.ArgDef.name. The resulting string is either empty or
     91 // matches this regex.
     92 string ArgDefCase(StringPiece s);
     93 
     94 // Capitalize first character of each word in "*s".  "delimiters" is a
     95 // set of characters that can be used as word boundaries.
     96 void TitlecaseString(string* s, StringPiece delimiters);
     97 
     98 // Replaces the first occurrence (if replace_all is false) or all occurrences
     99 // (if replace_all is true) of oldsub in s with newsub.
    100 string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
    101                      bool replace_all);
    102 
    103 // Joins the elements of "s" into one string, separated by "sep".
    104 template <typename T>
    105 string Join(const T& s, const char* sep);
    106 
    107 // A variant of Join where for each element of "s", f(&dest_string, elem)
    108 // is invoked; f is expected to append the element's text to dest_string.
    109 // f is often a lambda of the form:  [](string* result, ElemType elem) { ... }
    110 template <typename T, typename Formatter>
    111 string Join(const T& s, const char* sep, Formatter f);
    112 
    113 struct AllowEmpty {
    114   bool operator()(StringPiece sp) const { return true; }
    115 };
    116 struct SkipEmpty {
    117   bool operator()(StringPiece sp) const { return !sp.empty(); }
    118 };
    119 struct SkipWhitespace {
    120   bool operator()(StringPiece sp) const {
    121     RemoveTrailingWhitespace(&sp);
    122     return !sp.empty();
    123   }
    124 };
    125 
    126 // Split strings using any of the supplied delimiters. For example:
    127 // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
    128 std::vector<string> Split(StringPiece text, StringPiece delims);
    129 
    130 template <typename Predicate>
    131 std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p);
    132 
    133 // Split "text" at "delim" characters, and parse each component as
    134 // an integer.  If successful, adds the individual numbers in order
    135 // to "*result" and returns true.  Otherwise returns false.
    136 bool SplitAndParseAsInts(StringPiece text, char delim,
    137                          std::vector<int32>* result);
    138 bool SplitAndParseAsInts(StringPiece text, char delim,
    139                          std::vector<int64>* result);
    140 bool SplitAndParseAsFloats(StringPiece text, char delim,
    141                            std::vector<float>* result);
    142 
    143 // StartsWith()
    144 //
    145 // Returns whether a given string `text` begins with `prefix`.
    146 bool StartsWith(StringPiece text, StringPiece prefix);
    147 
    148 // EndsWith()
    149 //
    150 // Returns whether a given string `text` ends with `suffix`.
    151 bool EndsWith(StringPiece text, StringPiece suffix);
    152 
    153 // StrContains()
    154 //
    155 // Returns whether a given string `haystack` contains the substring `needle`.
    156 bool StrContains(StringPiece haystack, StringPiece needle);
    157 
    158 // ------------------------------------------------------------------
    159 // Implementation details below
    160 template <typename T>
    161 string Join(const T& s, const char* sep) {
    162   string result;
    163   bool first = true;
    164   for (const auto& x : s) {
    165     tensorflow::strings::StrAppend(&result, (first ? "" : sep), x);
    166     first = false;
    167   }
    168   return result;
    169 }
    170 
    171 template <typename T>
    172 class Formatter {
    173  public:
    174   Formatter(std::function<void(string*, T)> f) : f_(f) {}
    175   void operator()(string* out, const T& t) { f_(out, t); }
    176 
    177  private:
    178   std::function<void(string*, T)> f_;
    179 };
    180 
    181 template <typename T, typename Formatter>
    182 string Join(const T& s, const char* sep, Formatter f) {
    183   string result;
    184   bool first = true;
    185   for (const auto& x : s) {
    186     if (!first) {
    187       result.append(sep);
    188     }
    189     f(&result, x);
    190     first = false;
    191   }
    192   return result;
    193 }
    194 
    195 inline std::vector<string> Split(StringPiece text, StringPiece delims) {
    196   return Split(text, delims, AllowEmpty());
    197 }
    198 
    199 template <typename Predicate>
    200 std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
    201   std::vector<string> result;
    202   size_t token_start = 0;
    203   if (!text.empty()) {
    204     for (size_t i = 0; i < text.size() + 1; i++) {
    205       if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
    206         StringPiece token(text.data() + token_start, i - token_start);
    207         if (p(token)) {
    208           result.emplace_back(token);
    209         }
    210         token_start = i + 1;
    211       }
    212     }
    213   }
    214   return result;
    215 }
    216 
    217 inline std::vector<string> Split(StringPiece text, char delim) {
    218   return Split(text, StringPiece(&delim, 1));
    219 }
    220 
    221 template <typename Predicate>
    222 std::vector<string> Split(StringPiece text, char delims, Predicate p) {
    223   return Split(text, StringPiece(&delims, 1), p);
    224 }
    225 
    226 // Returns the length of the given null-terminated byte string 'str'.
    227 // Returns 'string_max_len' if the null character was not found in the first
    228 // 'string_max_len' bytes of 'str'.
    229 size_t Strnlen(const char* str, const size_t string_max_len);
    230 
    231 }  // namespace str_util
    232 }  // namespace tensorflow
    233 
    234 #endif  // TENSORFLOW_CORE_LIB_STRINGS_STR_UTIL_H_
    235