Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file defines utility functions for working with strings.
      6 
      7 #ifndef BASE_STRINGS_STRING_UTIL_H_
      8 #define BASE_STRINGS_STRING_UTIL_H_
      9 
     10 #include <ctype.h>
     11 #include <stdarg.h>   // va_list
     12 #include <stddef.h>
     13 #include <stdint.h>
     14 
     15 #include <string>
     16 #include <vector>
     17 
     18 #include "base/compiler_specific.h"
     19 #include "base/strings/string_piece.h"  // For implicit conversions.
     20 #include "build/build_config.h"
     21 
     22 // On Android, bionic's stdio.h defines an snprintf macro when being built with
     23 // clang. Undefine it here so it won't collide with base::snprintf().
     24 #undef snprintf
     25 
     26 namespace base {
     27 
     28 // C standard-library functions that aren't cross-platform are provided as
     29 // "base::...", and their prototypes are listed below. These functions are
     30 // then implemented as inline calls to the platform-specific equivalents in the
     31 // platform-specific headers.
     32 
     33 // Wrapper for vsnprintf that always null-terminates and always returns the
     34 // number of characters that an untruncated formatted string would contain,
     35 // even when truncation occurs.
     36 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
     37     PRINTF_FORMAT(3, 0);
     38 
     39 // Some of these implementations need to be inlined.
     40 
     41 // We separate the declaration from the implementation of this inline
     42 // function just so the PRINTF_FORMAT works.
     43 inline int snprintf(char* buffer,
     44                     size_t size,
     45                     _Printf_format_string_ const char* format,
     46                     ...) PRINTF_FORMAT(3, 4);
     47 inline int snprintf(char* buffer,
     48                     size_t size,
     49                     _Printf_format_string_ const char* format,
     50                     ...) {
     51   va_list arguments;
     52   va_start(arguments, format);
     53   int result = vsnprintf(buffer, size, format, arguments);
     54   va_end(arguments);
     55   return result;
     56 }
     57 
     58 // BSD-style safe and consistent string copy function.
     59 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
     60 // Copies at most |dst_size|-1 characters, and always null-terminates |dst|, as
     61 // long as |dst_size| is not 0.  Returns the length of |src| in characters.
     62 // If the return value is >= |dst_size|, then the output was truncated.
     63 // NOTE: All sizes are in number of characters, NOT in bytes.
     64 size_t strlcpy(char* dst, const char* src, size_t dst_size);
     65 
     66 // ASCII-specific tolower.  The standard library's tolower is locale sensitive,
     67 // so we don't want to use it here.
     68 inline char ToLowerASCII(char c) {
     69   return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
     70 }
     71 
     72 // ASCII-specific toupper.  The standard library's toupper is locale sensitive,
     73 // so we don't want to use it here.
     74 inline char ToUpperASCII(char c) {
     75   return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
     76 }
     77 // Converts the given string to its ASCII-lowercase equivalent.
     78 std::string ToLowerASCII(StringPiece str);
     79 // Converts the given string to its ASCII-uppercase equivalent.
     80 std::string ToUpperASCII(StringPiece str);
     81 
     82 // Functor for case-insensitive ASCII comparisons for STL algorithms like
     83 // std::search.
     84 //
     85 // Note that a full Unicode version of this functor is not possible to write
     86 // because case mappings might change the number of characters, depend on
     87 // context (combining accents), and require handling UTF-16. If you need
     88 // proper Unicode support, use base::i18n::ToLower/FoldCase and then just
     89 // use a normal operator== on the result.
     90 template<typename Char> struct CaseInsensitiveCompareASCII {
     91  public:
     92   bool operator()(Char x, Char y) const {
     93     return ToLowerASCII(x) == ToLowerASCII(y);
     94   }
     95 };
     96 
     97 // Like strcasecmp for case-insensitive ASCII characters only. Returns:
     98 //   -1  (a < b)
     99 //    0  (a == b)
    100 //    1  (a > b)
    101 // (unlike strcasecmp which can return values greater than 1 or less than -1).
    102 // For full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
    103 // and then just call the normal string operators on the result.
    104 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
    105 
    106 // Equality for ASCII case-insensitive comparisons. For full Unicode support,
    107 // use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
    108 // == or !=.
    109 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
    110 
    111 // Contains the set of characters representing ASCII whitespace.
    112 // Null-terminated. These are the whitespace characters as defined by HTML5,
    113 // and don't include control characters.
    114 extern const char kWhitespaceASCII[];
    115 
    116 // Replaces every occurrence in |input| of any character listed in
    117 // |replace_chars| with the |replace_with| string, writing to |output|.
    118 // Returns true if any characters were replaced.
    119 // |replace_chars| must be null-terminated.
    120 // NOTE: Safe to use the same variable for both |input| and |output|.
    121 bool ReplaceChars(const std::string& input,
    122                   const StringPiece& replace_chars,
    123                   const std::string& replace_with,
    124                   std::string* output);
    125 
    126 enum TrimPositions {
    127   TRIM_NONE     = 0,
    128   TRIM_LEADING  = 1 << 0,
    129   TRIM_TRAILING = 1 << 1,
    130   TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING,
    131 };
    132 
    133 // Removes characters in |trim_chars| from the beginning and end of |input|.
    134 // This 8-bit version only works on 8-bit characters, not UTF-8.
    135 // NOTE(review): the bool return value is undocumented here; confirm meaning.
    136 // It is safe to use the same variable for both |input| and |output| (this is
    137 // the normal usage to trim in-place).
    138 bool TrimString(const std::string& input,
    139                 StringPiece trim_chars,
    140                 std::string* output);
    141 
    142 // StringPiece version of the above. The returned piece refers to the original
    143 // buffer.
    144 StringPiece TrimString(StringPiece input,
    145                        const StringPiece& trim_chars,
    146                        TrimPositions positions);
    147 
    148 // Trims any whitespace from either end of the input string.
    149 //
    150 // The StringPiece versions return a substring referencing the input buffer.
    151 // The ASCII versions look only for ASCII whitespace.
    152 //
    153 // The std::string versions return (as TrimPositions) where whitespace was found.
    154 // NOTE: Safe to use the same variable for both |input| and |output|.
    155 TrimPositions TrimWhitespaceASCII(const std::string& input,
    156                                   TrimPositions positions,
    157                                   std::string* output);
    158 
    159 // Returns true if the specified string matches the criteria. How can a wide
    160 // string be 8-bit or UTF-8? It contains only characters that are < 256 (in the
    161 // first case) or characters that use only 8 bits and whose 8-bit
    162 // representation looks like a UTF-8 string (the second case).
    163 //
    164 // Note that IsStringUTF8 checks not only if the input is structurally
    165 // valid but also if it doesn't contain any non-character codepoint
    166 // (e.g. U+FFFE). It's done on purpose because all the existing callers want
    167 // to have the maximum 'discriminating' power from other encodings. If
    168 // there's a use case for just checking the structural validity, we have to
    169 // add a new function for that.
    170 //
    171 // IsStringASCII assumes the input is likely all ASCII, and does not leave early
    172 // if that is not the case.
    173 bool IsStringUTF8(const StringPiece& str);
    174 bool IsStringASCII(const StringPiece& str);
    175 
    176 }  // namespace base
    177 
    178 #if defined(OS_WIN)
    179 #include "base/strings/string_util_win.h"
    180 #elif defined(OS_POSIX)
    181 #include "base/strings/string_util_posix.h"
    182 #else
    183 #error Define string operations appropriately for your platform
    184 #endif
    185 
    186 #endif  // BASE_STRINGS_STRING_UTIL_H_
    187