1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file defines utility functions for working with strings. 6 7 #ifndef BASE_STRINGS_STRING_UTIL_H_ 8 #define BASE_STRINGS_STRING_UTIL_H_ 9 10 #include <ctype.h> 11 #include <stdarg.h> // va_list 12 #include <stddef.h> 13 #include <stdint.h> 14 15 #include <string> 16 #include <vector> 17 18 #include "base/compiler_specific.h" 19 #include "base/strings/string_piece.h" // For implicit conversions. 20 #include "build/build_config.h" 21 22 // On Android, bionic's stdio.h defines an snprintf macro when being built with 23 // clang. Undefine it here so it won't collide with base::snprintf(). 24 #undef snprintf 25 26 namespace base { 27 28 // C standard-library functions that aren't cross-platform are provided as 29 // "base::...", and their prototypes are listed below. These functions are 30 // then implemented as inline calls to the platform-specific equivalents in the 31 // platform-specific headers. 32 33 // Wrapper for vsnprintf that always null-terminates and always returns the 34 // number of characters that would be in an untruncated formatted 35 // string, even when truncation occurs. 36 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 37 PRINTF_FORMAT(3, 0); 38 39 // Some of these implementations need to be inlined. 40 41 // We separate the declaration from the implementation of this inline 42 // function just so the PRINTF_FORMAT works. 43 inline int snprintf(char* buffer, 44 size_t size, 45 _Printf_format_string_ const char* format, 46 ...) PRINTF_FORMAT(3, 4); 47 inline int snprintf(char* buffer, 48 size_t size, 49 _Printf_format_string_ const char* format, 50 ...) { 51 va_list arguments; 52 va_start(arguments, format); 53 int result = vsnprintf(buffer, size, format, arguments); 54 va_end(arguments); 55 return result; 56 } 57 58 // BSD-style safe and consistent string copy functions. 59 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 60 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 61 // long as |dst_size| is not 0. Returns the length of |src| in characters. 62 // If the return value is >= dst_size, then the output was truncated. 63 // NOTE: All sizes are in number of characters, NOT in bytes. 64 size_t strlcpy(char* dst, const char* src, size_t dst_size); 65 66 // ASCII-specific tolower. The standard library's tolower is locale sensitive, 67 // so we don't want to use it here. 68 inline char ToLowerASCII(char c) { 69 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 70 } 71 72 // ASCII-specific toupper. The standard library's toupper is locale sensitive, 73 // so we don't want to use it here. 74 inline char ToUpperASCII(char c) { 75 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; 76 } 77 // Converts the given string to it's ASCII-lowercase equivalent. 78 std::string ToLowerASCII(StringPiece str); 79 // Converts the given string to it's ASCII-uppercase equivalent. 80 std::string ToUpperASCII(StringPiece str); 81 82 // Functor for case-insensitive ASCII comparisons for STL algorithms like 83 // std::search. 84 // 85 // Note that a full Unicode version of this functor is not possible to write 86 // because case mappings might change the number of characters, depend on 87 // context (combining accents), and require handling UTF-16. If you need 88 // proper Unicode support, use base::i18n::ToLower/FoldCase and then just 89 // use a normal operator== on the result. 90 template<typename Char> struct CaseInsensitiveCompareASCII { 91 public: 92 bool operator()(Char x, Char y) const { 93 return ToLowerASCII(x) == ToLowerASCII(y); 94 } 95 }; 96 97 // Like strcasecmp for case-insensitive ASCII characters only. Returns: 98 // -1 (a < b) 99 // 0 (a == b) 100 // 1 (a > b) 101 // (unlike strcasecmp which can return values greater or less than 1/-1). For 102 // full Unicode support, use base::i18n::ToLower or base::i18h::FoldCase 103 // and then just call the normal string operators on the result. 104 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b); 105 106 // Equality for ASCII case-insensitive comparisons. For full Unicode support, 107 // use base::i18n::ToLower or base::i18h::FoldCase and then compare with either 108 // == or !=. 109 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b); 110 111 // Contains the set of characters representing whitespace in the corresponding 112 // encoding. Null-terminated. The ASCII versions are the whitespaces as defined 113 // by HTML5, and don't include control characters. 114 extern const char kWhitespaceASCII[]; 115 116 // Replaces characters in |replace_chars| from anywhere in |input| with 117 // |replace_with|. Each character in |replace_chars| will be replaced with 118 // the |replace_with| string. Returns true if any characters were replaced. 119 // |replace_chars| must be null-terminated. 120 // NOTE: Safe to use the same variable for both |input| and |output|. 121 bool ReplaceChars(const std::string& input, 122 const StringPiece& replace_chars, 123 const std::string& replace_with, 124 std::string* output); 125 126 enum TrimPositions { 127 TRIM_NONE = 0, 128 TRIM_LEADING = 1 << 0, 129 TRIM_TRAILING = 1 << 1, 130 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, 131 }; 132 133 // Removes characters in |trim_chars| from the beginning and end of |input|. 134 // The 8-bit version only works on 8-bit characters, not UTF-8. 135 // 136 // It is safe to use the same variable for both |input| and |output| (this is 137 // the normal usage to trim in-place). 138 bool TrimString(const std::string& input, 139 StringPiece trim_chars, 140 std::string* output); 141 142 // StringPiece versions of the above. The returned pieces refer to the original 143 // buffer. 144 StringPiece TrimString(StringPiece input, 145 const StringPiece& trim_chars, 146 TrimPositions positions); 147 148 // Trims any whitespace from either end of the input string. 149 // 150 // The StringPiece versions return a substring referencing the input buffer. 151 // The ASCII versions look only for ASCII whitespace. 152 // 153 // The std::string versions return where whitespace was found. 154 // NOTE: Safe to use the same variable for both input and output. 155 TrimPositions TrimWhitespaceASCII(const std::string& input, 156 TrimPositions positions, 157 std::string* output); 158 159 // Returns true if the specified string matches the criteria. How can a wide 160 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the 161 // first case) or characters that use only 8-bits and whose 8-bit 162 // representation looks like a UTF-8 string (the second case). 163 // 164 // Note that IsStringUTF8 checks not only if the input is structurally 165 // valid but also if it doesn't contain any non-character codepoint 166 // (e.g. U+FFFE). It's done on purpose because all the existing callers want 167 // to have the maximum 'discriminating' power from other encodings. If 168 // there's a use case for just checking the structural validity, we have to 169 // add a new function for that. 170 // 171 // IsStringASCII assumes the input is likely all ASCII, and does not leave early 172 // if it is not the case. 173 bool IsStringUTF8(const StringPiece& str); 174 bool IsStringASCII(const StringPiece& str); 175 176 } // namespace base 177 178 #if defined(OS_WIN) 179 #include "base/strings/string_util_win.h" 180 #elif defined(OS_POSIX) 181 #include "base/strings/string_util_posix.h" 182 #else 183 #error Define string operations appropriately for your platform 184 #endif 185 186 #endif // BASE_STRINGS_STRING_UTIL_H_ 187