1 /* 2 * Copyright 2004 The WebRTC Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_BASE_STRINGENCODE_H_ 12 #define WEBRTC_BASE_STRINGENCODE_H_ 13 14 #include <sstream> 15 #include <string> 16 #include <vector> 17 18 #include "webrtc/base/checks.h" 19 20 namespace rtc { 21 22 ////////////////////////////////////////////////////////////////////// 23 // String Encoding Utilities 24 ////////////////////////////////////////////////////////////////////// 25 26 // Convert an unsigned value to it's utf8 representation. Returns the length 27 // of the encoded string, or 0 if the encoding is longer than buflen - 1. 28 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value); 29 // Decode the utf8 encoded value pointed to by source. Returns the number of 30 // bytes used by the encoding, or 0 if the encoding is invalid. 31 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value); 32 33 // Escaping prefixes illegal characters with the escape character. Compact, but 34 // illegal characters still appear in the string. 35 size_t escape(char * buffer, size_t buflen, 36 const char * source, size_t srclen, 37 const char * illegal, char escape); 38 // Note: in-place unescaping (buffer == source) is allowed. 39 size_t unescape(char * buffer, size_t buflen, 40 const char * source, size_t srclen, 41 char escape); 42 43 // Encoding replaces illegal characters with the escape character and 2 hex 44 // chars, so it's a little less compact than escape, but completely removes 45 // illegal characters. note that hex digits should not be used as illegal 46 // characters. 
size_t encode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);

// Decoding in place is permitted: buffer may equal source.
size_t decode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              char escape);

// Yields a list of characters that may be unsafe for use in the name of a
// file.  The result is suitable for passing as the 'illegal' argument of
// escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation whose set of illegal characters and whose
// escape character are predefined (for use in URLs).
size_t url_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Decoding in place is permitted: buffer may equal source.
size_t url_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// html_encode keeps data that is embedded in html from containing markup.
size_t html_encode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);

// Decoding in place is permitted: buffer may equal source.
size_t html_decode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Decoding in place is permitted: buffer may equal source.
size_t xml_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Converts an unsigned value in the range 0 to 15 into the equivalent hex
// character...
char hex_encode(unsigned char val);
// ...and performs the opposite conversion.
bool hex_decode(char ch, unsigned char* val);

// hex_encode produces the hex representation of binary data in ascii.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// hex_encode, but separate each byte representation with a delimiter.
// |delimiter| == 0 means no delimiter
// If the buffer is too short, we return 0
size_t hex_encode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// std::string-returning helpers for hex_encode.
std::string hex_encode(const std::string& str);
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source,
                                      size_t srclen,
                                      char delimiter);

// hex_decode turns ascii hex into binary.
size_t hex_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// hex_decode for input that has a delimiter between every byte pair.
// |delimiter| == 0 means no delimiter.
// The return value is 0 when the buffer is too short or the data is invalid.
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// Helpers for hex_decode that take the source as a std::string.
size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const std::string& source,
                                 char delimiter);

// Applies any suitable string transform (the ones declared above included) to
// an STL string.  The transformation works in stack-allocated temporary
// space, so value and source are allowed to refer to the same string.
typedef size_t (*Transform)(char* buffer, size_t buflen,
                            const char* source, size_t srclen);
size_t transform(std::string& value,
                 size_t maxlen,
                 const std::string& source,
                 Transform t);

// Returns what applying transform t to source produces.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers.
132 inline std::string s_url_encode(const std::string& source) { 133 return s_transform(source, url_encode); 134 } 135 inline std::string s_url_decode(const std::string& source) { 136 return s_transform(source, url_decode); 137 } 138 139 // Splits the source string into multiple fields separated by delimiter, 140 // with duplicates of delimiter creating empty fields. 141 size_t split(const std::string& source, char delimiter, 142 std::vector<std::string>* fields); 143 144 // Splits the source string into multiple fields separated by delimiter, 145 // with duplicates of delimiter ignored. Trailing delimiter ignored. 146 size_t tokenize(const std::string& source, char delimiter, 147 std::vector<std::string>* fields); 148 149 // Tokenize, including the empty tokens. 150 size_t tokenize_with_empty_tokens(const std::string& source, 151 char delimiter, 152 std::vector<std::string>* fields); 153 154 // Tokenize and append the tokens to fields. Return the new size of fields. 155 size_t tokenize_append(const std::string& source, char delimiter, 156 std::vector<std::string>* fields); 157 158 // Splits the source string into multiple fields separated by delimiter, with 159 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in 160 // between the start_mark and the end_mark is treated as a single field. Return 161 // the size of fields. For example, if source is "filename 162 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and 163 // the start_mark and end_mark are '"', this method returns two fields: 164 // "filename" and "/Library/Application Support/media content.txt". 165 size_t tokenize(const std::string& source, char delimiter, char start_mark, 166 char end_mark, std::vector<std::string>* fields); 167 168 // Extract the first token from source as separated by delimiter, with 169 // duplicates of delimiter ignored. Return false if the delimiter could not be 170 // found, otherwise return true. 
171 bool tokenize_first(const std::string& source, 172 const char delimiter, 173 std::string* token, 174 std::string* rest); 175 176 // Safe sprintf to std::string 177 //void sprintf(std::string& value, size_t maxlen, const char * format, ...) 178 // PRINTF_FORMAT(3); 179 180 // Convert arbitrary values to/from a string. 181 182 template <class T> 183 static bool ToString(const T &t, std::string* s) { 184 RTC_DCHECK(s); 185 std::ostringstream oss; 186 oss << std::boolalpha << t; 187 *s = oss.str(); 188 return !oss.fail(); 189 } 190 191 template <class T> 192 static bool FromString(const std::string& s, T* t) { 193 RTC_DCHECK(t); 194 std::istringstream iss(s); 195 iss >> std::boolalpha >> *t; 196 return !iss.fail(); 197 } 198 199 // Inline versions of the string conversion routines. 200 201 template<typename T> 202 static inline std::string ToString(const T& val) { 203 std::string str; ToString(val, &str); return str; 204 } 205 206 template<typename T> 207 static inline T FromString(const std::string& str) { 208 T val; FromString(str, &val); return val; 209 } 210 211 template<typename T> 212 static inline T FromString(const T& defaultValue, const std::string& str) { 213 T val(defaultValue); FromString(str, &val); return val; 214 } 215 216 // simple function to strip out characters which shouldn't be 217 // used in filenames 218 char make_char_safe_for_filename(char c); 219 220 ////////////////////////////////////////////////////////////////////// 221 222 } // namespace rtc 223 224 #endif // WEBRTC_BASE_STRINGENCODE_H__ 225