1 /* 2 * Copyright 2004 The WebRTC Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_BASE_STRINGENCODE_H_ 12 #define WEBRTC_BASE_STRINGENCODE_H_ 13 14 #include <string> 15 #include <sstream> 16 #include <vector> 17 18 #include "webrtc/base/checks.h" 19 20 namespace rtc { 21 22 ////////////////////////////////////////////////////////////////////// 23 // String Encoding Utilities 24 ////////////////////////////////////////////////////////////////////// 25 26 // Convert an unsigned value to it's utf8 representation. Returns the length 27 // of the encoded string, or 0 if the encoding is longer than buflen - 1. 28 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value); 29 // Decode the utf8 encoded value pointed to by source. Returns the number of 30 // bytes used by the encoding, or 0 if the encoding is invalid. 31 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value); 32 33 // Escaping prefixes illegal characters with the escape character. Compact, but 34 // illegal characters still appear in the string. 35 size_t escape(char * buffer, size_t buflen, 36 const char * source, size_t srclen, 37 const char * illegal, char escape); 38 // Note: in-place unescaping (buffer == source) is allowed. 39 size_t unescape(char * buffer, size_t buflen, 40 const char * source, size_t srclen, 41 char escape); 42 43 // Encoding replaces illegal characters with the escape character and 2 hex 44 // chars, so it's a little less compact than escape, but completely removes 45 // illegal characters. note that hex digits should not be used as illegal 46 // characters. 
size_t encode(char* buffer,
              size_t buflen,
              const char* source,
              size_t srclen,
              const char* illegal,
              char escape);

// Reverses encode().
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char* buffer,
              size_t buflen,
              const char* source,
              size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file. The list is suitable for passing as the 'illegal' argument of escape
// or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal
// characters and a predefined escape character (for use in URLs).
size_t url_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char* buffer,
                   size_t buflen,
                   const char* source,
                   size_t srclen);

// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char* buffer,
                   size_t buflen,
                   const char* source,
                   size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Converts an unsigned value from 0 to 15 to the equivalent hex character...
char hex_encode(unsigned char val);
// ...and vice-versa.
bool hex_decode(char ch, unsigned char* val);

// hex_encode writes the hex representation of binary data as ascii.
size_t hex_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// hex_encode, but separate each byte representation with a delimiter.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short, we return 0.
size_t hex_encode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// Helper functions for hex_encode that return the result as a std::string.
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source,
                                      size_t srclen,
                                      char delimiter);

// hex_decode converts ascii hex to binary.
size_t hex_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// hex_decode, assuming that there is a delimiter between every byte pair.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short or the data is invalid, we return 0.
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// Helper functions for hex_decode that take the input as a std::string.
size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const std::string& source,
                                 char delimiter);

// Signature shared by the buffer-to-buffer string transforms declared in this
// header (for example url_encode and url_decode).
typedef size_t (*Transform)(char* buffer,
                            size_t buflen,
                            const char* source,
                            size_t srclen);

// Applies any suitable string transform (including the ones above) to an STL
// string. Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
size_t transform(std::string& value,
                 size_t maxlen,
                 const std::string& source,
                 Transform t);

// Returns the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers.
131 inline std::string s_url_encode(const std::string& source) { 132 return s_transform(source, url_encode); 133 } 134 inline std::string s_url_decode(const std::string& source) { 135 return s_transform(source, url_decode); 136 } 137 138 // Splits the source string into multiple fields separated by delimiter, 139 // with duplicates of delimiter creating empty fields. 140 size_t split(const std::string& source, char delimiter, 141 std::vector<std::string>* fields); 142 143 // Splits the source string into multiple fields separated by delimiter, 144 // with duplicates of delimiter ignored. Trailing delimiter ignored. 145 size_t tokenize(const std::string& source, char delimiter, 146 std::vector<std::string>* fields); 147 148 // Tokenize and append the tokens to fields. Return the new size of fields. 149 size_t tokenize_append(const std::string& source, char delimiter, 150 std::vector<std::string>* fields); 151 152 // Splits the source string into multiple fields separated by delimiter, with 153 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in 154 // between the start_mark and the end_mark is treated as a single field. Return 155 // the size of fields. For example, if source is "filename 156 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and 157 // the start_mark and end_mark are '"', this method returns two fields: 158 // "filename" and "/Library/Application Support/media content.txt". 159 size_t tokenize(const std::string& source, char delimiter, char start_mark, 160 char end_mark, std::vector<std::string>* fields); 161 162 // Safe sprintf to std::string 163 //void sprintf(std::string& value, size_t maxlen, const char * format, ...) 164 // PRINTF_FORMAT(3); 165 166 // Convert arbitrary values to/from a string. 
167 168 template <class T> 169 static bool ToString(const T &t, std::string* s) { 170 DCHECK(s); 171 std::ostringstream oss; 172 oss << std::boolalpha << t; 173 *s = oss.str(); 174 return !oss.fail(); 175 } 176 177 template <class T> 178 static bool FromString(const std::string& s, T* t) { 179 DCHECK(t); 180 std::istringstream iss(s); 181 iss >> std::boolalpha >> *t; 182 return !iss.fail(); 183 } 184 185 // Inline versions of the string conversion routines. 186 187 template<typename T> 188 static inline std::string ToString(const T& val) { 189 std::string str; ToString(val, &str); return str; 190 } 191 192 template<typename T> 193 static inline T FromString(const std::string& str) { 194 T val; FromString(str, &val); return val; 195 } 196 197 template<typename T> 198 static inline T FromString(const T& defaultValue, const std::string& str) { 199 T val(defaultValue); FromString(str, &val); return val; 200 } 201 202 // simple function to strip out characters which shouldn't be 203 // used in filenames 204 char make_char_safe_for_filename(char c); 205 206 ////////////////////////////////////////////////////////////////////// 207 208 } // namespace rtc 209 210 #endif // WEBRTC_BASE_STRINGENCODE_H__ 211