1 /* 2 * libjingle 3 * Copyright 2004, Google Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #ifndef TALK_BASE_STRINGENCODE_H_ 29 #define TALK_BASE_STRINGENCODE_H_ 30 31 #include <string> 32 #include <sstream> 33 #include <vector> 34 35 #include "talk/base/common.h" 36 37 namespace talk_base { 38 39 ////////////////////////////////////////////////////////////////////// 40 // String Encoding Utilities 41 ////////////////////////////////////////////////////////////////////// 42 43 // Convert an unsigned value to its utf8 representation. 
// Returns the length of the encoded string; 0 means the encoding would be
// longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decodes the utf8-encoded value that source points to.  Returns how many
// bytes the encoding used, or 0 when the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);

// Escaping puts the escape character in front of every illegal character.
// The result is compact, but the illegal characters are still present in the
// string.
size_t escape(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// In-place unescaping (buffer == source) is permitted.
size_t unescape(char* buffer, size_t buflen,
                const char* source, size_t srclen,
                char escape);

// Encoding substitutes each illegal character with the escape character
// followed by 2 hex chars.  That is slightly less compact than escape, but no
// illegal character is left in the output.  Hex digits should not be used as
// illegal characters.
size_t encode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// In-place decoding (buffer == source) is permitted.
size_t decode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe in a file name.  The list is
// suitable for passing as the 'illegal' argument of escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation that uses a predefined set of illegal
// characters and a predefined escape character (the ones appropriate for
// URLs).
size_t url_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// html_encode keeps data that is embedded in html from containing markup.
size_t html_encode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);
// In-place decoding (buffer == source) is permitted.
size_t html_decode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// In-place decoding (buffer == source) is permitted.
size_t xml_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Converts an unsigned value from 0 to 15 to the equivalent hex character...
char hex_encode(unsigned char val);
// ...and the reverse conversion.
bool hex_decode(char ch, unsigned char* val);

// hex_encode produces the hex representation of binary data in ascii.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Like hex_encode, but the representation of each byte is separated from the
// next by a delimiter.
// |delimiter| == 0 means no delimiter.
// Returns 0 if the buffer is too short.
size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
                                 const char* source, size_t srclen,
                                 char delimiter);

// Helpers for hex_encode that return the result as a std::string.
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source, size_t srclen,
                                      char delimiter);

// hex_decode turns ascii hex back into binary.
size_t hex_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// hex_decode, assuming that there is a delimiter between every byte pair.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short or the data is invalid, we return 0.
127 size_t hex_decode_with_delimiter(char* buffer, size_t buflen, 128 const char* source, size_t srclen, 129 char delimiter); 130 131 // Helper functions for hex_decode. 132 size_t hex_decode(char* buffer, size_t buflen, const std::string& source); 133 size_t hex_decode_with_delimiter(char* buffer, size_t buflen, 134 const std::string& source, char delimiter); 135 136 // Apply any suitable string transform (including the ones above) to an STL 137 // string. Stack-allocated temporary space is used for the transformation, 138 // so value and source may refer to the same string. 139 typedef size_t (*Transform)(char * buffer, size_t buflen, 140 const char * source, size_t srclen); 141 size_t transform(std::string& value, size_t maxlen, const std::string& source, 142 Transform t); 143 144 // Return the result of applying transform t to source. 145 std::string s_transform(const std::string& source, Transform t); 146 147 // Convenience wrappers. 148 inline std::string s_url_encode(const std::string& source) { 149 return s_transform(source, url_encode); 150 } 151 inline std::string s_url_decode(const std::string& source) { 152 return s_transform(source, url_decode); 153 } 154 155 // Splits the source string into multiple fields separated by delimiter, 156 // with duplicates of delimiter creating empty fields. 157 size_t split(const std::string& source, char delimiter, 158 std::vector<std::string>* fields); 159 160 // Splits the source string into multiple fields separated by delimiter, 161 // with duplicates of delimiter ignored. Trailing delimiter ignored. 162 size_t tokenize(const std::string& source, char delimiter, 163 std::vector<std::string>* fields); 164 165 // Tokenize and append the tokens to fields. Return the new size of fields. 166 size_t tokenize_append(const std::string& source, char delimiter, 167 std::vector<std::string>* fields); 168 169 // Splits the source string into multiple fields separated by delimiter, with 170 // duplicates of delimiter ignored. 
Trailing delimiter ignored. A substring in 171 // between the start_mark and the end_mark is treated as a single field. Return 172 // the size of fields. For example, if source is "filename 173 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and 174 // the start_mark and end_mark are '"', this method returns two fields: 175 // "filename" and "/Library/Application Support/media content.txt". 176 size_t tokenize(const std::string& source, char delimiter, char start_mark, 177 char end_mark, std::vector<std::string>* fields); 178 179 // Safe sprintf to std::string 180 //void sprintf(std::string& value, size_t maxlen, const char * format, ...) 181 // PRINTF_FORMAT(3); 182 183 // Convert arbitrary values to/from a string. 184 185 template <class T> 186 static bool ToString(const T &t, std::string* s) { 187 ASSERT(NULL != s); 188 std::ostringstream oss; 189 oss << std::boolalpha << t; 190 *s = oss.str(); 191 return !oss.fail(); 192 } 193 194 template <class T> 195 static bool FromString(const std::string& s, T* t) { 196 ASSERT(NULL != t); 197 std::istringstream iss(s); 198 iss >> std::boolalpha >> *t; 199 return !iss.fail(); 200 } 201 202 // Inline versions of the string conversion routines. 203 204 template<typename T> 205 static inline std::string ToString(const T& val) { 206 std::string str; ToString(val, &str); return str; 207 } 208 209 template<typename T> 210 static inline T FromString(const std::string& str) { 211 T val; FromString(str, &val); return val; 212 } 213 214 template<typename T> 215 static inline T FromString(const T& defaultValue, const std::string& str) { 216 T val(defaultValue); FromString(str, &val); return val; 217 } 218 219 // simple function to strip out characters which shouldn't be 220 // used in filenames 221 char make_char_safe_for_filename(char c); 222 223 ////////////////////////////////////////////////////////////////////// 224 225 } // namespace talk_base 226 227 #endif // TALK_BASE_STRINGENCODE_H__ 228