Home | History | Annotate | Download | only in base
      1 /*
      2  * libjingle
      3  * Copyright 2004, Google Inc.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions are met:
      7  *
      8  *  1. Redistributions of source code must retain the above copyright notice,
      9  *     this list of conditions and the following disclaimer.
     10  *  2. Redistributions in binary form must reproduce the above copyright notice,
     11  *     this list of conditions and the following disclaimer in the documentation
     12  *     and/or other materials provided with the distribution.
     13  *  3. The name of the author may not be used to endorse or promote products
     14  *     derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
     17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
     19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #ifndef TALK_BASE_STRINGENCODE_H_
     29 #define TALK_BASE_STRINGENCODE_H_
     30 
     31 #include <string>
     32 #include <sstream>
     33 #include <vector>
     34 
     35 #include "talk/base/common.h"
     36 
     37 namespace talk_base {
     38 
     39 //////////////////////////////////////////////////////////////////////
     40 // String Encoding Utilities
     41 //////////////////////////////////////////////////////////////////////
     42 
     43 // Convert an unsigned value to its utf8 representation.  Returns the length
     44 // of the encoded string, or 0 if the encoding is longer than buflen - 1.
     45 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
     46 // Decode the utf8 encoded value pointed to by source.  Returns the number of
     47 // bytes used by the encoding, or 0 if the encoding is invalid.
     48 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
     49 
     50 // Escaping prefixes illegal characters with the escape character.  Compact, but
     51 // illegal characters still appear in the string.
     52 size_t escape(char * buffer, size_t buflen,
     53               const char * source, size_t srclen,
     54               const char * illegal, char escape);
     55 // Note: in-place unescaping (buffer == source) is allowed.
     56 size_t unescape(char * buffer, size_t buflen,
     57                 const char * source, size_t srclen,
     58                 char escape);
     59 
     60 // Encoding replaces illegal characters with the escape character and 2 hex
     61 // chars, so it's a little less compact than escape, but completely removes
     62 // illegal characters.  Note that hex digits should not be used as illegal
     63 // characters.
     64 size_t encode(char * buffer, size_t buflen,
     65               const char * source, size_t srclen,
     66               const char * illegal, char escape);
     67 // Note: in-place decoding (buffer == source) is allowed.
     68 size_t decode(char * buffer, size_t buflen,
     69               const char * source, size_t srclen,
     70               char escape);
     71 
     72 // Returns a list of characters that may be unsafe for use in the name of a
     73 // file, suitable for passing to the 'illegal' member of escape or encode.
     74 const char* unsafe_filename_characters();
     75 
     76 // url_encode is an encode operation with a predefined set of illegal characters
     77 // and escape character (for use in URLs, obviously).
     78 size_t url_encode(char * buffer, size_t buflen,
     79                   const char * source, size_t srclen);
     80 // Note: in-place decoding (buffer == source) is allowed.
     81 size_t url_decode(char * buffer, size_t buflen,
     82                   const char * source, size_t srclen);
     83 
     84 // html_encode prevents data embedded in html from containing markup.
     85 size_t html_encode(char * buffer, size_t buflen,
     86                    const char * source, size_t srclen);
     87 // Note: in-place decoding (buffer == source) is allowed.
     88 size_t html_decode(char * buffer, size_t buflen,
     89                    const char * source, size_t srclen);
     90 
     91 // xml_encode makes data suitable for inside xml attributes and values.
     92 size_t xml_encode(char * buffer, size_t buflen,
     93                   const char * source, size_t srclen);
     94 // Note: in-place decoding (buffer == source) is allowed.
     95 size_t xml_decode(char * buffer, size_t buflen,
     96                   const char * source, size_t srclen);
     97 
     98 // Convert an unsigned value from 0 to 15 to the hex character equivalent...
     99 char hex_encode(unsigned char val);
    100 // ...and vice-versa.
    101 bool hex_decode(char ch, unsigned char* val);
    102 
    103 // hex_encode shows the hex representation of binary data in ascii.
    104 size_t hex_encode(char* buffer, size_t buflen,
    105                   const char* source, size_t srclen);
    106 
    107 // hex_encode, but separate each byte representation with a delimiter.
    108 // |delimiter| == 0 means no delimiter.
    109 // If the buffer is too short, we return 0.
    110 size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
    111                                  const char* source, size_t srclen,
    112                                  char delimiter);
    113 
    114 // Convenience overloads of hex_encode that return a std::string.
    115 std::string hex_encode(const char* source, size_t srclen);
    116 std::string hex_encode_with_delimiter(const char* source, size_t srclen,
    117                                       char delimiter);
    118 
    119 // hex_decode converts ascii hex to binary.
    120 size_t hex_decode(char* buffer, size_t buflen,
    121                   const char* source, size_t srclen);
    122 
    123 // hex_decode, assuming that there is a delimiter between every byte
    124 // pair.
    125 // |delimiter| == 0 means no delimiter
    126 // If the buffer is too short or the data is invalid, we return 0.
    127 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
    128                                  const char* source, size_t srclen,
    129                                  char delimiter);
    130 
    131 // Convenience overloads of hex_decode that take the source as a std::string.
    132 size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
    133 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
    134                                  const std::string& source, char delimiter);
    135 
    136 // Apply any suitable string transform (including the ones above) to an STL
    137 // string.  Stack-allocated temporary space is used for the transformation,
    138 // so value and source may refer to the same string.
    139 typedef size_t (*Transform)(char * buffer, size_t buflen,
    140                             const char * source, size_t srclen);
    141 size_t transform(std::string& value, size_t maxlen, const std::string& source,
    142                  Transform t);
    143 
    144 // Return the result of applying transform t to source.
    145 std::string s_transform(const std::string& source, Transform t);
    146 
    147 // Convenience wrappers.
    148 inline std::string s_url_encode(const std::string& source) {
    149   return s_transform(source, url_encode);
    150 }
    151 inline std::string s_url_decode(const std::string& source) {
    152   return s_transform(source, url_decode);
    153 }
    154 
    155 // Splits the source string into multiple fields separated by delimiter,
    156 // with duplicates of delimiter creating empty fields.
    157 size_t split(const std::string& source, char delimiter,
    158              std::vector<std::string>* fields);
    159 
    160 // Splits the source string into multiple fields separated by delimiter,
    161 // with duplicates of delimiter ignored.  Trailing delimiter ignored.
    162 size_t tokenize(const std::string& source, char delimiter,
    163                 std::vector<std::string>* fields);
    164 
    165 // Tokenize and append the tokens to fields. Return the new size of fields.
    166 size_t tokenize_append(const std::string& source, char delimiter,
    167                        std::vector<std::string>* fields);
    168 
    169 // Splits the source string into multiple fields separated by delimiter, with
    170 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
    171 // between the start_mark and the end_mark is treated as a single field. Return
    172 // the size of fields. For example, if source is "filename
    173 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
    174 // the start_mark and end_mark are '"', this method returns two fields:
    175 // "filename" and "/Library/Application Support/media content.txt".
    176 size_t tokenize(const std::string& source, char delimiter, char start_mark,
    177                 char end_mark, std::vector<std::string>* fields);
    178 
    179 // Safe sprintf to std::string
    180 //void sprintf(std::string& value, size_t maxlen, const char * format, ...)
    181 //     PRINTF_FORMAT(3);
    182 
    183 // Convert arbitrary values to/from a string.
    184 
    185 template <class T>
    186 static bool ToString(const T &t, std::string* s) {
    187   ASSERT(NULL != s);
    188   std::ostringstream oss;
    189   oss << std::boolalpha << t;
    190   *s = oss.str();
    191   return !oss.fail();
    192 }
    193 
    194 template <class T>
    195 static bool FromString(const std::string& s, T* t) {
    196   ASSERT(NULL != t);
    197   std::istringstream iss(s);
    198   iss >> std::boolalpha >> *t;
    199   return !iss.fail();
    200 }
    201 
    202 // Inline versions of the string conversion routines.
    203 
    204 template<typename T>
    205 static inline std::string ToString(const T& val) {
    206   std::string str; ToString(val, &str); return str;
    207 }
    208 
    209 template<typename T>
    210 static inline T FromString(const std::string& str) {
    211   T val; FromString(str, &val); return val;
    212 }
    213 
    214 template<typename T>
    215 static inline T FromString(const T& defaultValue, const std::string& str) {
    216   T val(defaultValue); FromString(str, &val); return val;
    217 }
    218 
    219 // Simple function to strip out characters which shouldn't be
    220 // used in filenames.
    221 char make_char_safe_for_filename(char c);
    222 
    223 //////////////////////////////////////////////////////////////////////
    224 
    225 }  // namespace talk_base
    226 
    227 #endif  // TALK_BASE_STRINGENCODE_H_
    228