Home | History | Annotate | Download | only in base
      1 /*
      2  *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_BASE_STRINGENCODE_H_
     12 #define WEBRTC_BASE_STRINGENCODE_H_
     13 
     14 #include <string>
     15 #include <sstream>
     16 #include <vector>
     17 
     18 #include "webrtc/base/checks.h"
     19 
     20 namespace rtc {
     21 
     22 //////////////////////////////////////////////////////////////////////
     23 // String Encoding Utilities
     24 //////////////////////////////////////////////////////////////////////
     25 
     26 // Convert an unsigned value to its UTF-8 representation.  Returns the length
     27 // of the encoded string, or 0 if the encoding is longer than buflen - 1.
     28 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
     29 // Decode the utf8 encoded value pointed to by source.  Returns the number of
     30 // bytes used by the encoding, or 0 if the encoding is invalid.
     31 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
     32 
     33 // Escaping prefixes illegal characters with the escape character.  Compact, but
     34 // illegal characters still appear in the string.
     35 size_t escape(char * buffer, size_t buflen,
     36               const char * source, size_t srclen,
     37               const char * illegal, char escape);
     38 // Note: in-place unescaping (buffer == source) is allowed.
     39 size_t unescape(char * buffer, size_t buflen,
     40                 const char * source, size_t srclen,
     41                 char escape);
     42 
     43 // Encoding replaces illegal characters with the escape character and 2 hex
     44 // chars, so it's a little less compact than escape, but completely removes
     45 // illegal characters.  Note that hex digits should not be used as illegal
     46 // characters.
     47 size_t encode(char * buffer, size_t buflen,
     48               const char * source, size_t srclen,
     49               const char * illegal, char escape);
     50 // Note: in-place decoding (buffer == source) is allowed.
     51 size_t decode(char * buffer, size_t buflen,
     52               const char * source, size_t srclen,
     53               char escape);
     54 
     55 // Returns a list of characters that may be unsafe for use in the name of a
     56 // file, suitable for passing to the 'illegal' member of escape or encode.
     57 const char* unsafe_filename_characters();
     58 
     59 // url_encode is an encode operation with a predefined set of illegal characters
     60 // and escape character (for use in URLs, obviously).
     61 size_t url_encode(char * buffer, size_t buflen,
     62                   const char * source, size_t srclen);
     63 // Note: in-place decoding (buffer == source) is allowed.
     64 size_t url_decode(char * buffer, size_t buflen,
     65                   const char * source, size_t srclen);
     66 
     67 // html_encode prevents data embedded in html from containing markup.
     68 size_t html_encode(char * buffer, size_t buflen,
     69                    const char * source, size_t srclen);
     70 // Note: in-place decoding (buffer == source) is allowed.
     71 size_t html_decode(char * buffer, size_t buflen,
     72                    const char * source, size_t srclen);
     73 
     74 // xml_encode makes data suitable for inside xml attributes and values.
     75 size_t xml_encode(char * buffer, size_t buflen,
     76                   const char * source, size_t srclen);
     77 // Note: in-place decoding (buffer == source) is allowed.
     78 size_t xml_decode(char * buffer, size_t buflen,
     79                   const char * source, size_t srclen);
     80 
     81 // Convert an unsigned value from 0 to 15 to the hex character equivalent...
     82 char hex_encode(unsigned char val);
     83 // ...and vice-versa.
     84 bool hex_decode(char ch, unsigned char* val);
     85 
     86 // hex_encode shows the hex representation of binary data in ascii.
     87 size_t hex_encode(char* buffer, size_t buflen,
     88                   const char* source, size_t srclen);
     89 
     90 // hex_encode, but separate each byte representation with a delimiter.
     91 // |delimiter| == 0 means no delimiter
     92 // If the buffer is too short, we return 0
     93 size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
     94                                  const char* source, size_t srclen,
     95                                  char delimiter);
     96 
     97 // Helper functions for hex_encode.
     98 std::string hex_encode(const char* source, size_t srclen);
     99 std::string hex_encode_with_delimiter(const char* source, size_t srclen,
    100                                       char delimiter);
    101 
    102 // hex_decode converts ascii hex to binary.
    103 size_t hex_decode(char* buffer, size_t buflen,
    104                   const char* source, size_t srclen);
    105 
    106 // hex_decode, assuming that there is a delimiter between every byte
    107 // pair.
    108 // |delimiter| == 0 means no delimiter
    109 // If the buffer is too short or the data is invalid, we return 0.
    110 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
    111                                  const char* source, size_t srclen,
    112                                  char delimiter);
    113 
    114 // Helper functions for hex_decode.
    115 size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
    116 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
    117                                  const std::string& source, char delimiter);
    118 
    119 // Apply any suitable string transform (including the ones above) to an STL
    120 // string.  Stack-allocated temporary space is used for the transformation,
    121 // so value and source may refer to the same string.
    122 typedef size_t (*Transform)(char * buffer, size_t buflen,
    123                             const char * source, size_t srclen);
    124 size_t transform(std::string& value, size_t maxlen, const std::string& source,
    125                  Transform t);
    126 
    127 // Return the result of applying transform t to source.
    128 std::string s_transform(const std::string& source, Transform t);
    129 
    130 // Convenience wrappers.
    131 inline std::string s_url_encode(const std::string& source) {
    132   return s_transform(source, url_encode);
    133 }
    134 inline std::string s_url_decode(const std::string& source) {
    135   return s_transform(source, url_decode);
    136 }
    137 
    138 // Splits the source string into multiple fields separated by delimiter,
    139 // with duplicates of delimiter creating empty fields.
    140 size_t split(const std::string& source, char delimiter,
    141              std::vector<std::string>* fields);
    142 
    143 // Splits the source string into multiple fields separated by delimiter,
    144 // with duplicates of delimiter ignored.  Trailing delimiter ignored.
    145 size_t tokenize(const std::string& source, char delimiter,
    146                 std::vector<std::string>* fields);
    147 
    148 // Tokenize and append the tokens to fields. Return the new size of fields.
    149 size_t tokenize_append(const std::string& source, char delimiter,
    150                        std::vector<std::string>* fields);
    151 
    152 // Splits the source string into multiple fields separated by delimiter, with
    153 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
    154 // between the start_mark and the end_mark is treated as a single field. Return
    155 // the size of fields. For example, if source is "filename
    156 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
    157 // the start_mark and end_mark are '"', this method returns two fields:
    158 // "filename" and "/Library/Application Support/media content.txt".
    159 size_t tokenize(const std::string& source, char delimiter, char start_mark,
    160                 char end_mark, std::vector<std::string>* fields);
    161 
    162 // Safe sprintf to std::string
    163 //void sprintf(std::string& value, size_t maxlen, const char * format, ...)
    164 //     PRINTF_FORMAT(3);
    165 
    166 // Convert arbitrary values to/from a string.
    167 
    168 template <class T>
    169 static bool ToString(const T &t, std::string* s) {
    170   DCHECK(s);
    171   std::ostringstream oss;
    172   oss << std::boolalpha << t;
    173   *s = oss.str();
    174   return !oss.fail();
    175 }
    176 
    177 template <class T>
    178 static bool FromString(const std::string& s, T* t) {
    179   DCHECK(t);
    180   std::istringstream iss(s);
    181   iss >> std::boolalpha >> *t;
    182   return !iss.fail();
    183 }
    184 
    185 // Inline versions of the string conversion routines.
    186 
    187 template<typename T>
    188 static inline std::string ToString(const T& val) {
    189   std::string str; ToString(val, &str); return str;
    190 }
    191 
    192 template<typename T>
    193 static inline T FromString(const std::string& str) {
    194   T val; FromString(str, &val); return val;
    195 }
    196 
    197 template<typename T>
    198 static inline T FromString(const T& defaultValue, const std::string& str) {
    199   T val(defaultValue); FromString(str, &val); return val;
    200 }
    201 
    202 // simple function to strip out characters which shouldn't be
    203 // used in filenames
    204 char make_char_safe_for_filename(char c);
    205 
    206 //////////////////////////////////////////////////////////////////////
    207 
    208 }  // namespace rtc
    209 
    210 #endif  // WEBRTC_BASE_STRINGENCODE_H_
    211