Home | History | Annotate | Download | only in src
      1 // Copyright 2015 The Weave Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/data_encoding.h"
      6 
      7 #include <memory>
      8 
      9 #include <base/logging.h>
     10 #include <base/strings/string_util.h>
     11 #include <base/strings/stringprintf.h>
     12 
     13 #include "src/string_utils.h"
     14 #include "third_party/modp_b64/modp_b64/modp_b64.h"
     15 
     16 namespace weave {
     17 
     18 namespace {
     19 
     20 inline int HexToDec(int hex) {
     21   int dec = -1;
     22   if (hex >= '0' && hex <= '9') {
     23     dec = hex - '0';
     24   } else if (hex >= 'A' && hex <= 'F') {
     25     dec = hex - 'A' + 10;
     26   } else if (hex >= 'a' && hex <= 'f') {
     27     dec = hex - 'a' + 10;
     28   }
     29   return dec;
     30 }
     31 
     32 // Helper for Base64Encode() and Base64EncodeWrapLines().
     33 std::string Base64EncodeHelper(const void* data, size_t size) {
     34   std::vector<char> buffer;
     35   buffer.resize(modp_b64_encode_len(size));
     36   size_t out_size =
     37       modp_b64_encode(buffer.data(), static_cast<const char*>(data), size);
     38   return std::string{buffer.begin(), buffer.begin() + out_size};
     39 }
     40 
     41 }  // namespace
     42 
     43 std::string UrlEncode(const char* data, bool encodeSpaceAsPlus) {
     44   std::string result;
     45 
     46   while (*data) {
     47     char c = *data++;
     48     // According to RFC3986 (http://www.faqs.org/rfcs/rfc3986.html),
     49     // section 2.3. - Unreserved Characters
     50     if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
     51         (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' ||
     52         c == '~') {
     53       result += c;
     54     } else if (c == ' ' && encodeSpaceAsPlus) {
     55       // For historical reasons, some URLs have spaces encoded as '+',
     56       // this also applies to form data encoded as
     57       // 'application/x-www-form-urlencoded'
     58       result += '+';
     59     } else {
     60       base::StringAppendF(&result, "%%%02X",
     61                           static_cast<unsigned char>(c));  // Encode as %NN
     62     }
     63   }
     64   return result;
     65 }
     66 
     67 std::string UrlDecode(const char* data) {
     68   std::string result;
     69   while (*data) {
     70     char c = *data++;
     71     int part1 = 0, part2 = 0;
     72     // HexToDec would return -1 even for character 0 (end of string),
     73     // so it is safe to access data[0] and data[1] without overrunning the buf.
     74     if (c == '%' && (part1 = HexToDec(data[0])) >= 0 &&
     75         (part2 = HexToDec(data[1])) >= 0) {
     76       c = static_cast<char>((part1 << 4) | part2);
     77       data += 2;
     78     } else if (c == '+') {
     79       c = ' ';
     80     }
     81     result += c;
     82   }
     83   return result;
     84 }
     85 
     86 std::string WebParamsEncode(const WebParamList& params,
     87                             bool encodeSpaceAsPlus) {
     88   std::vector<std::string> pairs;
     89   pairs.reserve(params.size());
     90   for (const auto& p : params) {
     91     std::string key = UrlEncode(p.first.c_str(), encodeSpaceAsPlus);
     92     std::string value = UrlEncode(p.second.c_str(), encodeSpaceAsPlus);
     93     pairs.push_back(Join("=", key, value));
     94   }
     95 
     96   return Join("&", pairs);
     97 }
     98 
     99 WebParamList WebParamsDecode(const std::string& data) {
    100   WebParamList result;
    101   for (const auto& p : Split(data, "&", true, true)) {
    102     auto pair = SplitAtFirst(p, "=", true);
    103     result.emplace_back(UrlDecode(pair.first.c_str()),
    104                         UrlDecode(pair.second.c_str()));
    105   }
    106   return result;
    107 }
    108 
    109 std::string Base64Encode(const void* data, size_t size) {
    110   return Base64EncodeHelper(data, size);
    111 }
    112 
    113 std::string Base64EncodeWrapLines(const void* data, size_t size) {
    114   std::string unwrapped = Base64EncodeHelper(data, size);
    115   std::string wrapped;
    116 
    117   for (size_t i = 0; i < unwrapped.size(); i += 64) {
    118     wrapped.append(unwrapped, i, 64);
    119     wrapped.append("\n");
    120   }
    121   return wrapped;
    122 }
    123 
    124 bool Base64Decode(const std::string& input, std::vector<uint8_t>* output) {
    125   std::string temp_buffer;
    126   const std::string* data = &input;
    127   if (input.find_first_of("\r\n") != std::string::npos) {
    128     base::ReplaceChars(input, "\n", "", &temp_buffer);
    129     base::ReplaceChars(temp_buffer, "\r", "", &temp_buffer);
    130     data = &temp_buffer;
    131   }
    132   // base64 decoded data has 25% fewer bytes than the original (since every
    133   // 3 source octets are encoded as 4 characters in base64).
    134   // modp_b64_decode_len provides an upper estimate of the size of the output
    135   // data.
    136   output->resize(modp_b64_decode_len(data->size()));
    137 
    138   size_t size_read = modp_b64_decode(reinterpret_cast<char*>(output->data()),
    139                                      data->data(), data->size());
    140   if (size_read == MODP_B64_ERROR) {
    141     output->resize(0);
    142     return false;
    143   }
    144   output->resize(size_read);
    145 
    146   return true;
    147 }
    148 
    149 }  // namespace weave
    150