1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // NOTE: based loosely on mozilla's nsDataChannel.cpp 6 7 #include <algorithm> 8 9 #include "net/base/data_url.h" 10 11 #include "base/base64.h" 12 #include "base/string_util.h" 13 #include "googleurl/src/gurl.h" 14 #include "net/base/escape.h" 15 16 namespace net { 17 18 // static 19 bool DataURL::Parse(const GURL& url, std::string* mime_type, 20 std::string* charset, std::string* data) { 21 std::string::const_iterator begin = url.spec().begin(); 22 std::string::const_iterator end = url.spec().end(); 23 24 std::string::const_iterator after_colon = std::find(begin, end, ':'); 25 if (after_colon == end) 26 return false; 27 ++after_colon; 28 29 // first, find the start of the data 30 std::string::const_iterator comma = std::find(after_colon, end, ','); 31 if (comma == end) 32 return false; 33 34 const char kBase64Tag[] = ";base64"; 35 std::string::const_iterator it = 36 std::search(after_colon, comma, kBase64Tag, 37 kBase64Tag + sizeof(kBase64Tag)-1); 38 39 bool base64_encoded = (it != comma); 40 41 if (comma != after_colon) { 42 // everything else is content type 43 std::string::const_iterator semi_colon = std::find(after_colon, comma, ';'); 44 if (semi_colon != after_colon) { 45 mime_type->assign(after_colon, semi_colon); 46 StringToLowerASCII(mime_type); 47 } 48 if (semi_colon != comma) { 49 const char kCharsetTag[] = "charset="; 50 it = std::search(semi_colon + 1, comma, kCharsetTag, 51 kCharsetTag + sizeof(kCharsetTag)-1); 52 if (it != comma) 53 charset->assign(it + sizeof(kCharsetTag)-1, comma); 54 } 55 } 56 57 // fallback to defaults if nothing specified in the URL: 58 if (mime_type->empty()) 59 mime_type->assign("text/plain"); 60 if (charset->empty()) 61 charset->assign("US-ASCII"); 62 63 // The caller may not be interested in receiving the data. 64 if (!data) 65 return true; 66 67 // Preserve spaces if dealing with text or xml input, same as mozilla: 68 // https://bugzilla.mozilla.org/show_bug.cgi?id=138052 69 // but strip them otherwise: 70 // https://bugzilla.mozilla.org/show_bug.cgi?id=37200 71 // (Spaces in a data URL should be escaped, which is handled below, so any 72 // spaces now are wrong. People expect to be able to enter them in the URL 73 // bar for text, and it can't hurt, so we allow it.) 74 std::string temp_data = std::string(comma + 1, end); 75 76 // For base64, we may have url-escaped whitespace which is not part 77 // of the data, and should be stripped. Otherwise, the escaped whitespace 78 // could be part of the payload, so don't strip it. 79 if (base64_encoded) { 80 temp_data = UnescapeURLComponent(temp_data, 81 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS | 82 UnescapeRule::CONTROL_CHARS); 83 } 84 85 // Strip whitespace. 86 if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 || 87 mime_type->find("xml") != std::string::npos)) { 88 temp_data.erase(std::remove_if(temp_data.begin(), temp_data.end(), 89 IsAsciiWhitespace<wchar_t>), 90 temp_data.end()); 91 } 92 93 if (!base64_encoded) { 94 temp_data = UnescapeURLComponent(temp_data, 95 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS | 96 UnescapeRule::CONTROL_CHARS); 97 } 98 99 if (base64_encoded) 100 return base::Base64Decode(temp_data, data); 101 102 temp_data.swap(*data); 103 return true; 104 } 105 106 } // namespace net 107