1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/net_util.h" 6 7 #include "base/file_path.h" 8 #include "base/string_piece.h" 9 #include "base/string_util.h" 10 #include "base/sys_string_conversions.h" 11 #include "googleurl/src/gurl.h" 12 #include "net/base/escape.h" 13 14 namespace net { 15 16 bool FileURLToFilePath(const GURL& url, FilePath* file_path) { 17 *file_path = FilePath(); 18 std::wstring& file_path_str = const_cast<std::wstring&>(file_path->value()); 19 file_path_str.clear(); 20 21 if (!url.is_valid()) 22 return false; 23 24 std::string path; 25 std::string host = url.host(); 26 if (host.empty()) { 27 // URL contains no host, the path is the filename. In this case, the path 28 // will probably be preceeded with a slash, as in "/C:/foo.txt", so we 29 // trim out that here. 30 path = url.path(); 31 size_t first_non_slash = path.find_first_not_of("/\\"); 32 if (first_non_slash != std::string::npos && first_non_slash > 0) 33 path.erase(0, first_non_slash); 34 } else { 35 // URL contains a host: this means it's UNC. We keep the preceeding slash 36 // on the path. 37 path = "\\\\"; 38 path.append(host); 39 path.append(url.path()); 40 } 41 42 if (path.empty()) 43 return false; 44 std::replace(path.begin(), path.end(), '/', '\\'); 45 46 // GURL stores strings as percent-encoded UTF-8, this will undo if possible. 47 path = UnescapeURLComponent(path, 48 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 49 50 if (!IsStringUTF8(path)) { 51 // Not UTF-8, assume encoding is native codepage and we're done. We know we 52 // are giving the conversion function a nonempty string, and it may fail if 53 // the given string is not in the current encoding and give us an empty 54 // string back. We detect this and report failure. 55 file_path_str = base::SysNativeMBToWide(path); 56 return !file_path_str.empty(); 57 } 58 file_path_str.assign(UTF8ToWide(path)); 59 60 // We used to try too hard and see if |path| made up entirely of 61 // the 1st 256 characters in the Unicode was a zero-extended UTF-16. 62 // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. 63 // If the check passed, we converted the result to UTF-8. 64 // Otherwise, we treated the result as the native OS encoding. 65 // However, that led to http://crbug.com/4619 and http://crbug.com/14153 66 return true; 67 } 68 69 } // namespace net 70