1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/ftp/ftp_util.h" 6 7 #include <map> 8 #include <vector> 9 10 #include "base/i18n/case_conversion.h" 11 #include "base/i18n/char_iterator.h" 12 #include "base/logging.h" 13 #include "base/memory/singleton.h" 14 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_piece.h" 16 #include "base/strings/string_split.h" 17 #include "base/strings/string_tokenizer.h" 18 #include "base/strings/string_util.h" 19 #include "base/strings/utf_string_conversions.h" 20 #include "base/time/time.h" 21 #include "third_party/icu/source/common/unicode/uchar.h" 22 #include "third_party/icu/source/i18n/unicode/datefmt.h" 23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h" 24 25 using base::ASCIIToUTF16; 26 using base::StringPiece16; 27 28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS 29 // a path looks differently depending on whether it's a file or directory. 30 31 namespace net { 32 33 // static 34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) { 35 if (unix_path.empty()) 36 return std::string(); 37 38 base::StringTokenizer tokenizer(unix_path, "/"); 39 std::vector<std::string> tokens; 40 while (tokenizer.GetNext()) 41 tokens.push_back(tokenizer.token()); 42 43 if (unix_path[0] == '/') { 44 // It's an absolute path. 45 46 if (tokens.empty()) { 47 DCHECK_EQ(1U, unix_path.length()); 48 return "[]"; 49 } 50 51 if (tokens.size() == 1) 52 return unix_path.substr(1); // Drop the leading slash. 53 54 std::string result(tokens[0] + ":["); 55 if (tokens.size() == 2) { 56 // Don't ask why, it just works that way on VMS. 57 result.append("000000"); 58 } else { 59 result.append(tokens[1]); 60 for (size_t i = 2; i < tokens.size() - 1; i++) 61 result.append("." + tokens[i]); 62 } 63 result.append("]" + tokens[tokens.size() - 1]); 64 return result; 65 } 66 67 if (tokens.size() == 1) 68 return unix_path; 69 70 std::string result("["); 71 for (size_t i = 0; i < tokens.size() - 1; i++) 72 result.append("." + tokens[i]); 73 result.append("]" + tokens[tokens.size() - 1]); 74 return result; 75 } 76 77 // static 78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) { 79 if (unix_path.empty()) 80 return std::string(); 81 82 std::string path(unix_path); 83 84 if (path[path.length() - 1] != '/') 85 path.append("/"); 86 87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the 88 // real path and removing it after conversion. 89 path.append("x"); 90 path = UnixFilePathToVMS(path); 91 return path.substr(0, path.length() - 1); 92 } 93 94 // static 95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) { 96 if (vms_path.empty()) 97 return "."; 98 99 if (vms_path[0] == '/') { 100 // This is not really a VMS path. Most likely the server is emulating UNIX. 101 // Return path as-is. 102 return vms_path; 103 } 104 105 if (vms_path == "[]") 106 return "/"; 107 108 std::string result(vms_path); 109 if (vms_path[0] == '[') { 110 // It's a relative path. 111 ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string()); 112 } else { 113 // It's an absolute path. 114 result.insert(0, "/"); 115 ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/"); 116 ReplaceSubstringsAfterOffset(&result, 0, ":[", "/"); 117 } 118 std::replace(result.begin(), result.end(), '.', '/'); 119 std::replace(result.begin(), result.end(), ']', '/'); 120 121 // Make sure the result doesn't end with a slash. 122 if (result.length() && result[result.length() - 1] == '/') 123 result = result.substr(0, result.length() - 1); 124 125 return result; 126 } 127 128 namespace { 129 130 // Lazy-initialized map of abbreviated month names. 131 class AbbreviatedMonthsMap { 132 public: 133 static AbbreviatedMonthsMap* GetInstance() { 134 return Singleton<AbbreviatedMonthsMap>::get(); 135 } 136 137 // Converts abbreviated month name |text| to its number (in range 1-12). 138 // On success returns true and puts the number in |number|. 139 bool GetMonthNumber(const base::string16& text, int* number) { 140 // Ignore the case of the month names. The simplest way to handle that 141 // is to make everything lowercase. 142 base::string16 text_lower(base::i18n::ToLower(text)); 143 144 if (map_.find(text_lower) == map_.end()) 145 return false; 146 147 *number = map_[text_lower]; 148 return true; 149 } 150 151 private: 152 friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>; 153 154 // Constructor, initializes the map based on ICU data. It is much faster 155 // to do that just once. 156 AbbreviatedMonthsMap() { 157 int32_t locales_count; 158 const icu::Locale* locales = 159 icu::DateFormat::getAvailableLocales(locales_count); 160 161 for (int32_t locale = 0; locale < locales_count; locale++) { 162 UErrorCode status(U_ZERO_ERROR); 163 164 icu::DateFormatSymbols format_symbols(locales[locale], status); 165 166 // If we cannot get format symbols for some locale, it's not a fatal 167 // error. Just try another one. 168 if (U_FAILURE(status)) 169 continue; 170 171 int32_t months_count; 172 const icu::UnicodeString* months = 173 format_symbols.getShortMonths(months_count); 174 175 for (int32_t month = 0; month < months_count; month++) { 176 base::string16 month_name(months[month].getBuffer(), 177 static_cast<size_t>(months[month].length())); 178 179 // Ignore the case of the month names. The simplest way to handle that 180 // is to make everything lowercase. 181 month_name = base::i18n::ToLower(month_name); 182 183 map_[month_name] = month + 1; 184 185 // Sometimes ICU returns longer strings, but in FTP listings a shorter 186 // abbreviation is used (for example for the Russian locale). Make sure 187 // we always have a map entry for a three-letter abbreviation. 188 map_[month_name.substr(0, 3)] = month + 1; 189 } 190 } 191 192 // Fail loudly if the data returned by ICU is obviously incomplete. 193 // This is intended to catch cases like http://crbug.com/177428 194 // much earlier. Note that the issue above turned out to be non-trivial 195 // to reproduce - crash data is much better indicator of a problem 196 // than incomplete bug reports. 197 CHECK_EQ(1, map_[ASCIIToUTF16("jan")]); 198 CHECK_EQ(2, map_[ASCIIToUTF16("feb")]); 199 CHECK_EQ(3, map_[ASCIIToUTF16("mar")]); 200 CHECK_EQ(4, map_[ASCIIToUTF16("apr")]); 201 CHECK_EQ(5, map_[ASCIIToUTF16("may")]); 202 CHECK_EQ(6, map_[ASCIIToUTF16("jun")]); 203 CHECK_EQ(7, map_[ASCIIToUTF16("jul")]); 204 CHECK_EQ(8, map_[ASCIIToUTF16("aug")]); 205 CHECK_EQ(9, map_[ASCIIToUTF16("sep")]); 206 CHECK_EQ(10, map_[ASCIIToUTF16("oct")]); 207 CHECK_EQ(11, map_[ASCIIToUTF16("nov")]); 208 CHECK_EQ(12, map_[ASCIIToUTF16("dec")]); 209 } 210 211 // Maps lowercase month names to numbers in range 1-12. 212 std::map<base::string16, int> map_; 213 214 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap); 215 }; 216 217 } // namespace 218 219 // static 220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text, 221 int* number) { 222 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number); 223 } 224 225 // static 226 bool FtpUtil::LsDateListingToTime(const base::string16& month, 227 const base::string16& day, 228 const base::string16& rest, 229 const base::Time& current_time, 230 base::Time* result) { 231 base::Time::Exploded time_exploded = { 0 }; 232 233 if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) { 234 // Work around garbage sent by some servers in the same column 235 // as the month. Take just last 3 characters of the string. 236 if (month.length() < 3 || 237 !AbbreviatedMonthToNumber(month.substr(month.length() - 3), 238 &time_exploded.month)) { 239 return false; 240 } 241 } 242 243 if (!base::StringToInt(day, &time_exploded.day_of_month)) 244 return false; 245 if (time_exploded.day_of_month > 31) 246 return false; 247 248 if (!base::StringToInt(rest, &time_exploded.year)) { 249 // Maybe it's time. Does it look like time? Note that it can be any of 250 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M". 251 if (rest.length() > 5) 252 return false; 253 254 size_t colon_pos = rest.find(':'); 255 if (colon_pos == base::string16::npos) 256 return false; 257 if (colon_pos > 2) 258 return false; 259 260 if (!base::StringToInt( 261 StringPiece16(rest.begin(), rest.begin() + colon_pos), 262 &time_exploded.hour)) { 263 return false; 264 } 265 if (!base::StringToInt( 266 StringPiece16(rest.begin() + colon_pos + 1, rest.end()), 267 &time_exploded.minute)) { 268 return false; 269 } 270 271 // Guess the year. 272 base::Time::Exploded current_exploded; 273 current_time.LocalExplode(¤t_exploded); 274 275 // If it's not possible for the parsed date to be in the current year, 276 // use the previous year. 277 if (time_exploded.month > current_exploded.month || 278 (time_exploded.month == current_exploded.month && 279 time_exploded.day_of_month > current_exploded.day_of_month)) { 280 time_exploded.year = current_exploded.year - 1; 281 } else { 282 time_exploded.year = current_exploded.year; 283 } 284 } 285 286 // We don't know the time zone of the listing, so just use local time. 287 *result = base::Time::FromLocalExploded(time_exploded); 288 return true; 289 } 290 291 // static 292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date, 293 const base::string16& time, 294 base::Time* result) { 295 base::Time::Exploded time_exploded = { 0 }; 296 297 // Date should be in format MM-DD-YY[YY]. 298 std::vector<base::string16> date_parts; 299 base::SplitString(date, '-', &date_parts); 300 if (date_parts.size() != 3) 301 return false; 302 if (!base::StringToInt(date_parts[0], &time_exploded.month)) 303 return false; 304 if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month)) 305 return false; 306 if (!base::StringToInt(date_parts[2], &time_exploded.year)) 307 return false; 308 if (time_exploded.year < 0) 309 return false; 310 // If year has only two digits then assume that 00-79 is 2000-2079, 311 // and 80-99 is 1980-1999. 312 if (time_exploded.year < 80) 313 time_exploded.year += 2000; 314 else if (time_exploded.year < 100) 315 time_exploded.year += 1900; 316 317 // Time should be in format HH:MM[(AM|PM)] 318 if (time.length() < 5) 319 return false; 320 321 std::vector<base::string16> time_parts; 322 base::SplitString(time.substr(0, 5), ':', &time_parts); 323 if (time_parts.size() != 2) 324 return false; 325 if (!base::StringToInt(time_parts[0], &time_exploded.hour)) 326 return false; 327 if (!base::StringToInt(time_parts[1], &time_exploded.minute)) 328 return false; 329 if (!time_exploded.HasValidValues()) 330 return false; 331 332 if (time.length() > 5) { 333 if (time.length() != 7) 334 return false; 335 base::string16 am_or_pm(time.substr(5, 2)); 336 if (EqualsASCII(am_or_pm, "PM")) { 337 if (time_exploded.hour < 12) 338 time_exploded.hour += 12; 339 } else if (EqualsASCII(am_or_pm, "AM")) { 340 if (time_exploded.hour == 12) 341 time_exploded.hour = 0; 342 } else { 343 return false; 344 } 345 } 346 347 // We don't know the time zone of the server, so just use local time. 348 *result = base::Time::FromLocalExploded(time_exploded); 349 return true; 350 } 351 352 // static 353 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text, 354 int columns) { 355 base::i18n::UTF16CharIterator iter(&text); 356 357 // TODO(jshin): Is u_isspace the right function to use here? 358 for (int i = 0; i < columns; i++) { 359 // Skip the leading whitespace. 360 while (!iter.end() && u_isspace(iter.get())) 361 iter.Advance(); 362 363 // Skip the actual text of i-th column. 364 while (!iter.end() && !u_isspace(iter.get())) 365 iter.Advance(); 366 } 367 368 base::string16 result(text.substr(iter.array_pos())); 369 base::TrimWhitespace(result, base::TRIM_ALL, &result); 370 return result; 371 } 372 373 } // namespace 374