Home | History | Annotate | Download | only in ftp
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/ftp/ftp_util.h"
      6 
      7 #include <map>
      8 #include <vector>
      9 
     10 #include "base/i18n/case_conversion.h"
     11 #include "base/i18n/char_iterator.h"
     12 #include "base/logging.h"
     13 #include "base/memory/singleton.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_piece.h"
     16 #include "base/strings/string_split.h"
     17 #include "base/strings/string_tokenizer.h"
     18 #include "base/strings/string_util.h"
     19 #include "base/strings/utf_string_conversions.h"
     20 #include "base/time/time.h"
     21 #include "third_party/icu/source/common/unicode/uchar.h"
     22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
     23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
     24 
     25 using base::StringPiece16;
     26 
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it's a file or directory.
     29 
     30 namespace net {
     31 
     32 // static
     33 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
     34   if (unix_path.empty())
     35     return std::string();
     36 
     37   base::StringTokenizer tokenizer(unix_path, "/");
     38   std::vector<std::string> tokens;
     39   while (tokenizer.GetNext())
     40     tokens.push_back(tokenizer.token());
     41 
     42   if (unix_path[0] == '/') {
     43     // It's an absolute path.
     44 
     45     if (tokens.empty()) {
     46       DCHECK_EQ(1U, unix_path.length());
     47       return "[]";
     48     }
     49 
     50     if (tokens.size() == 1)
     51       return unix_path.substr(1);  // Drop the leading slash.
     52 
     53     std::string result(tokens[0] + ":[");
     54     if (tokens.size() == 2) {
     55       // Don't ask why, it just works that way on VMS.
     56       result.append("000000");
     57     } else {
     58       result.append(tokens[1]);
     59       for (size_t i = 2; i < tokens.size() - 1; i++)
     60         result.append("." + tokens[i]);
     61     }
     62     result.append("]" + tokens[tokens.size() - 1]);
     63     return result;
     64   }
     65 
     66   if (tokens.size() == 1)
     67     return unix_path;
     68 
     69   std::string result("[");
     70   for (size_t i = 0; i < tokens.size() - 1; i++)
     71     result.append("." + tokens[i]);
     72   result.append("]" + tokens[tokens.size() - 1]);
     73   return result;
     74 }
     75 
     76 // static
     77 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
     78   if (unix_path.empty())
     79     return std::string();
     80 
     81   std::string path(unix_path);
     82 
     83   if (path[path.length() - 1] != '/')
     84     path.append("/");
     85 
     86   // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
     87   // real path and removing it after conversion.
     88   path.append("x");
     89   path = UnixFilePathToVMS(path);
     90   return path.substr(0, path.length() - 1);
     91 }
     92 
     93 // static
     94 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
     95   if (vms_path.empty())
     96     return ".";
     97 
     98   if (vms_path[0] == '/') {
     99     // This is not really a VMS path. Most likely the server is emulating UNIX.
    100     // Return path as-is.
    101     return vms_path;
    102   }
    103 
    104   if (vms_path == "[]")
    105     return "/";
    106 
    107   std::string result(vms_path);
    108   if (vms_path[0] == '[') {
    109     // It's a relative path.
    110     ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
    111   } else {
    112     // It's an absolute path.
    113     result.insert(0, "/");
    114     ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
    115     ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
    116   }
    117   std::replace(result.begin(), result.end(), '.', '/');
    118   std::replace(result.begin(), result.end(), ']', '/');
    119 
    120   // Make sure the result doesn't end with a slash.
    121   if (result.length() && result[result.length() - 1] == '/')
    122     result = result.substr(0, result.length() - 1);
    123 
    124   return result;
    125 }
    126 
    127 namespace {
    128 
    129 // Lazy-initialized map of abbreviated month names.
    130 class AbbreviatedMonthsMap {
    131  public:
    132   static AbbreviatedMonthsMap* GetInstance() {
    133     return Singleton<AbbreviatedMonthsMap>::get();
    134   }
    135 
    136   // Converts abbreviated month name |text| to its number (in range 1-12).
    137   // On success returns true and puts the number in |number|.
    138   bool GetMonthNumber(const base::string16& text, int* number) {
    139     // Ignore the case of the month names. The simplest way to handle that
    140     // is to make everything lowercase.
    141     base::string16 text_lower(base::i18n::ToLower(text));
    142 
    143     if (map_.find(text_lower) == map_.end())
    144       return false;
    145 
    146     *number = map_[text_lower];
    147     return true;
    148   }
    149 
    150  private:
    151   friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
    152 
    153   // Constructor, initializes the map based on ICU data. It is much faster
    154   // to do that just once.
    155   AbbreviatedMonthsMap() {
    156     int32_t locales_count;
    157     const icu::Locale* locales =
    158         icu::DateFormat::getAvailableLocales(locales_count);
    159 
    160     for (int32_t locale = 0; locale < locales_count; locale++) {
    161       UErrorCode status(U_ZERO_ERROR);
    162 
    163       icu::DateFormatSymbols format_symbols(locales[locale], status);
    164 
    165       // If we cannot get format symbols for some locale, it's not a fatal
    166       // error. Just try another one.
    167       if (U_FAILURE(status))
    168         continue;
    169 
    170       int32_t months_count;
    171       const icu::UnicodeString* months =
    172           format_symbols.getShortMonths(months_count);
    173 
    174       for (int32_t month = 0; month < months_count; month++) {
    175         base::string16 month_name(months[month].getBuffer(),
    176                             static_cast<size_t>(months[month].length()));
    177 
    178         // Ignore the case of the month names. The simplest way to handle that
    179         // is to make everything lowercase.
    180         month_name = base::i18n::ToLower(month_name);
    181 
    182         map_[month_name] = month + 1;
    183 
    184         // Sometimes ICU returns longer strings, but in FTP listings a shorter
    185         // abbreviation is used (for example for the Russian locale). Make sure
    186         // we always have a map entry for a three-letter abbreviation.
    187         map_[month_name.substr(0, 3)] = month + 1;
    188       }
    189     }
    190 
    191     // Fail loudly if the data returned by ICU is obviously incomplete.
    192     // This is intended to catch cases like http://crbug.com/177428
    193     // much earlier. Note that the issue above turned out to be non-trivial
    194     // to reproduce - crash data is much better indicator of a problem
    195     // than incomplete bug reports.
    196     CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
    197     CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
    198     CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
    199     CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
    200     CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
    201     CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
    202     CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
    203     CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
    204     CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
    205     CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
    206     CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
    207     CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
    208   }
    209 
    210   // Maps lowercase month names to numbers in range 1-12.
    211   std::map<base::string16, int> map_;
    212 
    213   DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
    214 };
    215 
    216 }  // namespace
    217 
    218 // static
    219 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
    220                                        int* number) {
    221   return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
    222 }
    223 
    224 // static
    225 bool FtpUtil::LsDateListingToTime(const base::string16& month,
    226                                   const base::string16& day,
    227                                   const base::string16& rest,
    228                                   const base::Time& current_time,
    229                                   base::Time* result) {
    230   base::Time::Exploded time_exploded = { 0 };
    231 
    232   if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
    233     // Work around garbage sent by some servers in the same column
    234     // as the month. Take just last 3 characters of the string.
    235     if (month.length() < 3 ||
    236         !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
    237                                   &time_exploded.month)) {
    238       return false;
    239     }
    240   }
    241 
    242   if (!base::StringToInt(day, &time_exploded.day_of_month))
    243     return false;
    244   if (time_exploded.day_of_month > 31)
    245     return false;
    246 
    247   if (!base::StringToInt(rest, &time_exploded.year)) {
    248     // Maybe it's time. Does it look like time (HH:MM)?
    249     if (rest.length() == 5 && rest[2] == ':') {
    250       if (!base::StringToInt(StringPiece16(rest.begin(), rest.begin() + 2),
    251                              &time_exploded.hour)) {
    252         return false;
    253       }
    254 
    255       if (!base::StringToInt(StringPiece16(rest.begin() + 3, rest.begin() + 5),
    256                              &time_exploded.minute)) {
    257         return false;
    258       }
    259     } else if (rest.length() == 4 && rest[1] == ':') {
    260       // Sometimes it's just H:MM.
    261       if (!base::StringToInt(StringPiece16(rest.begin(), rest.begin() + 1),
    262                              &time_exploded.hour)) {
    263         return false;
    264       }
    265 
    266       if (!base::StringToInt(StringPiece16(rest.begin() + 2, rest.begin() + 4),
    267                              &time_exploded.minute)) {
    268         return false;
    269       }
    270     } else {
    271       return false;
    272     }
    273 
    274     // Guess the year.
    275     base::Time::Exploded current_exploded;
    276     current_time.LocalExplode(&current_exploded);
    277 
    278     // If it's not possible for the parsed date to be in the current year,
    279     // use the previous year.
    280     if (time_exploded.month > current_exploded.month ||
    281         (time_exploded.month == current_exploded.month &&
    282          time_exploded.day_of_month > current_exploded.day_of_month)) {
    283       time_exploded.year = current_exploded.year - 1;
    284     } else {
    285       time_exploded.year = current_exploded.year;
    286     }
    287   }
    288 
    289   // We don't know the time zone of the listing, so just use local time.
    290   *result = base::Time::FromLocalExploded(time_exploded);
    291   return true;
    292 }
    293 
    294 // static
    295 bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
    296                                        const base::string16& time,
    297                                        base::Time* result) {
    298   base::Time::Exploded time_exploded = { 0 };
    299 
    300   // Date should be in format MM-DD-YY[YY].
    301   std::vector<base::string16> date_parts;
    302   base::SplitString(date, '-', &date_parts);
    303   if (date_parts.size() != 3)
    304     return false;
    305   if (!base::StringToInt(date_parts[0], &time_exploded.month))
    306     return false;
    307   if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
    308     return false;
    309   if (!base::StringToInt(date_parts[2], &time_exploded.year))
    310     return false;
    311   if (time_exploded.year < 0)
    312     return false;
    313   // If year has only two digits then assume that 00-79 is 2000-2079,
    314   // and 80-99 is 1980-1999.
    315   if (time_exploded.year < 80)
    316     time_exploded.year += 2000;
    317   else if (time_exploded.year < 100)
    318     time_exploded.year += 1900;
    319 
    320   // Time should be in format HH:MM[(AM|PM)]
    321   if (time.length() < 5)
    322     return false;
    323 
    324   std::vector<base::string16> time_parts;
    325   base::SplitString(time.substr(0, 5), ':', &time_parts);
    326   if (time_parts.size() != 2)
    327     return false;
    328   if (!base::StringToInt(time_parts[0], &time_exploded.hour))
    329     return false;
    330   if (!base::StringToInt(time_parts[1], &time_exploded.minute))
    331     return false;
    332   if (!time_exploded.HasValidValues())
    333     return false;
    334 
    335   if (time.length() > 5) {
    336     if (time.length() != 7)
    337       return false;
    338     base::string16 am_or_pm(time.substr(5, 2));
    339     if (EqualsASCII(am_or_pm, "PM")) {
    340       if (time_exploded.hour < 12)
    341         time_exploded.hour += 12;
    342     } else if (EqualsASCII(am_or_pm, "AM")) {
    343       if (time_exploded.hour == 12)
    344         time_exploded.hour = 0;
    345     } else {
    346       return false;
    347     }
    348   }
    349 
    350   // We don't know the time zone of the server, so just use local time.
    351   *result = base::Time::FromLocalExploded(time_exploded);
    352   return true;
    353 }
    354 
    355 // static
    356 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
    357                                                   int columns) {
    358   base::i18n::UTF16CharIterator iter(&text);
    359 
    360   // TODO(jshin): Is u_isspace the right function to use here?
    361   for (int i = 0; i < columns; i++) {
    362     // Skip the leading whitespace.
    363     while (!iter.end() && u_isspace(iter.get()))
    364       iter.Advance();
    365 
    366     // Skip the actual text of i-th column.
    367     while (!iter.end() && !u_isspace(iter.get()))
    368       iter.Advance();
    369   }
    370 
    371   base::string16 result(text.substr(iter.array_pos()));
    372   TrimWhitespace(result, TRIM_ALL, &result);
    373   return result;
    374 }
    375 
    376 }  // namespace
    377