Home | History | Annotate | Download | only in ftp
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/ftp/ftp_util.h"
      6 
      7 #include <map>
      8 #include <vector>
      9 
     10 #include "base/i18n/case_conversion.h"
     11 #include "base/i18n/char_iterator.h"
     12 #include "base/logging.h"
     13 #include "base/memory/singleton.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_piece.h"
     16 #include "base/strings/string_split.h"
     17 #include "base/strings/string_tokenizer.h"
     18 #include "base/strings/string_util.h"
     19 #include "base/strings/utf_string_conversions.h"
     20 #include "base/time/time.h"
     21 #include "third_party/icu/source/common/unicode/uchar.h"
     22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
     23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
     24 
     25 using base::ASCIIToUTF16;
     26 using base::StringPiece16;
     27 
     28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS
     29 // a path looks different depending on whether it names a file or a directory.
     30 
     31 namespace net {
     32 
     33 // static
     34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
     35   if (unix_path.empty())
     36     return std::string();
     37 
     38   base::StringTokenizer tokenizer(unix_path, "/");
     39   std::vector<std::string> tokens;
     40   while (tokenizer.GetNext())
     41     tokens.push_back(tokenizer.token());
     42 
     43   if (unix_path[0] == '/') {
     44     // It's an absolute path.
     45 
     46     if (tokens.empty()) {
     47       DCHECK_EQ(1U, unix_path.length());
     48       return "[]";
     49     }
     50 
     51     if (tokens.size() == 1)
     52       return unix_path.substr(1);  // Drop the leading slash.
     53 
     54     std::string result(tokens[0] + ":[");
     55     if (tokens.size() == 2) {
     56       // Don't ask why, it just works that way on VMS.
     57       result.append("000000");
     58     } else {
     59       result.append(tokens[1]);
     60       for (size_t i = 2; i < tokens.size() - 1; i++)
     61         result.append("." + tokens[i]);
     62     }
     63     result.append("]" + tokens[tokens.size() - 1]);
     64     return result;
     65   }
     66 
     67   if (tokens.size() == 1)
     68     return unix_path;
     69 
     70   std::string result("[");
     71   for (size_t i = 0; i < tokens.size() - 1; i++)
     72     result.append("." + tokens[i]);
     73   result.append("]" + tokens[tokens.size() - 1]);
     74   return result;
     75 }
     76 
     77 // static
     78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
     79   if (unix_path.empty())
     80     return std::string();
     81 
     82   std::string path(unix_path);
     83 
     84   if (path[path.length() - 1] != '/')
     85     path.append("/");
     86 
     87   // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
     88   // real path and removing it after conversion.
     89   path.append("x");
     90   path = UnixFilePathToVMS(path);
     91   return path.substr(0, path.length() - 1);
     92 }
     93 
     94 // static
     95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
     96   if (vms_path.empty())
     97     return ".";
     98 
     99   if (vms_path[0] == '/') {
    100     // This is not really a VMS path. Most likely the server is emulating UNIX.
    101     // Return path as-is.
    102     return vms_path;
    103   }
    104 
    105   if (vms_path == "[]")
    106     return "/";
    107 
    108   std::string result(vms_path);
    109   if (vms_path[0] == '[') {
    110     // It's a relative path.
    111     ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
    112   } else {
    113     // It's an absolute path.
    114     result.insert(0, "/");
    115     ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
    116     ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
    117   }
    118   std::replace(result.begin(), result.end(), '.', '/');
    119   std::replace(result.begin(), result.end(), ']', '/');
    120 
    121   // Make sure the result doesn't end with a slash.
    122   if (result.length() && result[result.length() - 1] == '/')
    123     result = result.substr(0, result.length() - 1);
    124 
    125   return result;
    126 }
    127 
    128 namespace {
    129 
    130 // Lazy-initialized map of abbreviated month names.
    131 class AbbreviatedMonthsMap {
    132  public:
    133   static AbbreviatedMonthsMap* GetInstance() {
    134     return Singleton<AbbreviatedMonthsMap>::get();
    135   }
    136 
    137   // Converts abbreviated month name |text| to its number (in range 1-12).
    138   // On success returns true and puts the number in |number|.
    139   bool GetMonthNumber(const base::string16& text, int* number) {
    140     // Ignore the case of the month names. The simplest way to handle that
    141     // is to make everything lowercase.
    142     base::string16 text_lower(base::i18n::ToLower(text));
    143 
    144     if (map_.find(text_lower) == map_.end())
    145       return false;
    146 
    147     *number = map_[text_lower];
    148     return true;
    149   }
    150 
    151  private:
    152   friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
    153 
    154   // Constructor, initializes the map based on ICU data. It is much faster
    155   // to do that just once.
    156   AbbreviatedMonthsMap() {
    157     int32_t locales_count;
    158     const icu::Locale* locales =
    159         icu::DateFormat::getAvailableLocales(locales_count);
    160 
    161     for (int32_t locale = 0; locale < locales_count; locale++) {
    162       UErrorCode status(U_ZERO_ERROR);
    163 
    164       icu::DateFormatSymbols format_symbols(locales[locale], status);
    165 
    166       // If we cannot get format symbols for some locale, it's not a fatal
    167       // error. Just try another one.
    168       if (U_FAILURE(status))
    169         continue;
    170 
    171       int32_t months_count;
    172       const icu::UnicodeString* months =
    173           format_symbols.getShortMonths(months_count);
    174 
    175       for (int32_t month = 0; month < months_count; month++) {
    176         base::string16 month_name(months[month].getBuffer(),
    177                             static_cast<size_t>(months[month].length()));
    178 
    179         // Ignore the case of the month names. The simplest way to handle that
    180         // is to make everything lowercase.
    181         month_name = base::i18n::ToLower(month_name);
    182 
    183         map_[month_name] = month + 1;
    184 
    185         // Sometimes ICU returns longer strings, but in FTP listings a shorter
    186         // abbreviation is used (for example for the Russian locale). Make sure
    187         // we always have a map entry for a three-letter abbreviation.
    188         map_[month_name.substr(0, 3)] = month + 1;
    189       }
    190     }
    191 
    192     // Fail loudly if the data returned by ICU is obviously incomplete.
    193     // This is intended to catch cases like http://crbug.com/177428
    194     // much earlier. Note that the issue above turned out to be non-trivial
    195     // to reproduce - crash data is much better indicator of a problem
    196     // than incomplete bug reports.
    197     CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
    198     CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
    199     CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
    200     CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
    201     CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
    202     CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
    203     CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
    204     CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
    205     CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
    206     CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
    207     CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
    208     CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
    209   }
    210 
    211   // Maps lowercase month names to numbers in range 1-12.
    212   std::map<base::string16, int> map_;
    213 
    214   DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
    215 };
    216 
    217 }  // namespace
    218 
    219 // static
    220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
    221                                        int* number) {
    222   return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
    223 }
    224 
    225 // static
    226 bool FtpUtil::LsDateListingToTime(const base::string16& month,
    227                                   const base::string16& day,
    228                                   const base::string16& rest,
    229                                   const base::Time& current_time,
    230                                   base::Time* result) {
    231   base::Time::Exploded time_exploded = { 0 };
    232 
    233   if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
    234     // Work around garbage sent by some servers in the same column
    235     // as the month. Take just last 3 characters of the string.
    236     if (month.length() < 3 ||
    237         !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
    238                                   &time_exploded.month)) {
    239       return false;
    240     }
    241   }
    242 
    243   if (!base::StringToInt(day, &time_exploded.day_of_month))
    244     return false;
    245   if (time_exploded.day_of_month > 31)
    246     return false;
    247 
    248   if (!base::StringToInt(rest, &time_exploded.year)) {
    249     // Maybe it's time. Does it look like time? Note that it can be any of
    250     // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
    251     if (rest.length() > 5)
    252       return false;
    253 
    254     size_t colon_pos = rest.find(':');
    255     if (colon_pos == base::string16::npos)
    256       return false;
    257     if (colon_pos > 2)
    258       return false;
    259 
    260     if (!base::StringToInt(
    261             StringPiece16(rest.begin(), rest.begin() + colon_pos),
    262             &time_exploded.hour)) {
    263       return false;
    264     }
    265     if (!base::StringToInt(
    266             StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
    267             &time_exploded.minute)) {
    268       return false;
    269     }
    270 
    271     // Guess the year.
    272     base::Time::Exploded current_exploded;
    273     current_time.LocalExplode(&current_exploded);
    274 
    275     // If it's not possible for the parsed date to be in the current year,
    276     // use the previous year.
    277     if (time_exploded.month > current_exploded.month ||
    278         (time_exploded.month == current_exploded.month &&
    279          time_exploded.day_of_month > current_exploded.day_of_month)) {
    280       time_exploded.year = current_exploded.year - 1;
    281     } else {
    282       time_exploded.year = current_exploded.year;
    283     }
    284   }
    285 
    286   // We don't know the time zone of the listing, so just use local time.
    287   *result = base::Time::FromLocalExploded(time_exploded);
    288   return true;
    289 }
    290 
    291 // static
    292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
    293                                        const base::string16& time,
    294                                        base::Time* result) {
    295   base::Time::Exploded time_exploded = { 0 };
    296 
    297   // Date should be in format MM-DD-YY[YY].
    298   std::vector<base::string16> date_parts;
    299   base::SplitString(date, '-', &date_parts);
    300   if (date_parts.size() != 3)
    301     return false;
    302   if (!base::StringToInt(date_parts[0], &time_exploded.month))
    303     return false;
    304   if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
    305     return false;
    306   if (!base::StringToInt(date_parts[2], &time_exploded.year))
    307     return false;
    308   if (time_exploded.year < 0)
    309     return false;
    310   // If year has only two digits then assume that 00-79 is 2000-2079,
    311   // and 80-99 is 1980-1999.
    312   if (time_exploded.year < 80)
    313     time_exploded.year += 2000;
    314   else if (time_exploded.year < 100)
    315     time_exploded.year += 1900;
    316 
    317   // Time should be in format HH:MM[(AM|PM)]
    318   if (time.length() < 5)
    319     return false;
    320 
    321   std::vector<base::string16> time_parts;
    322   base::SplitString(time.substr(0, 5), ':', &time_parts);
    323   if (time_parts.size() != 2)
    324     return false;
    325   if (!base::StringToInt(time_parts[0], &time_exploded.hour))
    326     return false;
    327   if (!base::StringToInt(time_parts[1], &time_exploded.minute))
    328     return false;
    329   if (!time_exploded.HasValidValues())
    330     return false;
    331 
    332   if (time.length() > 5) {
    333     if (time.length() != 7)
    334       return false;
    335     base::string16 am_or_pm(time.substr(5, 2));
    336     if (EqualsASCII(am_or_pm, "PM")) {
    337       if (time_exploded.hour < 12)
    338         time_exploded.hour += 12;
    339     } else if (EqualsASCII(am_or_pm, "AM")) {
    340       if (time_exploded.hour == 12)
    341         time_exploded.hour = 0;
    342     } else {
    343       return false;
    344     }
    345   }
    346 
    347   // We don't know the time zone of the server, so just use local time.
    348   *result = base::Time::FromLocalExploded(time_exploded);
    349   return true;
    350 }
    351 
    352 // static
    353 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
    354                                                   int columns) {
    355   base::i18n::UTF16CharIterator iter(&text);
    356 
    357   // TODO(jshin): Is u_isspace the right function to use here?
    358   for (int i = 0; i < columns; i++) {
    359     // Skip the leading whitespace.
    360     while (!iter.end() && u_isspace(iter.get()))
    361       iter.Advance();
    362 
    363     // Skip the actual text of i-th column.
    364     while (!iter.end() && !u_isspace(iter.get()))
    365       iter.Advance();
    366   }
    367 
    368   base::string16 result(text.substr(iter.array_pos()));
    369   base::TrimWhitespace(result, base::TRIM_ALL, &result);
    370   return result;
    371 }
    372 
    373 }  // namespace net
    374