Home | History | Annotate | Download | only in ftp
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/ftp/ftp_directory_listing_parser_ls.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/strings/string_number_conversions.h"
     10 #include "base/strings/string_split.h"
     11 #include "base/strings/string_util.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "base/time/time.h"
     14 #include "net/ftp/ftp_directory_listing_parser.h"
     15 #include "net/ftp/ftp_util.h"
     16 
     17 namespace {
     18 
     19 bool TwoColumnDateListingToTime(const base::string16& date,
     20                                 const base::string16& time,
     21                                 base::Time* result) {
     22   base::Time::Exploded time_exploded = { 0 };
     23 
     24   // Date should be in format YYYY-MM-DD.
     25   std::vector<base::string16> date_parts;
     26   base::SplitString(date, '-', &date_parts);
     27   if (date_parts.size() != 3)
     28     return false;
     29   if (!base::StringToInt(date_parts[0], &time_exploded.year))
     30     return false;
     31   if (!base::StringToInt(date_parts[1], &time_exploded.month))
     32     return false;
     33   if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month))
     34     return false;
     35 
     36   // Time should be in format HH:MM
     37   if (time.length() != 5)
     38     return false;
     39 
     40   std::vector<base::string16> time_parts;
     41   base::SplitString(time, ':', &time_parts);
     42   if (time_parts.size() != 2)
     43     return false;
     44   if (!base::StringToInt(time_parts[0], &time_exploded.hour))
     45     return false;
     46   if (!base::StringToInt(time_parts[1], &time_exploded.minute))
     47     return false;
     48   if (!time_exploded.HasValidValues())
     49     return false;
     50 
     51   // We don't know the time zone of the server, so just use local time.
     52   *result = base::Time::FromLocalExploded(time_exploded);
     53   return true;
     54 }
     55 
     56 // Returns the column index of the end of the date listing and detected
     57 // last modification time.
     58 bool DetectColumnOffsetSizeAndModificationTime(
     59     const std::vector<base::string16>& columns,
     60     const base::Time& current_time,
     61     size_t* offset,
     62     base::string16* size,
     63     base::Time* modification_time) {
     64   // The column offset can be arbitrarily large if some fields
     65   // like owner or group name contain spaces. Try offsets from left to right
     66   // and use the first one that matches a date listing.
     67   //
     68   // Here is how a listing line should look like. A star ("*") indicates
     69   // a required field:
     70   //
     71   //  * 1. permission listing
     72   //    2. number of links (optional)
     73   //  * 3. owner name (may contain spaces)
     74   //    4. group name (optional, may contain spaces)
     75   //  * 5. size in bytes
     76   //  * 6. month
     77   //  * 7. day of month
     78   //  * 8. year or time <-- column_offset will be the index of this column
     79   //    9. file name (optional, may contain spaces)
     80   for (size_t i = 5U; i < columns.size(); i++) {
     81     if (net::FtpUtil::LsDateListingToTime(columns[i - 2],
     82                                           columns[i - 1],
     83                                           columns[i],
     84                                           current_time,
     85                                           modification_time)) {
     86       *size = columns[i - 3];
     87       *offset = i;
     88       return true;
     89     }
     90   }
     91 
     92   // Some FTP listings have swapped the "month" and "day of month" columns
     93   // (for example Russian listings). We try to recognize them only after making
     94   // sure no column offset works above (this is a more strict way).
     95   for (size_t i = 5U; i < columns.size(); i++) {
     96     if (net::FtpUtil::LsDateListingToTime(columns[i - 1],
     97                                           columns[i - 2],
     98                                           columns[i],
     99                                           current_time,
    100                                           modification_time)) {
    101       *size = columns[i - 3];
    102       *offset = i;
    103       return true;
    104     }
    105   }
    106 
    107   // Some FTP listings use a different date format.
    108   for (size_t i = 5U; i < columns.size(); i++) {
    109     if (TwoColumnDateListingToTime(columns[i - 1],
    110                                    columns[i],
    111                                    modification_time)) {
    112       *size = columns[i - 2];
    113       *offset = i;
    114       return true;
    115     }
    116   }
    117 
    118   return false;
    119 }
    120 
    121 }  // namespace
    122 
    123 namespace net {
    124 
    125 bool ParseFtpDirectoryListingLs(
    126     const std::vector<base::string16>& lines,
    127     const base::Time& current_time,
    128     std::vector<FtpDirectoryListingEntry>* entries) {
    129   // True after we have received a "total n" listing header, where n is an
    130   // integer. Only one such header is allowed per listing.
    131   bool received_total_line = false;
    132 
    133   for (size_t i = 0; i < lines.size(); i++) {
    134     if (lines[i].empty())
    135       continue;
    136 
    137     std::vector<base::string16> columns;
    138     base::SplitString(base::CollapseWhitespace(lines[i], false), ' ', &columns);
    139 
    140     // Some FTP servers put a "total n" line at the beginning of the listing
    141     // (n is an integer). Allow such a line, but only once, and only if it's
    142     // the first non-empty line. Do not match the word exactly, because it may
    143     // be in different languages (at least English and German have been seen
    144     // in the field).
    145     if (columns.size() == 2 && !received_total_line) {
    146       received_total_line = true;
    147 
    148       int64 total_number;
    149       if (!base::StringToInt64(columns[1], &total_number))
    150         return false;
    151       if (total_number < 0)
    152         return false;
    153 
    154       continue;
    155     }
    156 
    157     FtpDirectoryListingEntry entry;
    158 
    159     size_t column_offset;
    160     base::string16 size;
    161     if (!DetectColumnOffsetSizeAndModificationTime(columns,
    162                                                    current_time,
    163                                                    &column_offset,
    164                                                    &size,
    165                                                    &entry.last_modified)) {
    166       // Some servers send a message in one of the first few lines.
    167       // All those messages have in common is the string ".:",
    168       // where "." means the current directory, and ":" separates it
    169       // from the rest of the message, which may be empty.
    170       if (lines[i].find(base::ASCIIToUTF16(".:")) != base::string16::npos)
    171         continue;
    172 
    173       return false;
    174     }
    175 
    176     // Do not check "validity" of the permission listing. It's quirky,
    177     // and some servers send garbage here while other parts of the line are OK.
    178 
    179     if (!columns[0].empty() && columns[0][0] == 'l') {
    180       entry.type = FtpDirectoryListingEntry::SYMLINK;
    181     } else if (!columns[0].empty() && columns[0][0] == 'd') {
    182       entry.type = FtpDirectoryListingEntry::DIRECTORY;
    183     } else {
    184       entry.type = FtpDirectoryListingEntry::FILE;
    185     }
    186 
    187     if (!base::StringToInt64(size, &entry.size)) {
    188       // Some FTP servers do not separate owning group name from file size,
    189       // like "group1234". We still want to display the file name for that
    190       // entry, but can't really get the size (What if the group is named
    191       // "group1", and the size is in fact 234? We can't distinguish between
    192       // that and "group" with size 1234). Use a dummy value for the size.
    193       // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes.
    194       entry.size = 0;
    195     }
    196     if (entry.size < 0) {
    197       // Some FTP servers have bugs that cause them to display the file size
    198       // as negative. They're most likely big files like DVD ISO images.
    199       // We still want to display them, so just say the real file size
    200       // is unknown.
    201       entry.size = -1;
    202     }
    203     if (entry.type != FtpDirectoryListingEntry::FILE)
    204       entry.size = -1;
    205 
    206     if (column_offset == columns.size() - 1) {
    207       // If the end of the date listing is the last column, there is no file
    208       // name. Some FTP servers send listing entries with empty names.
    209       // It's not obvious how to display such an entry, so we ignore them.
    210       // We don't want to make the parsing fail at this point though.
    211       // Other entries can still be useful.
    212       continue;
    213     }
    214 
    215     entry.name = FtpUtil::GetStringPartAfterColumns(lines[i],
    216                                                     column_offset + 1);
    217 
    218     if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
    219       base::string16::size_type pos =
    220           entry.name.rfind(base::ASCIIToUTF16(" -> "));
    221 
    222       // We don't require the " -> " to be present. Some FTP servers don't send
    223       // the symlink target, possibly for security reasons.
    224       if (pos != base::string16::npos)
    225         entry.name = entry.name.substr(0, pos);
    226     }
    227 
    228     entries->push_back(entry);
    229   }
    230 
    231   return true;
    232 }
    233 
    234 }  // namespace net
    235