Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // FilePath is a container for pathnames stored in a platform's native string
      6 // type, providing containers for manipulation in accordance with the
      7 // platform's conventions for pathnames.  It supports the following path
      8 // types:
      9 //
     10 //                   POSIX            Windows
     11 //                   ---------------  ----------------------------------
     12 // Fundamental type  char[]           wchar_t[]
     13 // Encoding          unspecified*     UTF-16
     14 // Separator         /                \, tolerant of /
     15 // Drive letters     no               case-insensitive A-Z followed by :
     16 // Alternate root    // (surprise!)   \\, for UNC paths
     17 //
     18 // * The encoding need not be specified on POSIX systems, although some
     19 //   POSIX-compliant systems do specify an encoding.  Mac OS X uses UTF-8.
     20 //   Linux does not specify an encoding, but in practice, the locale's
     21 //   character set may be used.
     22 //
     23 // For more arcane bits of path trivia, see below.
     24 //
     25 // FilePath objects are intended to be used anywhere paths are.  An
     26 // application may pass FilePath objects around internally, masking the
     27 // underlying differences between systems, only differing in implementation
     28 // where interfacing directly with the system.  For example, a single
     29 // OpenFile(const FilePath &) function may be made available, allowing all
     30 // callers to operate without regard to the underlying implementation.  On
     31 // POSIX-like platforms, OpenFile might wrap fopen, and on Windows, it might
     32 // wrap _wfopen_s, perhaps both by calling file_path.value().c_str().  This
     33 // allows each platform to pass pathnames around without requiring conversions
     34 // between encodings, which has an impact on performance, but more importantly,
     35 // has an impact on correctness on platforms that do not have well-defined
     36 // encodings for pathnames.
     37 //
     38 // Several methods are available to perform common operations on a FilePath
     39 // object, such as determining the parent directory (DirName), isolating the
     40 // final path component (BaseName), and appending a relative pathname string
     41 // to an existing FilePath object (Append).  These methods are highly
     42 // recommended over attempting to split and concatenate strings directly.
     43 // These methods are based purely on string manipulation and knowledge of
     44 // platform-specific pathname conventions, and do not consult the filesystem
     45 // at all, making them safe to use without fear of blocking on I/O operations.
     46 // These methods do not function as mutators but instead return distinct
     47 // instances of FilePath objects, and are therefore safe to use on const
     48 // objects.  The objects themselves are safe to share between threads.
     49 //
     50 // To aid in initialization of FilePath objects from string literals, a
     51 // FILE_PATH_LITERAL macro is provided, which accounts for the difference
     52 // between char[]-based pathnames on POSIX systems and wchar_t[]-based
     53 // pathnames on Windows.
     54 //
     55 // A FilePath object should not be instantiated at the global scope;
     56 // instead, use a FilePath::CharType[] and initialize it with
     57 // FILE_PATH_LITERAL.  At runtime, a FilePath object can be created from the
     58 // character array.  Example:
     59 //
     60 // | const FilePath::CharType kLogFileName[] = FILE_PATH_LITERAL("log.txt");
     61 // |
     62 // | void Function() {
     63 // |   FilePath log_file_path(kLogFileName);
     64 // |   [...]
     65 // | }
     66 //
     67 // WARNING: FilePaths should ALWAYS be displayed with LTR directionality, even
     68 // when the UI language is RTL. This means you always need to pass filepaths
     69 // through base::i18n::WrapPathWithLTRFormatting() before displaying them in the
     70 // RTL UI.
     71 //
     72 // This is a very common source of bugs, please try to keep this in mind.
     73 //
     74 // ARCANE BITS OF PATH TRIVIA
     75 //
     76 //  - A double leading slash is actually part of the POSIX standard.  Systems
     77 //    are allowed to treat // as an alternate root, as Windows does for UNC
     78 //    (network share) paths.  Most POSIX systems don't do anything special
     79 //    with two leading slashes, but FilePath handles this case properly
     80 //    in case it ever comes across such a system.  FilePath needs this support
     81 //    for Windows UNC paths, anyway.
     82 //    References:
     83 //    The Open Group Base Specifications Issue 7, sections 3.266 ("Pathname")
     84 //    and 4.12 ("Pathname Resolution"), available at:
     85 //    http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_266
     86 //    http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_12
     87 //
     88 //  - Windows treats c:\\ the same way it treats \\.  This was intended to
     89 //    allow older applications that require drive letters to support UNC paths
     90 //    like \\server\share\path, by permitting c:\\server\share\path as an
     91 //    equivalent.  Since the OS treats these paths specially, FilePath needs
     92 //    to do the same.  Since Windows can use either / or \ as the separator,
     93 //    FilePath treats c://, c:\\, //, and \\ all equivalently.
     94 //    Reference:
     95 //    The Old New Thing, "Why is a drive letter permitted in front of UNC
     96 //    paths (sometimes)?", available at:
     97 //    http://blogs.msdn.com/oldnewthing/archive/2005/11/22/495740.aspx
     98 
     99 #ifndef BASE_FILE_PATH_H_
    100 #define BASE_FILE_PATH_H_
    101 #pragma once
    102 
    103 #include <stddef.h>
    104 #include <string>
    105 #include <vector>
    106 
    107 #include "base/base_api.h"
    108 #include "base/compiler_specific.h"
    109 #include "base/hash_tables.h"
    110 #include "base/string16.h"
    111 #include "base/string_piece.h"  // For implicit conversions.
    112 #include "build/build_config.h"
    113 
    114 // Windows-style drive letter support and pathname separator characters can be
    115 // enabled and disabled independently, to aid testing.  These #defines are
    116 // here so that the same setting can be used in both the implementation and
    117 // in the unit test.
        //
        // As written, both switches are defined together when OS_WIN is defined and
        // neither is defined by this header otherwise.
    118 #if defined(OS_WIN)
    119 #define FILE_PATH_USES_DRIVE_LETTERS
    120 #define FILE_PATH_USES_WIN_SEPARATORS
    121 #endif  // OS_WIN
    122 
    123 class Pickle;
    124 
    125 // An abstraction to isolate users from the differences between native
    126 // pathnames on different platforms.
    127 class BASE_API FilePath {
    128  public:
    129 #if defined(OS_POSIX)
    130   // On most platforms, native pathnames are char arrays, and the encoding
    131   // may or may not be specified.  On Mac OS X, native pathnames are encoded
    132   // in UTF-8.
    133   typedef std::string StringType;
    134 #elif defined(OS_WIN)
    135   // On Windows, for Unicode-aware applications, native pathnames are wchar_t
    136   // arrays encoded in UTF-16.
    137   typedef std::wstring StringType;
    138 #endif  // OS_WIN
    139 
          // The character type of StringType: char when StringType is std::string,
          // wchar_t when it is std::wstring.
    140   typedef StringType::value_type CharType;
    141 
    142   // Null-terminated array of separators used to separate components in
    143   // hierarchical paths.  Each character in this array is a valid separator,
    144   // but kSeparators[0] is treated as the canonical separator and will be used
    145   // when composing pathnames.
    146   static const CharType kSeparators[];
    147 
    148   // A special path component meaning "this directory."
    149   static const CharType kCurrentDirectory[];
    150 
    151   // A special path component meaning "the parent directory."
    152   static const CharType kParentDirectory[];
    153 
    154   // The character used to identify a file extension.
    155   static const CharType kExtensionSeparator;
    156 
          // Default construction, copy construction, construction from a native
          // string (explicit, so a StringType never converts silently), destruction,
          // and copy assignment.  All are defined out of line.
          // NOTE(review): whether the StringType constructor normalizes |path| in any
          // way is not visible from this header -- confirm in the implementation.
    157   FilePath();
    158   FilePath(const FilePath& that);
    159   explicit FilePath(const StringType& path);
    160   ~FilePath();
    161   FilePath& operator=(const FilePath& that);
    162 
          // Equality and inequality; defined out of line.
          // NOTE(review): these may not be a plain string comparison on every
          // platform -- confirm in the implementation before relying on that.
    163   bool operator==(const FilePath& that) const;
    164 
    165   bool operator!=(const FilePath& that) const;
    166 
    167   // Required for some STL containers and operations
          // Orders by the stored native strings using StringType's own operator<,
          // i.e. a case-sensitive comparison of the raw characters.
    168   bool operator<(const FilePath& that) const {
    169     return path_ < that.path_;
    170   }
    171 
          // Returns the stored native path string by const reference.
    172   const StringType& value() const { return path_; }
    173 
          // Returns true if the stored path string is empty.
    174   bool empty() const { return path_.empty(); }
    175 
          // Resets the stored path string to the empty string.
    176   void clear() { path_.clear(); }
    177 
    178   // Returns true if |character| is in kSeparators.
    179   static bool IsSeparator(CharType character);
    180 
    181   // Returns, via |components|, a vector of all of the components of the
    182   // provided path. It is equivalent to calling DirName().value() on the path's
    183   // root component, and BaseName().value() on each child component.
    184   void GetComponents(std::vector<FilePath::StringType>* components) const;
    185 
    186   // Returns true if this FilePath is a strict parent of the |child|. Absolute
    187   // and relative paths are accepted, i.e. /foo is a parent of /foo/bar and
    188   // foo is a parent of foo/bar. Does not convert paths to absolute, follow
    189   // symlinks or directory navigation (e.g. ".."). A path is *NOT* its own
    190   // parent.
    191   bool IsParent(const FilePath& child) const;
    192 
    193   // If IsParent(child) holds, appends to path (if non-NULL) the
    194   // relative path to child and returns true.  For example, if parent
    195   // holds "/Users/johndoe/Library/Application Support", child holds
    196   // "/Users/johndoe/Library/Application Support/Google/Chrome/Default", and
    197   // *path holds "/Users/johndoe/Library/Caches", then after
    198   // parent.AppendRelativePath(child, path) is called *path will hold
    199   // "/Users/johndoe/Library/Caches/Google/Chrome/Default".  Otherwise,
    200   // returns false.
    201   bool AppendRelativePath(const FilePath& child, FilePath* path) const;
    202 
    203   // Returns a FilePath corresponding to the directory containing the path
    204   // named by this object, stripping away the file component.  If this object
    205   // only contains one component, returns a FilePath identifying
    206   // kCurrentDirectory.  If this object already refers to the root directory,
    207   // returns a FilePath identifying the root directory.
    208   FilePath DirName() const;
    209 
    210   // Returns a FilePath corresponding to the last path component of this
    211   // object, either a file or a directory.  If this object already refers to
    212   // the root directory, returns a FilePath identifying the root directory;
    213   // this is the only situation in which BaseName will return an absolute path.
    214   FilePath BaseName() const;
    215 
    216   // Returns ".jpg" for path "C:\pics\jojo.jpg", or an empty string if
    217   // the file has no extension.  If non-empty, Extension() will always start
    218   // with precisely one ".".  The following code should always work regardless
    219   // of the value of path.
    220   // new_path = path.RemoveExtension().value().append(path.Extension());
    221   // ASSERT(new_path == path.value());
    222   // NOTE: this is different from the original file_util implementation which
    223   // returned the extension without a leading "." ("jpg" instead of ".jpg")
    224   StringType Extension() const;
    225 
    226   // Returns "C:\pics\jojo" for path "C:\pics\jojo.jpg"
    227   // NOTE: this is slightly different from the similar file_util implementation
    228   // which returned simply 'jojo'.
    229   FilePath RemoveExtension() const;
    230 
    231   // Inserts |suffix| after the file name portion of |path| but before the
    232   // extension.  Returns "" if BaseName() == "." or "..".
    233   // Examples:
    234   // path == "C:\pics\jojo.jpg" suffix == " (1)", returns "C:\pics\jojo (1).jpg"
    235   // path == "jojo.jpg"         suffix == " (1)", returns "jojo (1).jpg"
    236   // path == "C:\pics\jojo"     suffix == " (1)", returns "C:\pics\jojo (1)"
    237   // path == "C:\pics.old\jojo" suffix == " (1)", returns "C:\pics.old\jojo (1)"
    238   FilePath InsertBeforeExtension(const StringType& suffix) const;
    239   FilePath InsertBeforeExtensionASCII(const base::StringPiece& suffix) const;
    240 
    241   // Replaces the extension of |file_name| with |extension|.  If |file_name|
    242   // does not have an extension, then |extension| is added.  If |extension| is
    243   // empty, then the extension is removed from |file_name|.
    244   // Returns "" if BaseName() == "." or "..".
    245   FilePath ReplaceExtension(const StringType& extension) const;
    246 
    247   // Returns true if the file path matches the specified extension. The test is
    248   // case insensitive. Don't forget the leading period if appropriate.
    249   bool MatchesExtension(const StringType& extension) const;
    250 
    251   // Returns a FilePath by appending a separator and the supplied path
    252   // component to this object's path.  Append takes care to avoid adding
    253   // excessive separators if this object's path already ends with a separator.
    254   // If this object's path is kCurrentDirectory, a new FilePath corresponding
    255   // only to |component| is returned.  |component| must be a relative path;
    256   // it is an error to pass an absolute path.
    257   FilePath Append(const StringType& component) const WARN_UNUSED_RESULT;
    258   FilePath Append(const FilePath& component) const WARN_UNUSED_RESULT;
    259 
    260   // Although Windows StringType is std::wstring, since the encoding it uses for
    261   // paths is well defined, it can handle ASCII path components as well.
    262   // Mac uses UTF8, and since ASCII is a subset of that, it works there as well.
    263   // On Linux, although it can use any 8-bit encoding for paths, we assume that
    264   // ASCII is a valid subset, regardless of the encoding, since many operating
    265   // system paths will always be ASCII.
    266   FilePath AppendASCII(const base::StringPiece& component)
    267       const WARN_UNUSED_RESULT;
    268 
    269   // Returns true if this FilePath contains an absolute path.  On Windows, an
    270   // absolute path begins with either a drive letter specification followed by
    271   // a separator character, or with two separator characters.  On POSIX
    272   // platforms, an absolute path begins with a separator character.
    273   bool IsAbsolute() const;
    274 
    275   // Returns a copy of this FilePath that does not end with a trailing
    276   // separator.
    277   FilePath StripTrailingSeparators() const;
    278 
    279   // Returns true if this FilePath contains any attempt to reference a parent
    280   // directory (i.e. has a path component that is "..").
    281   bool ReferencesParent() const;
    282 
    283   // Return a Unicode human-readable version of this path.
    284   // Warning: you can *not*, in general, go from a display name back to a real
    285   // path.  Only use this when displaying paths to users, not just when you
    286   // want to stuff a string16 into some other API.
    287   string16 LossyDisplayName() const;
    288 
    289   // Return the path as ASCII, or the empty string if the path is not ASCII.
    290   // This should only be used for cases where the FilePath is representing a
    291   // known-ASCII filename.
    292   std::string MaybeAsASCII() const;
    293 
    294   // Older Chromium code assumes that paths are always wstrings.
    295   // This function converts wstrings to FilePaths, and is
    296   // useful to smooth porting that old code to the FilePath API.
    297   // It has "Hack" in its name so people feel bad about using it.
    298   // http://code.google.com/p/chromium/issues/detail?id=24672
    299   //
    300   // If you are trying to be a good citizen and remove these, ask yourself:
    301   // - Am I interacting with other Chrome code that deals with files?  Then
    302   //   try to convert the API into using FilePath.
    303   // - Am I interacting with OS-native calls?  Then use value() to get at an
    304   //   OS-native string format.
    305   // - Am I using well-known file names, like "config.ini"?  Then use the
    306   //   ASCII functions (we require paths to always be supersets of ASCII).
    307   // - Am I displaying a string to the user in some UI?  Then use the
    308   //   LossyDisplayName() function, but keep in mind that you can't
    309   //   ever use the result of that again as a path.
    310   static FilePath FromWStringHack(const std::wstring& wstring);
    311 
    312   // Static helper methods to write/read a StringType to/from a pickle.
          // NOTE(review): the bool returned by ReadStringTypeFromPickle presumably
          // reports whether the read succeeded, and |iter| presumably tracks the
          // read position within |pickle| -- confirm against the Pickle API.
    313   static void WriteStringTypeToPickle(Pickle* pickle,
    314                                       const FilePath::StringType& path);
    315   static bool ReadStringTypeFromPickle(Pickle* pickle, void** iter,
    316                                        FilePath::StringType* path);
    317 
          // NOTE(review): presumably these write path_ to, and load path_ from,
          // |pickle| -- confirm in the implementation file.  Note that
          // WriteToPickle is declared non-const.
    318   void WriteToPickle(Pickle* pickle);
    319   bool ReadFromPickle(Pickle* pickle, void** iter);
    320 
    321 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
    322   // Normalize all path separators to backslash.
    323   FilePath NormalizeWindowsPathSeparators() const;
    324 #endif
    325 
    326   // Compare two strings in the same way the file system does.
    327   // Note that these always ignore case, even on file systems that are case-
    328   // sensitive. If case-sensitive comparison is ever needed, add corresponding
    329   // methods here.
    330   // The methods are written as a static method so that they can also be used
    331   // on parts of a file path, e.g., just the extension.
    332   // CompareIgnoreCase() returns -1, 0 or 1 for less-than, equal-to and
    333   // greater-than respectively.
    334   static int CompareIgnoreCase(const StringType& string1,
    335                                const StringType& string2);
          // Convenience wrapper: true when CompareIgnoreCase() returns 0.
    336   static bool CompareEqualIgnoreCase(const StringType& string1,
    337                                      const StringType& string2) {
    338     return CompareIgnoreCase(string1, string2) == 0;
    339   }
          // Convenience wrapper: true when CompareIgnoreCase() returns a negative
          // value, i.e. |string1| sorts before |string2|.
    340   static bool CompareLessIgnoreCase(const StringType& string1,
    341                                     const StringType& string2) {
    342     return CompareIgnoreCase(string1, string2) < 0;
    343   }
    344 
    345 #if defined(OS_MACOSX)
    346   // Returns the string in the special canonical decomposed form as defined for
    347   // HFS, which is close to, but not quite, decomposition form D. See
    348   // http://developer.apple.com/mac/library/technotes/tn/tn1150.html#UnicodeSubtleties
    349   // for further comments.
    350   // Returns the empty string if the conversion failed.
    351   static StringType GetHFSDecomposedForm(const FilePath::StringType& string);
    352 
    353   // Special UTF-8 version of FastUnicodeCompare. Cf:
    354   // http://developer.apple.com/mac/library/technotes/tn/tn1150.html#StringComparisonAlgorithm
    355   // IMPORTANT: The input strings must be in the special HFS decomposed form!
    356   // (cf. above GetHFSDecomposedForm method)
    357   static int HFSFastUnicodeCompare(const StringType& string1,
    358                                    const StringType& string2);
    359 #endif
    360 
    361  private:
    362   // Remove trailing separators from this object.  If the path is absolute, it
    363   // will never be stripped any more than to refer to the absolute root
    364   // directory, so "////" will become "/", not "".  A leading pair of
    365   // separators is never stripped, to support alternate roots.  This is used to
    366   // support UNC paths on Windows.
    367   void StripTrailingSeparatorsInternal();
    368 
          // The pathname in the platform's native string type; the only data member.
    369   StringType path_;
    370 };
    371 
    372 // Macros for string literal initialization of FilePath::CharType[], and for
    373 // using a FilePath::CharType[] in a printf-style format string.
        //
        //   FILE_PATH_LITERAL(x)  Expands to the literal |x| unchanged on POSIX and
        //                         to the wide literal L##x on Windows.
        //   PRFilePath            The conversion specifier only ("s" or "ls"), as a
        //                         narrow string literal without the leading '%'; the
        //                         caller supplies the '%' in the format string.
        //   PRFilePathLiteral     A complete conversion ("%s" or "%ls"); on Windows
        //                         it is itself a wide string literal.
    374 #if defined(OS_POSIX)
    375 #define FILE_PATH_LITERAL(x) x
    376 #define PRFilePath "s"
    377 #define PRFilePathLiteral "%s"
    378 #elif defined(OS_WIN)
    379 #define FILE_PATH_LITERAL(x) L ## x
    380 #define PRFilePath "ls"
    381 #define PRFilePathLiteral L"%ls"
    382 #endif  // OS_WIN
    383 
    384 // Provide a hash function so that hash_sets and maps can contain FilePath
    385 // objects.
        // Both variants hash the native string returned by FilePath::value().  This
        // header provides no hash when neither COMPILER_GCC nor COMPILER_MSVC is
        // defined.
    386 #if defined(COMPILER_GCC)
    387 namespace __gnu_cxx {
    388 
        // Specialization of __gnu_cxx::hash for FilePath that forwards to the hash
        // specialization for FilePath::StringType.
    389 template<>
    390 struct hash<FilePath> {
    391   size_t operator()(const FilePath& f) const {
    392     return hash<FilePath::StringType>()(f.value());
    393   }
    394 };
    395 
    396 }  // namespace __gnu_cxx
    397 #elif defined(COMPILER_MSVC)
    398 namespace stdext {
    399 
        // hash_value overload for FilePath that forwards to the hash_value overload
        // selected for the native string type.
    400 inline size_t hash_value(const FilePath& f) {
    401   return hash_value(f.value());
    402 }
    403 
    404 }  // namespace stdext
    405 #endif  // COMPILER
    406 
    407 #endif  // BASE_FILE_PATH_H_
    408