Home | History | Annotate | Download | only in io
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/lib/io/path.h"
     17 
     18 #include <errno.h>
     19 #include <fcntl.h>
     20 #include <stdlib.h>
     21 #include <sys/stat.h>
     22 #include <sys/types.h>
     23 #if !defined(PLATFORM_WINDOWS)
     24 #include <unistd.h>
     25 #endif
     26 
     27 #include <vector>
     28 
     29 #include "tensorflow/core/lib/strings/scanner.h"
     30 #include "tensorflow/core/lib/strings/str_util.h"
     31 #include "tensorflow/core/lib/strings/strcat.h"
     32 #include "tensorflow/core/platform/env.h"
     33 
     34 namespace tensorflow {
     35 namespace io {
     36 namespace internal {
     37 
     38 string JoinPathImpl(std::initializer_list<StringPiece> paths) {
     39   string result;
     40 
     41   for (StringPiece path : paths) {
     42     if (path.empty()) continue;
     43 
     44     if (result.empty()) {
     45       result = path.ToString();
     46       continue;
     47     }
     48 
     49     if (result[result.size() - 1] == '/') {
     50       if (IsAbsolutePath(path)) {
     51         strings::StrAppend(&result, path.substr(1));
     52       } else {
     53         strings::StrAppend(&result, path);
     54       }
     55     } else {
     56       if (IsAbsolutePath(path)) {
     57         strings::StrAppend(&result, path);
     58       } else {
     59         strings::StrAppend(&result, "/", path);
     60       }
     61     }
     62   }
     63 
     64   return result;
     65 }
     66 
     67 // Return the parts of the URI, split on the final "/" in the path. If there is
     68 // no "/" in the path, the first part of the output is the scheme and host, and
     69 // the second is the path. If the only "/" in the path is the first character,
     70 // it is included in the first part of the output.
     71 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
     72   StringPiece scheme, host, path;
     73   ParseURI(uri, &scheme, &host, &path);
     74 
     75   auto pos = path.rfind('/');
     76 #ifdef PLATFORM_WINDOWS
     77   if (pos == StringPiece::npos) pos = path.rfind('\\');
     78 #endif
     79   // Handle the case with no '/' in 'path'.
     80   if (pos == StringPiece::npos)
     81     return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
     82                           path);
     83 
     84   // Handle the case with a single leading '/' in 'path'.
     85   if (pos == 0)
     86     return std::make_pair(
     87         StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
     88         StringPiece(path.data() + 1, path.size() - 1));
     89 
     90   return std::make_pair(
     91       StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
     92       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
     93 }
     94 
     95 // Return the parts of the basename of path, split on the final ".".
     96 // If there is no "." in the basename or "." is the final character in the
     97 // basename, the second value will be empty.
     98 std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
     99   path = Basename(path);
    100 
    101   auto pos = path.rfind('.');
    102   if (pos == StringPiece::npos)
    103     return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
    104   return std::make_pair(
    105       StringPiece(path.data(), pos),
    106       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
    107 }
    108 }  // namespace internal
    109 
    110 bool IsAbsolutePath(StringPiece path) {
    111   return !path.empty() && path[0] == '/';
    112 }
    113 
    114 StringPiece Dirname(StringPiece path) {
    115   return internal::SplitPath(path).first;
    116 }
    117 
    118 StringPiece Basename(StringPiece path) {
    119   return internal::SplitPath(path).second;
    120 }
    121 
    122 StringPiece Extension(StringPiece path) {
    123   return internal::SplitBasename(path).second;
    124 }
    125 
    126 string CleanPath(StringPiece unclean_path) {
    127   string path = unclean_path.ToString();
    128   const char* src = path.c_str();
    129   string::iterator dst = path.begin();
    130 
    131   // Check for absolute path and determine initial backtrack limit.
    132   const bool is_absolute_path = *src == '/';
    133   if (is_absolute_path) {
    134     *dst++ = *src++;
    135     while (*src == '/') ++src;
    136   }
    137   string::const_iterator backtrack_limit = dst;
    138 
    139   // Process all parts
    140   while (*src) {
    141     bool parsed = false;
    142 
    143     if (src[0] == '.') {
    144       //  1dot ".<whateverisnext>", check for END or SEP.
    145       if (src[1] == '/' || !src[1]) {
    146         if (*++src) {
    147           ++src;
    148         }
    149         parsed = true;
    150       } else if (src[1] == '.' && (src[2] == '/' || !src[2])) {
    151         // 2dot END or SEP (".." | "../<whateverisnext>").
    152         src += 2;
    153         if (dst != backtrack_limit) {
    154           // We can backtrack the previous part
    155           for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) {
    156             // Empty.
    157           }
    158         } else if (!is_absolute_path) {
    159           // Failed to backtrack and we can't skip it either. Rewind and copy.
    160           src -= 2;
    161           *dst++ = *src++;
    162           *dst++ = *src++;
    163           if (*src) {
    164             *dst++ = *src;
    165           }
    166           // We can never backtrack over a copied "../" part so set new limit.
    167           backtrack_limit = dst;
    168         }
    169         if (*src) {
    170           ++src;
    171         }
    172         parsed = true;
    173       }
    174     }
    175 
    176     // If not parsed, copy entire part until the next SEP or EOS.
    177     if (!parsed) {
    178       while (*src && *src != '/') {
    179         *dst++ = *src++;
    180       }
    181       if (*src) {
    182         *dst++ = *src++;
    183       }
    184     }
    185 
    186     // Skip consecutive SEP occurrences
    187     while (*src == '/') {
    188       ++src;
    189     }
    190   }
    191 
    192   // Calculate and check the length of the cleaned path.
    193   string::difference_type path_length = dst - path.begin();
    194   if (path_length != 0) {
    195     // Remove trailing '/' except if it is root path ("/" ==> path_length := 1)
    196     if (path_length > 1 && path[path_length - 1] == '/') {
    197       --path_length;
    198     }
    199     path.resize(path_length);
    200   } else {
    201     // The cleaned path is empty; assign "." as per the spec.
    202     path.assign(1, '.');
    203   }
    204   return path;
    205 }
    206 
    207 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
    208               StringPiece* path) {
    209   // 0. Parse scheme
    210   // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
    211   // TODO(keveman): Allow "+" and "-" in the scheme.
    212   // Keep URI pattern in tensorboard/backend/server.py updated accordingly
    213   if (!strings::Scanner(remaining)
    214            .One(strings::Scanner::LETTER)
    215            .Many(strings::Scanner::LETTER_DIGIT_DOT)
    216            .StopCapture()
    217            .OneLiteral("://")
    218            .GetResult(&remaining, scheme)) {
    219     // If there's no scheme, assume the entire string is a path.
    220     *scheme = StringPiece(remaining.begin(), 0);
    221     *host = StringPiece(remaining.begin(), 0);
    222     *path = remaining;
    223     return;
    224   }
    225 
    226   // 1. Parse host
    227   if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
    228     // No path, so the rest of the URI is the host.
    229     *host = remaining;
    230     *path = StringPiece(remaining.end(), 0);
    231     return;
    232   }
    233 
    234   // 2. The rest is the path
    235   *path = remaining;
    236 }
    237 
    238 string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
    239   if (scheme.empty()) {
    240     return path.ToString();
    241   }
    242   return strings::StrCat(scheme, "://", host, path);
    243 }
    244 
    245 // Returns a unique number every time it is called.
    246 int64 UniqueId() {
    247   static mutex mu(LINKER_INITIALIZED);
    248   static int64 id = 0;
    249   mutex_lock l(mu);
    250   return ++id;
    251 }
    252 
    253 string GetTempFilename(const string& extension) {
    254 #if defined(PLATFORM_WINDOWS) || defined(__ANDROID__)
    255   LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
    256 #else
    257   for (const char* dir : std::vector<const char*>(
    258            {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
    259     if (!dir || !dir[0]) {
    260       continue;
    261     }
    262     struct stat statbuf;
    263     if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
    264       // UniqueId is added here because mkstemps is not as thread safe as it
    265       // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
    266       // the problem.
    267       string tmp_filepath;
    268       int fd;
    269       if (extension.length()) {
    270         tmp_filepath = io::JoinPath(
    271             dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
    272                                  extension));
    273         fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
    274       } else {
    275         tmp_filepath = io::JoinPath(
    276             dir,
    277             strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
    278         fd = mkstemp(&tmp_filepath[0]);
    279       }
    280       if (fd < 0) {
    281         LOG(FATAL) << "Failed to create temp file.";
    282       } else {
    283         close(fd);
    284         return tmp_filepath;
    285       }
    286     }
    287   }
    288   LOG(FATAL) << "No temp directory found.";
    289 #endif
    290 }
    291 
    292 }  // namespace io
    293 }  // namespace tensorflow
    294