/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PLATFORM_GCS_FILE_SYSTEM_H_
#define TENSORFLOW_CORE_PLATFORM_GCS_FILE_SYSTEM_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/cloud/auth_provider.h"
#include "tensorflow/core/platform/cloud/expiring_lru_cache.h"
#include "tensorflow/core/platform/cloud/file_block_cache.h"
#include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
#include "tensorflow/core/platform/cloud/gcs_throttle.h"
#include "tensorflow/core/platform/cloud/http_request.h"
#include "tensorflow/core/platform/cloud/retrying_file_system.h"
#include "tensorflow/core/platform/file_system.h"

namespace tensorflow {

/// Google Cloud Storage implementation of a file system.
///
/// The clients should use RetryingGcsFileSystem defined below,
/// which adds retry logic to GCS operations.
39 class GcsFileSystem : public FileSystem { 40 public: 41 struct TimeoutConfig; 42 43 GcsFileSystem(); 44 GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider, 45 std::unique_ptr<HttpRequest::Factory> http_request_factory, 46 size_t block_size, size_t max_bytes, uint64 max_staleness, 47 uint64 stat_cache_max_age, size_t stat_cache_max_entries, 48 uint64 matching_paths_cache_max_age, 49 size_t matching_paths_cache_max_entries, 50 int64 initial_retry_delay_usec, TimeoutConfig timeouts, 51 std::pair<const string, const string>* additional_header); 52 53 Status NewRandomAccessFile( 54 const string& filename, 55 std::unique_ptr<RandomAccessFile>* result) override; 56 57 Status NewWritableFile(const string& fname, 58 std::unique_ptr<WritableFile>* result) override; 59 60 Status NewAppendableFile(const string& fname, 61 std::unique_ptr<WritableFile>* result) override; 62 63 Status NewReadOnlyMemoryRegionFromFile( 64 const string& filename, 65 std::unique_ptr<ReadOnlyMemoryRegion>* result) override; 66 67 Status FileExists(const string& fname) override; 68 69 Status Stat(const string& fname, FileStatistics* stat) override; 70 71 Status GetChildren(const string& dir, std::vector<string>* result) override; 72 73 Status GetMatchingPaths(const string& pattern, 74 std::vector<string>* results) override; 75 76 Status DeleteFile(const string& fname) override; 77 78 Status CreateDir(const string& dirname) override; 79 80 Status DeleteDir(const string& dirname) override; 81 82 Status GetFileSize(const string& fname, uint64* file_size) override; 83 84 Status RenameFile(const string& src, const string& target) override; 85 86 Status IsDirectory(const string& fname) override; 87 88 Status DeleteRecursively(const string& dirname, int64* undeleted_files, 89 int64* undeleted_dirs) override; 90 91 void FlushCaches() override; 92 93 /// These accessors are mainly for testing purposes, to verify that the 94 /// environment variables that control these parameters are handled correctly. 
95 size_t block_size() const { return file_block_cache_->block_size(); } 96 size_t max_bytes() const { return file_block_cache_->max_bytes(); } 97 uint64 max_staleness() const { return file_block_cache_->max_staleness(); } 98 TimeoutConfig timeouts() const { return timeouts_; } 99 string additional_header_name() const { 100 return additional_header_ ? additional_header_->first : ""; 101 } 102 string additional_header_value() const { 103 return additional_header_ ? additional_header_->second : ""; 104 } 105 106 uint64 stat_cache_max_age() const { return stat_cache_->max_age(); } 107 size_t stat_cache_max_entries() const { return stat_cache_->max_entries(); } 108 109 uint64 matching_paths_cache_max_age() const { 110 return matching_paths_cache_->max_age(); 111 } 112 size_t matching_paths_cache_max_entries() const { 113 return matching_paths_cache_->max_entries(); 114 } 115 116 /// Structure containing the information for timeouts related to accessing the 117 /// GCS APIs. 118 /// 119 /// All values are in seconds. 120 struct TimeoutConfig { 121 // The request connection timeout. If a connection cannot be established 122 // within `connect` seconds, abort the request. 123 uint32 connect = 120; // 2 minutes 124 125 // The request idle timeout. If a request has seen no activity in `idle` 126 // seconds, abort the request. 127 uint32 idle = 60; // 1 minute 128 129 // The maximum total time a metadata request can take. If a request has not 130 // completed within `metadata` seconds, the request is aborted. 131 uint32 metadata = 3600; // 1 hour 132 133 // The maximum total time a block read request can take. If a request has 134 // not completed within `read` seconds, the request is aborted. 135 uint32 read = 3600; // 1 hour 136 137 // The maximum total time an upload request can take. If a request has not 138 // completed within `write` seconds, the request is aborted. 
139 uint32 write = 3600; // 1 hour 140 141 TimeoutConfig() {} 142 TimeoutConfig(uint32 connect, uint32 idle, uint32 metadata, uint32 read, 143 uint32 write) 144 : connect(connect), 145 idle(idle), 146 metadata(metadata), 147 read(read), 148 write(write) {} 149 }; 150 151 Status CreateHttpRequest(std::unique_ptr<HttpRequest>* request); 152 153 private: 154 /// \brief Checks if the bucket exists. Returns OK if the check succeeded. 155 /// 156 /// 'result' is set if the function returns OK. 'result' cannot be nullptr. 157 Status BucketExists(const string& bucket, bool* result); 158 159 /// \brief Checks if the object exists. Returns OK if the check succeeded. 160 /// 161 /// 'result' is set if the function returns OK. 'result' cannot be nullptr. 162 Status ObjectExists(const string& fname, const string& bucket, 163 const string& object, bool* result); 164 165 /// \brief Checks if the folder exists. Returns OK if the check succeeded. 166 /// 167 /// 'result' is set if the function returns OK. 'result' cannot be nullptr. 168 Status FolderExists(const string& dirname, bool* result); 169 170 /// \brief Internal version of GetChildren with more knobs. 171 /// 172 /// If 'recursively' is true, returns all objects in all subfolders. 173 /// Otherwise only returns the immediate children in the directory. 174 /// 175 /// If 'include_self_directory_marker' is true and there is a GCS directory 176 /// marker at the path 'dir', GetChildrenBound will return an empty string 177 /// as one of the children that represents this marker. 178 Status GetChildrenBounded(const string& dir, uint64 max_results, 179 std::vector<string>* result, bool recursively, 180 bool include_self_directory_marker); 181 /// Retrieves file statistics assuming fname points to a GCS object. 
182 Status StatForObject(const string& fname, const string& bucket, 183 const string& object, FileStatistics* stat); 184 Status RenameObject(const string& src, const string& target); 185 186 std::unique_ptr<FileBlockCache> MakeFileBlockCache(size_t block_size, 187 size_t max_bytes, 188 uint64 max_staleness); 189 190 /// Loads file contents from GCS for a given filename, offset, and length. 191 Status LoadBufferFromGCS(const string& filename, size_t offset, size_t n, 192 char* buffer, size_t* bytes_transferred); 193 194 std::unique_ptr<AuthProvider> auth_provider_; 195 std::unique_ptr<HttpRequest::Factory> http_request_factory_; 196 std::unique_ptr<FileBlockCache> file_block_cache_; 197 std::unique_ptr<GcsDnsCache> dns_cache_; 198 GcsThrottle throttle_; 199 200 using StatCache = ExpiringLRUCache<FileStatistics>; 201 std::unique_ptr<StatCache> stat_cache_; 202 203 using MatchingPathsCache = ExpiringLRUCache<std::vector<string>>; 204 std::unique_ptr<MatchingPathsCache> matching_paths_cache_; 205 206 TimeoutConfig timeouts_; 207 208 /// The initial delay for exponential backoffs when retrying failed calls. 209 const int64 initial_retry_delay_usec_ = 1000000L; 210 211 // Additional header material to be transmitted with all GCS requests 212 std::unique_ptr<std::pair<const string, const string>> additional_header_; 213 214 TF_DISALLOW_COPY_AND_ASSIGN(GcsFileSystem); 215 }; 216 217 /// Google Cloud Storage implementation of a file system with retry on failures. 218 class RetryingGcsFileSystem : public RetryingFileSystem { 219 public: 220 RetryingGcsFileSystem() 221 : RetryingFileSystem(std::unique_ptr<FileSystem>(new GcsFileSystem)) {} 222 }; 223 224 } // namespace tensorflow 225 226 #endif // TENSORFLOW_CORE_PLATFORM_GCS_FILE_SYSTEM_H_ 227