1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 #include "tensorflow/core/util/memmapped_file_system.h" 16 17 #include "tensorflow/core/lib/core/errors.h" 18 #include "tensorflow/core/lib/strings/str_util.h" 19 #include "tensorflow/core/platform/protobuf.h" 20 #include "tensorflow/core/util/memmapped_file_system.pb.h" 21 22 namespace tensorflow { 23 24 namespace { 25 26 uint64 DecodeUint64LittleEndian(const uint8* buffer) { 27 uint64 result = 0; 28 for (int i = 0; i < static_cast<int>(sizeof(uint64)); ++i) { 29 result |= static_cast<uint64>(buffer[i]) << (8 * i); 30 } 31 return result; 32 } 33 34 } // namespace 35 36 namespace { 37 38 class ReadOnlyMemoryRegionFromMemmapped : public ReadOnlyMemoryRegion { 39 public: 40 ReadOnlyMemoryRegionFromMemmapped(const void* data, uint64 length) 41 : data_(data), length_(length) {} 42 ~ReadOnlyMemoryRegionFromMemmapped() override = default; 43 const void* data() override { return data_; } 44 uint64 length() override { return length_; } 45 46 private: 47 const void* const data_; 48 const uint64 length_; 49 // intentionally copyable 50 }; 51 52 class RandomAccessFileFromMemmapped : public RandomAccessFile { 53 public: 54 RandomAccessFileFromMemmapped(const void* data, uint64 length) 55 : data_(data), length_(length) {} 56 57 ~RandomAccessFileFromMemmapped() override = default; 58 59 Status Name(StringPiece* result) const override { 60 return errors::Unimplemented( 61 "RandomAccessFileFromMemmapped does not support Name()"); 62 } 63 64 Status Read(uint64 offset, size_t to_read, StringPiece* result, 65 char* scratch) const override { 66 if (offset >= length_) { 67 *result = StringPiece(scratch, 0); 68 return Status(error::OUT_OF_RANGE, "Read after file end"); 69 } 70 const uint64 region_left = 71 std::min(length_ - offset, static_cast<uint64>(to_read)); 72 *result = 73 StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left); 74 return (region_left == to_read) 75 ? Status::OK() 76 : Status(error::OUT_OF_RANGE, "Read less bytes than requested"); 77 } 78 79 private: 80 const void* const data_; 81 const uint64 length_; 82 // intentionally copyable 83 }; 84 85 } // namespace 86 87 MemmappedFileSystem::MemmappedFileSystem() {} 88 89 Status MemmappedFileSystem::FileExists(const string& fname) { 90 if (!mapped_memory_) { 91 return errors::FailedPrecondition("MemmappedEnv is not initialized"); 92 } 93 const auto dir_element = directory_.find(fname); 94 if (dir_element != directory_.end()) { 95 return Status::OK(); 96 } 97 return errors::NotFound(fname, " not found"); 98 } 99 100 Status MemmappedFileSystem::NewRandomAccessFile( 101 const string& filename, std::unique_ptr<RandomAccessFile>* result) { 102 if (!mapped_memory_) { 103 return errors::FailedPrecondition("MemmappedEnv is not initialized"); 104 } 105 const auto dir_element = directory_.find(filename); 106 if (dir_element == directory_.end()) { 107 return errors::NotFound("Region ", filename, " is not found"); 108 } 109 result->reset(new RandomAccessFileFromMemmapped( 110 GetMemoryWithOffset(dir_element->second.offset), 111 dir_element->second.length)); 112 return Status::OK(); 113 } 114 115 Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( 116 const string& filename, std::unique_ptr<ReadOnlyMemoryRegion>* result) { 117 if (!mapped_memory_) { 118 return errors::FailedPrecondition("MemmappedEnv is not initialized"); 119 } 120 const auto dir_element = directory_.find(filename); 121 if (dir_element == directory_.end()) { 122 return errors::NotFound("Region ", filename, " is not found"); 123 } 124 result->reset(new ReadOnlyMemoryRegionFromMemmapped( 125 GetMemoryWithOffset(dir_element->second.offset), 126 dir_element->second.length)); 127 return Status::OK(); 128 } 129 130 Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size) { 131 if (!mapped_memory_) { 132 return errors::FailedPrecondition("MemmappedEnv is not initialized"); 133 } 134 const auto dir_element = directory_.find(filename); 135 if (dir_element == directory_.end()) { 136 return errors::NotFound("Region ", filename, " is not found"); 137 } 138 *size = dir_element->second.length; 139 return Status::OK(); 140 } 141 142 Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat) { 143 uint64 size; 144 auto status = GetFileSize(fname, &size); 145 if (status.ok()) { 146 stat->length = size; 147 } 148 return status; 149 } 150 151 Status MemmappedFileSystem::NewWritableFile(const string& filename, 152 std::unique_ptr<WritableFile>* wf) { 153 return errors::Unimplemented("memmapped format doesn't support writing"); 154 } 155 156 Status MemmappedFileSystem::NewAppendableFile( 157 const string& filename, std::unique_ptr<WritableFile>* result) { 158 return errors::Unimplemented("memmapped format doesn't support writing"); 159 } 160 161 Status MemmappedFileSystem::GetChildren(const string& filename, 162 std::vector<string>* strings) { 163 return errors::Unimplemented("memmapped format doesn't support GetChildren"); 164 } 165 166 Status MemmappedFileSystem::GetMatchingPaths(const string& pattern, 167 std::vector<string>* results) { 168 return errors::Unimplemented( 169 "memmapped format doesn't support GetMatchingPaths"); 170 } 171 172 Status MemmappedFileSystem::DeleteFile(const string& filename) { 173 return errors::Unimplemented("memmapped format doesn't support DeleteFile"); 174 } 175 176 Status MemmappedFileSystem::CreateDir(const string& dirname) { 177 return errors::Unimplemented("memmapped format doesn't support CreateDir"); 178 } 179 180 Status MemmappedFileSystem::DeleteDir(const string& dirname) { 181 return errors::Unimplemented("memmapped format doesn't support DeleteDir"); 182 } 183 184 Status MemmappedFileSystem::RenameFile(const string& filename_from, 185 const string& filename_to) { 186 return errors::Unimplemented("memmapped format doesn't support RenameFile"); 187 } 188 189 const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const { 190 return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset; 191 } 192 193 #if defined(_MSC_VER) 194 constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix; 195 constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef; 196 #else 197 constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[]; 198 constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[]; 199 #endif 200 201 Status MemmappedFileSystem::InitializeFromFile(Env* env, 202 const string& filename) { 203 TF_RETURN_IF_ERROR( 204 env->NewReadOnlyMemoryRegionFromFile(filename, &mapped_memory_)); 205 directory_.clear(); 206 if (mapped_memory_->length() <= sizeof(uint64)) { 207 return errors::DataLoss("Corrupted memmapped model file: ", filename, 208 " Invalid package size"); 209 } 210 const auto memory_start = 211 reinterpret_cast<const uint8*>(mapped_memory_->data()); 212 const uint64 directory_offset = DecodeUint64LittleEndian( 213 memory_start + mapped_memory_->length() - sizeof(uint64)); 214 if (directory_offset > mapped_memory_->length() - sizeof(uint64)) { 215 return errors::DataLoss("Corrupted memmapped model file: ", filename, 216 " Invalid directory offset"); 217 } 218 MemmappedFileSystemDirectory proto_directory; 219 if (!ParseProtoUnlimited( 220 &proto_directory, memory_start + directory_offset, 221 mapped_memory_->length() - directory_offset - sizeof(uint64))) { 222 return errors::DataLoss("Corrupted memmapped model file: ", filename, 223 " Can't parse its internal directory"); 224 } 225 226 // Iterating in reverse order to get lengths of elements; 227 uint64 prev_element_offset = directory_offset; 228 for (auto element_iter = proto_directory.element().rbegin(); 229 element_iter != proto_directory.element().rend(); ++element_iter) { 230 // Check that the element offset is in the right range. 231 if (element_iter->offset() >= prev_element_offset) { 232 return errors::DataLoss("Corrupted memmapped model file: ", filename, 233 " Invalid offset of internal component"); 234 } 235 if (!directory_ 236 .insert(std::make_pair( 237 element_iter->name(), 238 FileRegion(element_iter->offset(), 239 prev_element_offset - element_iter->offset()))) 240 .second) { 241 return errors::DataLoss("Corrupted memmapped model file: ", filename, 242 " Duplicate name of internal component ", 243 element_iter->name()); 244 } 245 prev_element_offset = element_iter->offset(); 246 } 247 return Status::OK(); 248 } 249 250 bool MemmappedFileSystem::IsMemmappedPackageFilename(const string& filename) { 251 return str_util::StartsWith(filename, kMemmappedPackagePrefix); 252 } 253 254 namespace { 255 bool IsValidRegionChar(char c) { 256 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || 257 (c >= '0' && c <= '9') || c == '_' || c == '.'; 258 } 259 } // namespace 260 261 bool MemmappedFileSystem::IsWellFormedMemmappedPackageFilename( 262 const string& filename) { 263 if (!IsMemmappedPackageFilename(filename)) { 264 return false; 265 } 266 for (char c : 267 filename.substr(strlen(kMemmappedPackagePrefix), 268 filename.length() - strlen(kMemmappedPackagePrefix))) { 269 if (!IsValidRegionChar(c)) { 270 return false; 271 } 272 } 273 return true; 274 } 275 276 MemmappedEnv::MemmappedEnv(Env* env) : EnvWrapper(env) {} 277 278 Status MemmappedEnv::GetFileSystemForFile(const string& fname, 279 FileSystem** result) { 280 if (MemmappedFileSystem::IsMemmappedPackageFilename(fname)) { 281 if (!memmapped_file_system_) { 282 return errors::FailedPrecondition( 283 "MemmappedEnv is not initialized from a file."); 284 } 285 *result = memmapped_file_system_.get(); 286 return Status::OK(); 287 } 288 return EnvWrapper::GetFileSystemForFile(fname, result); 289 } 290 291 Status MemmappedEnv::GetRegisteredFileSystemSchemes( 292 std::vector<string>* schemes) { 293 const auto status = EnvWrapper::GetRegisteredFileSystemSchemes(schemes); 294 if (status.ok()) { 295 schemes->emplace_back(MemmappedFileSystem::kMemmappedPackagePrefix); 296 } 297 return status; 298 } 299 300 Status MemmappedEnv::InitializeFromFile(const string& package_filename) { 301 std::unique_ptr<MemmappedFileSystem> file_system_ptr(new MemmappedFileSystem); 302 const auto status = 303 file_system_ptr->InitializeFromFile(target(), package_filename); 304 if (status.ok()) { 305 memmapped_file_system_ = std::move(file_system_ptr); 306 } 307 return status; 308 } 309 310 } // namespace tensorflow 311