Home | History | Annotate | Download | only in util
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 #include "tensorflow/core/util/memmapped_file_system.h"
     16 
     17 #include "tensorflow/core/lib/core/errors.h"
     18 #include "tensorflow/core/lib/strings/str_util.h"
     19 #include "tensorflow/core/platform/protobuf.h"
     20 #include "tensorflow/core/util/memmapped_file_system.pb.h"
     21 
     22 namespace tensorflow {
     23 
     24 namespace {
     25 
     26 uint64 DecodeUint64LittleEndian(const uint8* buffer) {
     27   uint64 result = 0;
     28   for (int i = 0; i < static_cast<int>(sizeof(uint64)); ++i) {
     29     result |= static_cast<uint64>(buffer[i]) << (8 * i);
     30   }
     31   return result;
     32 }
     33 
     34 }  // namespace
     35 
     36 namespace {
     37 
     38 class ReadOnlyMemoryRegionFromMemmapped : public ReadOnlyMemoryRegion {
     39  public:
     40   ReadOnlyMemoryRegionFromMemmapped(const void* data, uint64 length)
     41       : data_(data), length_(length) {}
     42   ~ReadOnlyMemoryRegionFromMemmapped() override = default;
     43   const void* data() override { return data_; }
     44   uint64 length() override { return length_; }
     45 
     46  private:
     47   const void* const data_;
     48   const uint64 length_;
     49   // intentionally copyable
     50 };
     51 
     52 class RandomAccessFileFromMemmapped : public RandomAccessFile {
     53  public:
     54   RandomAccessFileFromMemmapped(const void* data, uint64 length)
     55       : data_(data), length_(length) {}
     56 
     57   ~RandomAccessFileFromMemmapped() override = default;
     58 
     59   Status Name(StringPiece* result) const override {
     60     return errors::Unimplemented(
     61         "RandomAccessFileFromMemmapped does not support Name()");
     62   }
     63 
     64   Status Read(uint64 offset, size_t to_read, StringPiece* result,
     65               char* scratch) const override {
     66     if (offset >= length_) {
     67       *result = StringPiece(scratch, 0);
     68       return Status(error::OUT_OF_RANGE, "Read after file end");
     69     }
     70     const uint64 region_left =
     71         std::min(length_ - offset, static_cast<uint64>(to_read));
     72     *result =
     73         StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left);
     74     return (region_left == to_read)
     75                ? Status::OK()
     76                : Status(error::OUT_OF_RANGE, "Read less bytes than requested");
     77   }
     78 
     79  private:
     80   const void* const data_;
     81   const uint64 length_;
     82   // intentionally copyable
     83 };
     84 
     85 }  // namespace
     86 
     87 MemmappedFileSystem::MemmappedFileSystem() {}
     88 
     89 Status MemmappedFileSystem::FileExists(const string& fname) {
     90   if (!mapped_memory_) {
     91     return errors::FailedPrecondition("MemmappedEnv is not initialized");
     92   }
     93   const auto dir_element = directory_.find(fname);
     94   if (dir_element != directory_.end()) {
     95     return Status::OK();
     96   }
     97   return errors::NotFound(fname, " not found");
     98 }
     99 
    100 Status MemmappedFileSystem::NewRandomAccessFile(
    101     const string& filename, std::unique_ptr<RandomAccessFile>* result) {
    102   if (!mapped_memory_) {
    103     return errors::FailedPrecondition("MemmappedEnv is not initialized");
    104   }
    105   const auto dir_element = directory_.find(filename);
    106   if (dir_element == directory_.end()) {
    107     return errors::NotFound("Region ", filename, " is not found");
    108   }
    109   result->reset(new RandomAccessFileFromMemmapped(
    110       GetMemoryWithOffset(dir_element->second.offset),
    111       dir_element->second.length));
    112   return Status::OK();
    113 }
    114 
    115 Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile(
    116     const string& filename, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
    117   if (!mapped_memory_) {
    118     return errors::FailedPrecondition("MemmappedEnv is not initialized");
    119   }
    120   const auto dir_element = directory_.find(filename);
    121   if (dir_element == directory_.end()) {
    122     return errors::NotFound("Region ", filename, " is not found");
    123   }
    124   result->reset(new ReadOnlyMemoryRegionFromMemmapped(
    125       GetMemoryWithOffset(dir_element->second.offset),
    126       dir_element->second.length));
    127   return Status::OK();
    128 }
    129 
    130 Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size) {
    131   if (!mapped_memory_) {
    132     return errors::FailedPrecondition("MemmappedEnv is not initialized");
    133   }
    134   const auto dir_element = directory_.find(filename);
    135   if (dir_element == directory_.end()) {
    136     return errors::NotFound("Region ", filename, " is not found");
    137   }
    138   *size = dir_element->second.length;
    139   return Status::OK();
    140 }
    141 
    142 Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat) {
    143   uint64 size;
    144   auto status = GetFileSize(fname, &size);
    145   if (status.ok()) {
    146     stat->length = size;
    147   }
    148   return status;
    149 }
    150 
    151 Status MemmappedFileSystem::NewWritableFile(const string& filename,
    152                                             std::unique_ptr<WritableFile>* wf) {
    153   return errors::Unimplemented("memmapped format doesn't support writing");
    154 }
    155 
    156 Status MemmappedFileSystem::NewAppendableFile(
    157     const string& filename, std::unique_ptr<WritableFile>* result) {
    158   return errors::Unimplemented("memmapped format doesn't support writing");
    159 }
    160 
    161 Status MemmappedFileSystem::GetChildren(const string& filename,
    162                                         std::vector<string>* strings) {
    163   return errors::Unimplemented("memmapped format doesn't support GetChildren");
    164 }
    165 
    166 Status MemmappedFileSystem::GetMatchingPaths(const string& pattern,
    167                                              std::vector<string>* results) {
    168   return errors::Unimplemented(
    169       "memmapped format doesn't support GetMatchingPaths");
    170 }
    171 
    172 Status MemmappedFileSystem::DeleteFile(const string& filename) {
    173   return errors::Unimplemented("memmapped format doesn't support DeleteFile");
    174 }
    175 
    176 Status MemmappedFileSystem::CreateDir(const string& dirname) {
    177   return errors::Unimplemented("memmapped format doesn't support CreateDir");
    178 }
    179 
    180 Status MemmappedFileSystem::DeleteDir(const string& dirname) {
    181   return errors::Unimplemented("memmapped format doesn't support DeleteDir");
    182 }
    183 
    184 Status MemmappedFileSystem::RenameFile(const string& filename_from,
    185                                        const string& filename_to) {
    186   return errors::Unimplemented("memmapped format doesn't support RenameFile");
    187 }
    188 
    189 const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
    190   return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
    191 }
    192 
// Namespace-scope definitions of the static constexpr members
// kMemmappedPackagePrefix and kMemmappedPackageDefaultGraphDef. No initializer
// is given here, so the values must come from the in-class declarations
// (presumably in the header, which is not visible from this file). When
// _MSC_VER is defined the members are defined in pointer form; otherwise they
// are defined in array form.
#if defined(_MSC_VER)
constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix;
constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef;
#else
constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[];
constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
#endif
    200 
    201 Status MemmappedFileSystem::InitializeFromFile(Env* env,
    202                                                const string& filename) {
    203   TF_RETURN_IF_ERROR(
    204       env->NewReadOnlyMemoryRegionFromFile(filename, &mapped_memory_));
    205   directory_.clear();
    206   if (mapped_memory_->length() <= sizeof(uint64)) {
    207     return errors::DataLoss("Corrupted memmapped model file: ", filename,
    208                             " Invalid package size");
    209   }
    210   const auto memory_start =
    211       reinterpret_cast<const uint8*>(mapped_memory_->data());
    212   const uint64 directory_offset = DecodeUint64LittleEndian(
    213       memory_start + mapped_memory_->length() - sizeof(uint64));
    214   if (directory_offset > mapped_memory_->length() - sizeof(uint64)) {
    215     return errors::DataLoss("Corrupted memmapped model file: ", filename,
    216                             " Invalid directory offset");
    217   }
    218   MemmappedFileSystemDirectory proto_directory;
    219   if (!ParseProtoUnlimited(
    220           &proto_directory, memory_start + directory_offset,
    221           mapped_memory_->length() - directory_offset - sizeof(uint64))) {
    222     return errors::DataLoss("Corrupted memmapped model file: ", filename,
    223                             " Can't parse its internal directory");
    224   }
    225 
    226   // Iterating in reverse order to get lengths of elements;
    227   uint64 prev_element_offset = directory_offset;
    228   for (auto element_iter = proto_directory.element().rbegin();
    229        element_iter != proto_directory.element().rend(); ++element_iter) {
    230     // Check that the element offset is in the right range.
    231     if (element_iter->offset() >= prev_element_offset) {
    232       return errors::DataLoss("Corrupted memmapped model file: ", filename,
    233                               " Invalid offset of internal component");
    234     }
    235     if (!directory_
    236              .insert(std::make_pair(
    237                  element_iter->name(),
    238                  FileRegion(element_iter->offset(),
    239                             prev_element_offset - element_iter->offset())))
    240              .second) {
    241       return errors::DataLoss("Corrupted memmapped model file: ", filename,
    242                               " Duplicate name of internal component ",
    243                               element_iter->name());
    244     }
    245     prev_element_offset = element_iter->offset();
    246   }
    247   return Status::OK();
    248 }
    249 
    250 bool MemmappedFileSystem::IsMemmappedPackageFilename(const string& filename) {
    251   return str_util::StartsWith(filename, kMemmappedPackagePrefix);
    252 }
    253 
    254 namespace {
    255 bool IsValidRegionChar(char c) {
    256   return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
    257          (c >= '0' && c <= '9') || c == '_' || c == '.';
    258 }
    259 }  // namespace
    260 
    261 bool MemmappedFileSystem::IsWellFormedMemmappedPackageFilename(
    262     const string& filename) {
    263   if (!IsMemmappedPackageFilename(filename)) {
    264     return false;
    265   }
    266   for (char c :
    267        filename.substr(strlen(kMemmappedPackagePrefix),
    268                        filename.length() - strlen(kMemmappedPackagePrefix))) {
    269     if (!IsValidRegionChar(c)) {
    270       return false;
    271     }
    272   }
    273   return true;
    274 }
    275 
// Wraps `env` by forwarding it to the EnvWrapper base. No memmapped file
// system is created here; that happens in InitializeFromFile().
MemmappedEnv::MemmappedEnv(Env* env) : EnvWrapper(env) {}
    277 
    278 Status MemmappedEnv::GetFileSystemForFile(const string& fname,
    279                                           FileSystem** result) {
    280   if (MemmappedFileSystem::IsMemmappedPackageFilename(fname)) {
    281     if (!memmapped_file_system_) {
    282       return errors::FailedPrecondition(
    283           "MemmappedEnv is not initialized from a file.");
    284     }
    285     *result = memmapped_file_system_.get();
    286     return Status::OK();
    287   }
    288   return EnvWrapper::GetFileSystemForFile(fname, result);
    289 }
    290 
    291 Status MemmappedEnv::GetRegisteredFileSystemSchemes(
    292     std::vector<string>* schemes) {
    293   const auto status = EnvWrapper::GetRegisteredFileSystemSchemes(schemes);
    294   if (status.ok()) {
    295     schemes->emplace_back(MemmappedFileSystem::kMemmappedPackagePrefix);
    296   }
    297   return status;
    298 }
    299 
    300 Status MemmappedEnv::InitializeFromFile(const string& package_filename) {
    301   std::unique_ptr<MemmappedFileSystem> file_system_ptr(new MemmappedFileSystem);
    302   const auto status =
    303       file_system_ptr->InitializeFromFile(target(), package_filename);
    304   if (status.ok()) {
    305     memmapped_file_system_ = std::move(file_system_ptr);
    306   }
    307   return status;
    308 }
    309 
    310 }  // namespace tensorflow
    311