1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/util/events_writer.h" 17 18 #include <stddef.h> // for NULL 19 20 #include "tensorflow/core/lib/core/errors.h" 21 #include "tensorflow/core/lib/core/status.h" 22 #include "tensorflow/core/lib/io/path.h" 23 #include "tensorflow/core/lib/strings/strcat.h" 24 #include "tensorflow/core/lib/strings/stringprintf.h" 25 #include "tensorflow/core/platform/env.h" 26 #include "tensorflow/core/platform/host_info.h" 27 #include "tensorflow/core/platform/logging.h" 28 #include "tensorflow/core/platform/types.h" 29 #include "tensorflow/core/util/event.pb.h" 30 31 namespace tensorflow { 32 33 EventsWriter::EventsWriter(const string& file_prefix) 34 // TODO(jeff,sanjay): Pass in env and use that here instead of Env::Default 35 : env_(Env::Default()), 36 file_prefix_(file_prefix), 37 num_outstanding_events_(0) {} 38 39 EventsWriter::~EventsWriter() { 40 Close().IgnoreError(); // Autoclose in destructor. 41 } 42 43 Status EventsWriter::Init() { return InitWithSuffix(""); } 44 45 Status EventsWriter::InitWithSuffix(const string& suffix) { 46 file_suffix_ = suffix; 47 return InitIfNeeded(); 48 } 49 50 Status EventsWriter::InitIfNeeded() { 51 if (recordio_writer_ != nullptr) { 52 CHECK(!filename_.empty()); 53 if (!FileStillExists().ok()) { 54 // Warn user of data loss and let .reset() below do basic cleanup. 55 if (num_outstanding_events_ > 0) { 56 LOG(WARNING) << "Re-initialization, attempting to open a new file, " 57 << num_outstanding_events_ << " events will be lost."; 58 } 59 } else { 60 // No-op: File is present and writer is initialized. 61 return Status::OK(); 62 } 63 } 64 65 int64 time_in_seconds = env_->NowMicros() / 1000000; 66 67 filename_ = 68 strings::Printf("%s.out.tfevents.%010lld.%s%s", file_prefix_.c_str(), 69 static_cast<int64>(time_in_seconds), 70 port::Hostname().c_str(), file_suffix_.c_str()); 71 72 TF_RETURN_WITH_CONTEXT_IF_ERROR( 73 env_->NewWritableFile(filename_, &recordio_file_), 74 "Creating writable file ", filename_); 75 recordio_writer_.reset(new io::RecordWriter(recordio_file_.get())); 76 if (recordio_writer_ == nullptr) { 77 return errors::Unknown("Could not create record writer"); 78 } 79 num_outstanding_events_ = 0; 80 VLOG(1) << "Successfully opened events file: " << filename_; 81 { 82 // Write the first event with the current version, and flush 83 // right away so the file contents will be easily determined. 84 85 Event event; 86 event.set_wall_time(time_in_seconds); 87 event.set_file_version(strings::StrCat(kVersionPrefix, kCurrentVersion)); 88 WriteEvent(event); 89 TF_RETURN_WITH_CONTEXT_IF_ERROR(Flush(), "Flushing first event."); 90 } 91 return Status::OK(); 92 } 93 94 string EventsWriter::FileName() { 95 if (filename_.empty()) { 96 InitIfNeeded().IgnoreError(); 97 } 98 return filename_; 99 } 100 101 void EventsWriter::WriteSerializedEvent(StringPiece event_str) { 102 if (recordio_writer_ == nullptr) { 103 if (!InitIfNeeded().ok()) { 104 LOG(ERROR) << "Write failed because file could not be opened."; 105 return; 106 } 107 } 108 num_outstanding_events_++; 109 recordio_writer_->WriteRecord(event_str).IgnoreError(); 110 } 111 112 // NOTE(touts); This is NOT the function called by the Python code. 113 // Python calls WriteSerializedEvent(), see events_writer.i. 114 void EventsWriter::WriteEvent(const Event& event) { 115 string record; 116 event.AppendToString(&record); 117 WriteSerializedEvent(record); 118 } 119 120 Status EventsWriter::Flush() { 121 if (num_outstanding_events_ == 0) return Status::OK(); 122 CHECK(recordio_file_ != nullptr) << "Unexpected NULL file"; 123 124 TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_writer_->Flush(), "Failed to flush ", 125 num_outstanding_events_, " to ", filename_); 126 TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_file_->Sync(), "Failed to sync ", 127 num_outstanding_events_, " to ", filename_); 128 129 // The FileStillExists() condition is necessary because 130 // recordio_writer_->Sync() can return OK even if the underlying 131 // file has been deleted. EventWriter.FileDeletionBeforeWriting 132 // demonstrates this and will fail if the FileHasDisappeared() 133 // condition is removed. 134 // Also, we deliberately attempt to Sync() before checking for a 135 // disappearing file, in case for some file system File::Exists() is 136 // false after File::Open() but before File::Sync(). 137 TF_RETURN_WITH_CONTEXT_IF_ERROR(FileStillExists(), "Failed to flush ", 138 num_outstanding_events_, " to ", filename_); 139 VLOG(1) << "Wrote " << num_outstanding_events_ << " events to disk."; 140 num_outstanding_events_ = 0; 141 return Status::OK(); 142 } 143 144 Status EventsWriter::Close() { 145 Status status = Flush(); 146 if (recordio_file_ != nullptr) { 147 Status close_status = recordio_file_->Close(); 148 if (!close_status.ok()) { 149 status = close_status; 150 } 151 recordio_writer_.reset(nullptr); 152 recordio_file_.reset(nullptr); 153 } 154 num_outstanding_events_ = 0; 155 return status; 156 } 157 158 Status EventsWriter::FileStillExists() { 159 if (env_->FileExists(filename_).ok()) { 160 return Status::OK(); 161 } 162 // This can happen even with non-null recordio_writer_ if some other 163 // process has removed the file. 164 return errors::Unknown("The events file ", filename_, " has disappeared."); 165 } 166 167 } // namespace tensorflow 168