Home | History | Annotate | Download | only in distributed_runtime
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
     17 #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
     18 
     19 #include <unordered_map>
     20 
     21 #include "tensorflow/core/common_runtime/device.h"
     22 #include "tensorflow/core/distributed_runtime/call_options.h"
     23 #include "tensorflow/core/distributed_runtime/master_env.h"
     24 #include "tensorflow/core/distributed_runtime/master_session.h"
     25 #include "tensorflow/core/lib/core/notification.h"
     26 #include "tensorflow/core/lib/gtl/map_util.h"
     27 #include "tensorflow/core/platform/macros.h"
     28 #include "tensorflow/core/platform/mutex.h"
     29 #include "tensorflow/core/platform/types.h"
     30 #include "tensorflow/core/protobuf/master.pb.h"
     31 #include "tensorflow/core/util/util.h"
     32 
     33 namespace tensorflow {
     34 
     35 class Master {
          // Request-facing interface of the distributed-runtime master. Each public
          // handler below takes a request message, a mutable response message and a
          // `done` closure that is handed a Status.
          // NOTE(review): when, and on which thread, `done` is invoked is decided by
          // the implementation file and is not visible in this header -- confirm
          // there before relying on it.
     36  public:
          // `env` is presumably stored in env_ (which is not owned) and
          // `session_gc_seconds` presumably seeds session_gc_seconds_ -- confirm in
          // the constructor definition.
     37   explicit Master(MasterEnv* env, double session_gc_seconds);
     38   virtual ~Master();
     39 
     40   // Completion callback type: a closure that receives a Status.
     41   using MyClosure = std::function<void(const Status&)>;
     42 
          // Entry point for CreateSessionRequest.
     43   void CreateSession(const CreateSessionRequest* req,
     44                      CreateSessionResponse* resp, MyClosure done);
     45 
          // Entry point for ExtendSessionRequest.
     46   void ExtendSession(const ExtendSessionRequest* req,
     47                      ExtendSessionResponse* resp, MyClosure done);
     48 
          // Entry point for PartialRunSetupRequest.
     49   void PartialRunSetup(const PartialRunSetupRequest* req,
     50                        PartialRunSetupResponse* resp, MyClosure done);
     51 
          // Entry point for running a step. Unlike the other handlers it also takes
          // per-call CallOptions and works on wrapper objects for the request and
          // response rather than on the messages directly.
     52   void RunStep(CallOptions* opts, const RunStepRequestWrapper* req,
     53                MutableRunStepResponseWrapper* resp, MyClosure done);
     54 
          // Entry point for CloseSessionRequest.
     55   void CloseSession(const CloseSessionRequest* req, CloseSessionResponse* resp,
     56                     MyClosure done);
     57 
          // Entry point for ListDevicesRequest.
     58   void ListDevices(const ListDevicesRequest* req, ListDevicesResponse* resp,
     59                    MyClosure done);
     60 
     61   // See tensorflow::Reset() and the comment on ResetRequest.
     62   void Reset(const ResetRequest* req, ResetResponse* resp, MyClosure done);
     63 
     64  private:
          // Short alias for this class.
     65   using ME = Master;
     66 
     67   // Not owned.
     68   MasterEnv* env_ = nullptr;
     69 
     70   // Owned. Guards every member annotated GUARDED_BY(mu_) below.
     71   mutex mu_;
     72 
     73   // shutdown_ is set to true by the dtor.
     74   condition_variable shutdown_cv_;
     75   bool shutdown_ GUARDED_BY(mu_) = false;
          // Given an in-class initializer, like env_ and shutdown_ above, so the
          // pointer never holds an indeterminate value; a constructor mem-initializer
          // still takes precedence over it.
          // NOTE(review): who starts and joins this thread is not visible here --
          // presumably the constructor/destructor; confirm.
     76   Thread* gc_thread_ = nullptr;
     77 
     78   // Maps session handles to sessions.
          // NOTE(review): values are raw pointers; lifetime is presumably managed
          // through the reference count mentioned on FindMasterSession() -- confirm.
     79   std::unordered_map<string, MasterSession*> sessions_ GUARDED_BY(mu_);
     80 
     81   // Moving average of step times.
     82   MovingAverage last_1000_steps_ GUARDED_BY(mu_);
     83 
     84   // Cumulative number of steps executed. Starts from a defined zero even if a
          // constructor does not list it explicitly.
     85   int64 step_count_ GUARDED_BY(mu_) = 0;
     86 
     87   // If a session is not active for this many seconds, it will be
     88   // closed automatically.
     89   const double session_gc_seconds_;
     90 
     91   // Call CleanupAll on all workers.
     92   void CleanupWorkers(const ResetRequest& reset);
     93 
     94   // Cleanup unused session.
          // NOTE(review): presumably the loop executed by gc_thread_ and paced by
          // session_gc_seconds_ -- confirm in the implementation.
     95   void GC();
     96 
     97   // Find master session by session handle, and increments the reference count
     98   // on the returned MasterSession if not null.
     99   MasterSession* FindMasterSession(const string& handle);
    100 
    101   TF_DISALLOW_COPY_AND_ASSIGN(Master);
    102 };
    103 
    104 }  // namespace tensorflow
    105 
    106 #endif  // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
    107