/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_UTIL_WORK_SHARDER_H_
#define TENSORFLOW_UTIL_WORK_SHARDER_H_

#include <functional>

#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

// Shards the "total" unit of work assuming each unit of work having
// roughly "cost_per_unit". Each unit of work is indexed 0, 1, ...,
// total - 1. Each shard contains 1 or more units of work and the
// total cost of each shard is roughly the same. The calling thread and the
// "workers" are used to compute each shard (calling work(start,
// limit). A common configuration is that "workers" is a thread pool
// with at least "max_parallelism" threads.
//
// "cost_per_unit" is an estimate of the number of CPU cycles (or nanoseconds
// if not CPU-bound) to complete a unit of work. Overestimating creates too
// many shards and CPU time will be dominated by per-shard overhead, such as
// Context creation. Underestimating may not fully make use of the specified
// parallelism.
//
// "work" should be a callable taking (int64, int64) arguments.
// work(start, limit) computes the work units from [start,
// limit), i.e., [start, limit) is a shard.
//
// Partitions [0, total) into contiguous [start, limit) shards and invokes
// "work" on each shard, using the calling thread plus "workers".
//
// REQUIRES: max_parallelism >= 0
// REQUIRES: workers != nullptr
// REQUIRES: total >= 0
// REQUIRES: cost_per_unit >= 0
void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total,
           int64 cost_per_unit, std::function<void(int64, int64)> work);

}  // end namespace tensorflow

#endif  // TENSORFLOW_UTIL_WORK_SHARDER_H_