1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" 17 18 #include <mutex> // NOLINT(build/c++11): only using std::call_once, not mutex. 19 #include <vector> 20 #include "tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h" 21 #include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h" 22 #include "tensorflow/core/lib/strings/str_util.h" 23 24 namespace xla { 25 namespace legacy_flags { 26 27 namespace { 28 29 DebugOptions* flag_values; 30 std::vector<tensorflow::Flag>* flag_objects; 31 std::once_flag flags_init; 32 33 void SetDebugOptionsDefaults(DebugOptions* flags) { 34 flags->set_xla_enable_fast_math(true); 35 flags->set_xla_llvm_enable_alias_scope_metadata(true); 36 flags->set_xla_llvm_enable_noalias_metadata(true); 37 flags->set_xla_llvm_enable_invariant_load_metadata(true); 38 flags->set_xla_llvm_disable_expensive_passes(false); 39 flags->set_xla_backend_optimization_level(3); 40 flags->set_xla_cpu_multi_thread_eigen(true); 41 flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib"); 42 flags->set_xla_eliminate_hlo_implicit_broadcast(true); 43 44 // Set cudnn batchnorm off by default; it does not provide a performance win 45 // on average. 46 flags->set_xla_gpu_use_cudnn_batchnorm(false); 47 } 48 49 // Allocates flag_values and flag_objects; this function must not be called more 50 // than once - its call done via call_once. 51 void AllocateFlags() { 52 flag_values = new DebugOptions; 53 54 SetDebugOptionsDefaults(flag_values); 55 56 // Returns a lambda that calls "member_setter" on "flag_values" with the 57 // argument passed in to the lambda. 58 auto bool_setter_for = [](void (DebugOptions::*member_setter)(bool)) { 59 return [member_setter](bool value) { 60 (flag_values->*member_setter)(value); 61 return true; 62 }; 63 }; 64 65 // Returns a lambda that calls "member_setter" on "flag_values" with the 66 // argument passed in to the lambda. 67 auto int32_setter_for = [](void (DebugOptions::*member_setter)(int32)) { 68 return [member_setter](int32 value) { 69 (flag_values->*member_setter)(value); 70 return true; 71 }; 72 }; 73 74 // Custom "sub-parser" lambda for xla_disable_hlo_passes. 75 auto setter_for_xla_disable_hlo_passes = [](string comma_separated_values) { 76 std::vector<string> disabled_passes = 77 tensorflow::str_util::Split(comma_separated_values, ','); 78 for (const auto& passname : disabled_passes) { 79 flag_values->add_xla_disable_hlo_passes(passname); 80 } 81 return true; 82 }; 83 84 // Custom "sub-parser" lambda for xla_backend_extra_options. 85 auto setter_for_xla_backend_extra_options = 86 [](string comma_separated_values) { 87 auto* extra_options_map = 88 flag_values->mutable_xla_backend_extra_options(); 89 impl::parse_xla_backend_extra_options(extra_options_map, 90 comma_separated_values); 91 return true; 92 }; 93 94 // Custom "sub-parser" lambda for xla_reduce_precision. 95 auto setter_for_xla_reduce_precision = 96 [](string reduce_precision_option_value) { 97 HloReducePrecisionOptions* option_proto = 98 flag_values->add_hlo_reduce_precision_options(); 99 return impl::parse_xla_reduce_precision_option( 100 option_proto, reduce_precision_option_value); 101 }; 102 103 flag_objects = new std::vector<tensorflow::Flag>({ 104 tensorflow::Flag( 105 "xla_generate_hlo_graph", 106 flag_values->mutable_xla_generate_hlo_graph(), 107 "HLO modules matching this regex will be dumped to a .dot file " 108 "throughout various stages in compilation."), 109 tensorflow::Flag( 110 "xla_hlo_graph_addresses", 111 bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses), 112 flag_values->xla_hlo_graph_addresses(), 113 "With xla_generate_hlo_graph, show addresses of HLO ops in " 114 "graph dump."), 115 tensorflow::Flag( 116 "xla_hlo_graph_path", flag_values->mutable_xla_hlo_graph_path(), 117 "With xla_generate_hlo_graph, dump the graphs into this path."), 118 tensorflow::Flag( 119 "xla_hlo_dump_as_graphdef", 120 bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef), 121 flag_values->xla_hlo_dump_as_graphdef(), 122 "Dump HLO graphs as TensorFlow GraphDefs."), 123 tensorflow::Flag( 124 "xla_hlo_graph_sharding_color", 125 bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color), 126 flag_values->xla_hlo_graph_sharding_color(), 127 "Assign colors based on sharding assignments when generating the " 128 "HLO graphs."), 129 tensorflow::Flag( 130 "xla_hlo_tfgraph_device_scopes", 131 bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes), 132 flag_values->xla_hlo_tfgraph_device_scopes(), 133 "When generating TensorFlow HLO graphs, if the HLO instructions " 134 "are assigned to a specific device, prefix the name scope with " 135 "\"devX\" with X being the device ordinal."), 136 tensorflow::Flag( 137 "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(), 138 "HLO modules matching this regex will be dumped to LOG(INFO)."), 139 tensorflow::Flag( 140 "xla_generate_hlo_text_to", 141 flag_values->mutable_xla_generate_hlo_text_to(), 142 "Dump all HLO modules as text into the provided directory path."), 143 tensorflow::Flag( 144 "xla_enable_fast_math", 145 bool_setter_for(&DebugOptions::set_xla_enable_fast_math), 146 flag_values->xla_enable_fast_math(), 147 "Enable unsafe fast-math optimizations in the compiler; " 148 "this may produce faster code at the expense of some accuracy."), 149 tensorflow::Flag( 150 "xla_llvm_enable_alias_scope_metadata", 151 bool_setter_for( 152 &DebugOptions::set_xla_llvm_enable_alias_scope_metadata), 153 flag_values->xla_llvm_enable_alias_scope_metadata(), 154 "In LLVM-based backends, enable the emission of " 155 "!alias.scope metadata in the generated IR."), 156 tensorflow::Flag( 157 "xla_llvm_enable_noalias_metadata", 158 bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata), 159 flag_values->xla_llvm_enable_noalias_metadata(), 160 "In LLVM-based backends, enable the emission of " 161 "!noalias metadata in the generated IR."), 162 tensorflow::Flag( 163 "xla_llvm_enable_invariant_load_metadata", 164 bool_setter_for( 165 &DebugOptions::set_xla_llvm_enable_invariant_load_metadata), 166 flag_values->xla_llvm_enable_invariant_load_metadata(), 167 "In LLVM-based backends, enable the emission of " 168 "!invariant.load metadata in " 169 "the generated IR."), 170 tensorflow::Flag( 171 "xla_llvm_disable_expensive_passes", 172 bool_setter_for(&DebugOptions::set_xla_llvm_disable_expensive_passes), 173 flag_values->xla_llvm_disable_expensive_passes(), 174 "In LLVM-based backends, disable a custom set of " 175 "expensive optimization passes."), 176 tensorflow::Flag( 177 "xla_backend_optimization_level", 178 int32_setter_for(&DebugOptions::set_xla_backend_optimization_level), 179 flag_values->xla_backend_optimization_level(), 180 "Numerical optimization level for the XLA compiler backend."), 181 tensorflow::Flag( 182 "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "", 183 "Comma-separated list of hlo passes to be disabled. These names " 184 "must exactly match the passes' names; no whitespace around " 185 "commas."), 186 tensorflow::Flag( 187 "xla_embed_ir_in_executable", 188 bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable), 189 flag_values->xla_embed_ir_in_executable(), 190 "Embed the compiler IR as a string in the executable."), 191 tensorflow::Flag( 192 "xla_dump_ir_to", flag_values->mutable_xla_dump_ir_to(), 193 "Dump the compiler IR into this directory as individual files."), 194 tensorflow::Flag( 195 "xla_eliminate_hlo_implicit_broadcast", 196 bool_setter_for( 197 &DebugOptions::set_xla_eliminate_hlo_implicit_broadcast), 198 flag_values->xla_eliminate_hlo_implicit_broadcast(), 199 "Eliminate implicit broadcasts when lowering user " 200 "computations to HLO instructions; use explicit " 201 "broadcast instead."), 202 tensorflow::Flag( 203 "xla_cpu_multi_thread_eigen", 204 bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen), 205 flag_values->xla_cpu_multi_thread_eigen(), 206 "When generating calls to Eigen in the CPU backend, " 207 "use multi-threaded Eigen mode."), 208 tensorflow::Flag("xla_gpu_cuda_data_dir", 209 flag_values->mutable_xla_gpu_cuda_data_dir(), 210 "If non-empty, speficies a local directory containing " 211 "ptxas and nvvm libdevice files; otherwise we use " 212 "those from runfile directories."), 213 tensorflow::Flag("xla_gpu_ftz", 214 bool_setter_for(&DebugOptions::set_xla_gpu_ftz), 215 flag_values->xla_gpu_ftz(), 216 "If true, flush-to-zero semantics are enabled in the " 217 "code generated for GPUs."), 218 tensorflow::Flag( 219 "xla_gpu_disable_multi_streaming", 220 bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming), 221 flag_values->xla_gpu_disable_multi_streaming(), 222 "If true, multi-streaming in the GPU backend is disabled."), 223 tensorflow::Flag( 224 "xla_dump_optimized_hlo_proto_to", 225 flag_values->mutable_xla_dump_optimized_hlo_proto_to(), 226 "Dump Hlo after all hlo passes are executed as proto binary into " 227 "this directory."), 228 tensorflow::Flag( 229 "xla_dump_unoptimized_hlo_proto_to", 230 flag_values->mutable_xla_dump_unoptimized_hlo_proto_to(), 231 "Dump HLO before any hlo passes are executed as proto binary into " 232 "this directory."), 233 tensorflow::Flag("xla_dump_per_pass_hlo_proto_to", 234 flag_values->mutable_xla_dump_per_pass_hlo_proto_to(), 235 "Dump HLO after each pass as an HloProto in binary file " 236 "format into this directory."), 237 tensorflow::Flag( 238 "xla_test_all_output_layouts", 239 bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts), 240 flag_values->xla_test_all_output_layouts(), 241 "Let ClientLibraryTestBase::ComputeAndCompare* test " 242 "all permutations of output layouts. For example, with " 243 "a 3D shape, all permutations of the set {0, 1, 2} are " 244 "tried."), 245 tensorflow::Flag( 246 "xla_test_all_input_layouts", 247 bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts), 248 flag_values->xla_test_all_input_layouts(), 249 "Let ClientLibraryTestBase::ComputeAndCompare* test " 250 "all permutations of *input* layouts. For example, for " 251 "2 input arguments with 2D shape and 4D shape, the " 252 "computation will run 2! * 4! times for every possible " 253 "layouts"), 254 tensorflow::Flag( 255 "xla_hlo_profile", 256 bool_setter_for(&DebugOptions::set_xla_hlo_profile), 257 flag_values->xla_hlo_profile(), 258 "Instrument the computation to collect per-HLO cycle counts"), 259 tensorflow::Flag("xla_dump_computations_to", 260 flag_values->mutable_xla_dump_computations_to(), 261 "Dump computations that XLA executes into the provided " 262 "directory path"), 263 tensorflow::Flag("xla_dump_executions_to", 264 flag_values->mutable_xla_dump_executions_to(), 265 "Dump parameters and results of computations that XLA " 266 "executes into the provided directory path"), 267 tensorflow::Flag("xla_backend_extra_options", 268 setter_for_xla_backend_extra_options, "", 269 "Extra options to pass to a backend; " 270 "comma-separated list of 'key=val' strings (=val " 271 "may be omitted); no whitespace around commas."), 272 tensorflow::Flag("xla_reduce_precision", setter_for_xla_reduce_precision, 273 "", 274 "Directions for adding reduce-precision operations. " 275 "Format is 'LOCATION=E,M:OPS;NAMES' where LOCATION is " 276 "the class of locations in which to insert the " 277 "operations (e.g., 'OP_OUTPUTS'), E and M are the " 278 "exponent and matissa bit counts respectively, and " 279 "OPS and NAMES are comma-separated (no spaces) lists " 280 "of the operation types and names to which to attach " 281 "the reduce-precision operations. The NAMES string " 282 "and its preceding ';' may be omitted. This option " 283 "may be repeated to define multiple sets of added " 284 "reduce-precision operations."), 285 tensorflow::Flag( 286 "xla_gpu_use_cudnn_batchnorm", 287 bool_setter_for(&DebugOptions::set_xla_gpu_use_cudnn_batchnorm), 288 flag_values->xla_gpu_use_cudnn_batchnorm(), 289 "Allows the GPU backend to implement batchnorm HLOs using cudnn, " 290 "rather than expanding them to a soup of HLOs."), 291 }); 292 ParseFlagsFromEnv(*flag_objects); 293 } 294 295 } // namespace 296 297 void AppendDebugOptionsFlags(std::vector<tensorflow::Flag>* flag_list) { 298 std::call_once(flags_init, &AllocateFlags); 299 flag_list->insert(flag_list->end(), flag_objects->begin(), 300 flag_objects->end()); 301 } 302 303 xla::DebugOptions GetDebugOptionsFromFlags() { 304 std::call_once(flags_init, &AllocateFlags); 305 return *flag_values; 306 } 307 308 } // namespace legacy_flags 309 } // namespace xla 310