Home | History | Annotate | Download | only in legacy_flags
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
     17 
     18 #include <mutex>  // NOLINT(build/c++11): only using std::call_once, not mutex.
     19 #include <vector>
     20 #include "tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h"
     21 #include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h"
     22 #include "tensorflow/core/lib/strings/str_util.h"
     23 
     24 namespace xla {
     25 namespace legacy_flags {
     26 
     27 namespace {
     28 
     29 DebugOptions* flag_values;
     30 std::vector<tensorflow::Flag>* flag_objects;
     31 std::once_flag flags_init;
     32 
     33 void SetDebugOptionsDefaults(DebugOptions* flags) {
     34   flags->set_xla_enable_fast_math(true);
     35   flags->set_xla_llvm_enable_alias_scope_metadata(true);
     36   flags->set_xla_llvm_enable_noalias_metadata(true);
     37   flags->set_xla_llvm_enable_invariant_load_metadata(true);
     38   flags->set_xla_llvm_disable_expensive_passes(false);
     39   flags->set_xla_backend_optimization_level(3);
     40   flags->set_xla_cpu_multi_thread_eigen(true);
     41   flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
     42   flags->set_xla_eliminate_hlo_implicit_broadcast(true);
     43 
     44   // Set cudnn batchnorm off by default; it does not provide a performance win
     45   // on average.
     46   flags->set_xla_gpu_use_cudnn_batchnorm(false);
     47 }
     48 
     49 // Allocates flag_values and flag_objects; this function must not be called more
     50 // than once - its call done via call_once.
     51 void AllocateFlags() {
     52   flag_values = new DebugOptions;
     53 
     54   SetDebugOptionsDefaults(flag_values);
     55 
     56   // Returns a lambda that calls "member_setter" on "flag_values" with the
     57   // argument passed in to the lambda.
     58   auto bool_setter_for = [](void (DebugOptions::*member_setter)(bool)) {
     59     return [member_setter](bool value) {
     60       (flag_values->*member_setter)(value);
     61       return true;
     62     };
     63   };
     64 
     65   // Returns a lambda that calls "member_setter" on "flag_values" with the
     66   // argument passed in to the lambda.
     67   auto int32_setter_for = [](void (DebugOptions::*member_setter)(int32)) {
     68     return [member_setter](int32 value) {
     69       (flag_values->*member_setter)(value);
     70       return true;
     71     };
     72   };
     73 
     74   // Custom "sub-parser" lambda for xla_disable_hlo_passes.
     75   auto setter_for_xla_disable_hlo_passes = [](string comma_separated_values) {
     76     std::vector<string> disabled_passes =
     77         tensorflow::str_util::Split(comma_separated_values, ',');
     78     for (const auto& passname : disabled_passes) {
     79       flag_values->add_xla_disable_hlo_passes(passname);
     80     }
     81     return true;
     82   };
     83 
     84   // Custom "sub-parser" lambda for xla_backend_extra_options.
     85   auto setter_for_xla_backend_extra_options =
     86       [](string comma_separated_values) {
     87         auto* extra_options_map =
     88             flag_values->mutable_xla_backend_extra_options();
     89         impl::parse_xla_backend_extra_options(extra_options_map,
     90                                               comma_separated_values);
     91         return true;
     92       };
     93 
     94   // Custom "sub-parser" lambda for xla_reduce_precision.
     95   auto setter_for_xla_reduce_precision =
     96       [](string reduce_precision_option_value) {
     97         HloReducePrecisionOptions* option_proto =
     98             flag_values->add_hlo_reduce_precision_options();
     99         return impl::parse_xla_reduce_precision_option(
    100             option_proto, reduce_precision_option_value);
    101       };
    102 
    103   flag_objects = new std::vector<tensorflow::Flag>({
    104       tensorflow::Flag(
    105           "xla_generate_hlo_graph",
    106           flag_values->mutable_xla_generate_hlo_graph(),
    107           "HLO modules matching this regex will be dumped to a .dot file "
    108           "throughout various stages in compilation."),
    109       tensorflow::Flag(
    110           "xla_hlo_graph_addresses",
    111           bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),
    112           flag_values->xla_hlo_graph_addresses(),
    113           "With xla_generate_hlo_graph, show addresses of HLO ops in "
    114           "graph dump."),
    115       tensorflow::Flag(
    116           "xla_hlo_graph_path", flag_values->mutable_xla_hlo_graph_path(),
    117           "With xla_generate_hlo_graph, dump the graphs into this path."),
    118       tensorflow::Flag(
    119           "xla_hlo_dump_as_graphdef",
    120           bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef),
    121           flag_values->xla_hlo_dump_as_graphdef(),
    122           "Dump HLO graphs as TensorFlow GraphDefs."),
    123       tensorflow::Flag(
    124           "xla_hlo_graph_sharding_color",
    125           bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color),
    126           flag_values->xla_hlo_graph_sharding_color(),
    127           "Assign colors based on sharding assignments when generating the "
    128           "HLO graphs."),
    129       tensorflow::Flag(
    130           "xla_hlo_tfgraph_device_scopes",
    131           bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes),
    132           flag_values->xla_hlo_tfgraph_device_scopes(),
    133           "When generating TensorFlow HLO graphs, if the HLO instructions "
    134           "are assigned to a specific device, prefix the name scope with "
    135           "\"devX\" with X being the device ordinal."),
    136       tensorflow::Flag(
    137           "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(),
    138           "HLO modules matching this regex will be dumped to LOG(INFO)."),
    139       tensorflow::Flag(
    140           "xla_generate_hlo_text_to",
    141           flag_values->mutable_xla_generate_hlo_text_to(),
    142           "Dump all HLO modules as text into the provided directory path."),
    143       tensorflow::Flag(
    144           "xla_enable_fast_math",
    145           bool_setter_for(&DebugOptions::set_xla_enable_fast_math),
    146           flag_values->xla_enable_fast_math(),
    147           "Enable unsafe fast-math optimizations in the compiler; "
    148           "this may produce faster code at the expense of some accuracy."),
    149       tensorflow::Flag(
    150           "xla_llvm_enable_alias_scope_metadata",
    151           bool_setter_for(
    152               &DebugOptions::set_xla_llvm_enable_alias_scope_metadata),
    153           flag_values->xla_llvm_enable_alias_scope_metadata(),
    154           "In LLVM-based backends, enable the emission of "
    155           "!alias.scope metadata in the generated IR."),
    156       tensorflow::Flag(
    157           "xla_llvm_enable_noalias_metadata",
    158           bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata),
    159           flag_values->xla_llvm_enable_noalias_metadata(),
    160           "In LLVM-based backends, enable the emission of "
    161           "!noalias metadata in the generated IR."),
    162       tensorflow::Flag(
    163           "xla_llvm_enable_invariant_load_metadata",
    164           bool_setter_for(
    165               &DebugOptions::set_xla_llvm_enable_invariant_load_metadata),
    166           flag_values->xla_llvm_enable_invariant_load_metadata(),
    167           "In LLVM-based backends, enable the emission of "
    168           "!invariant.load metadata in "
    169           "the generated IR."),
    170       tensorflow::Flag(
    171           "xla_llvm_disable_expensive_passes",
    172           bool_setter_for(&DebugOptions::set_xla_llvm_disable_expensive_passes),
    173           flag_values->xla_llvm_disable_expensive_passes(),
    174           "In LLVM-based backends, disable a custom set of "
    175           "expensive optimization passes."),
    176       tensorflow::Flag(
    177           "xla_backend_optimization_level",
    178           int32_setter_for(&DebugOptions::set_xla_backend_optimization_level),
    179           flag_values->xla_backend_optimization_level(),
    180           "Numerical optimization level for the XLA compiler backend."),
    181       tensorflow::Flag(
    182           "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "",
    183           "Comma-separated list of hlo passes to be disabled. These names "
    184           "must exactly match the passes' names; no whitespace around "
    185           "commas."),
    186       tensorflow::Flag(
    187           "xla_embed_ir_in_executable",
    188           bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable),
    189           flag_values->xla_embed_ir_in_executable(),
    190           "Embed the compiler IR as a string in the executable."),
    191       tensorflow::Flag(
    192           "xla_dump_ir_to", flag_values->mutable_xla_dump_ir_to(),
    193           "Dump the compiler IR into this directory as individual files."),
    194       tensorflow::Flag(
    195           "xla_eliminate_hlo_implicit_broadcast",
    196           bool_setter_for(
    197               &DebugOptions::set_xla_eliminate_hlo_implicit_broadcast),
    198           flag_values->xla_eliminate_hlo_implicit_broadcast(),
    199           "Eliminate implicit broadcasts when lowering user "
    200           "computations to HLO instructions; use explicit "
    201           "broadcast instead."),
    202       tensorflow::Flag(
    203           "xla_cpu_multi_thread_eigen",
    204           bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen),
    205           flag_values->xla_cpu_multi_thread_eigen(),
    206           "When generating calls to Eigen in the CPU backend, "
    207           "use multi-threaded Eigen mode."),
    208       tensorflow::Flag("xla_gpu_cuda_data_dir",
    209                        flag_values->mutable_xla_gpu_cuda_data_dir(),
    210                        "If non-empty, speficies a local directory containing "
    211                        "ptxas and nvvm libdevice files; otherwise we use "
    212                        "those from runfile directories."),
    213       tensorflow::Flag("xla_gpu_ftz",
    214                        bool_setter_for(&DebugOptions::set_xla_gpu_ftz),
    215                        flag_values->xla_gpu_ftz(),
    216                        "If true, flush-to-zero semantics are enabled in the "
    217                        "code generated for GPUs."),
    218       tensorflow::Flag(
    219           "xla_gpu_disable_multi_streaming",
    220           bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming),
    221           flag_values->xla_gpu_disable_multi_streaming(),
    222           "If true, multi-streaming in the GPU backend is disabled."),
    223       tensorflow::Flag(
    224           "xla_dump_optimized_hlo_proto_to",
    225           flag_values->mutable_xla_dump_optimized_hlo_proto_to(),
    226           "Dump Hlo after all hlo passes are executed as proto binary into "
    227           "this directory."),
    228       tensorflow::Flag(
    229           "xla_dump_unoptimized_hlo_proto_to",
    230           flag_values->mutable_xla_dump_unoptimized_hlo_proto_to(),
    231           "Dump HLO before any hlo passes are executed as proto binary into "
    232           "this directory."),
    233       tensorflow::Flag("xla_dump_per_pass_hlo_proto_to",
    234                        flag_values->mutable_xla_dump_per_pass_hlo_proto_to(),
    235                        "Dump HLO after each pass as an HloProto in binary file "
    236                        "format into this directory."),
    237       tensorflow::Flag(
    238           "xla_test_all_output_layouts",
    239           bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts),
    240           flag_values->xla_test_all_output_layouts(),
    241           "Let ClientLibraryTestBase::ComputeAndCompare* test "
    242           "all permutations of output layouts. For example, with "
    243           "a 3D shape, all permutations of the set {0, 1, 2} are "
    244           "tried."),
    245       tensorflow::Flag(
    246           "xla_test_all_input_layouts",
    247           bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts),
    248           flag_values->xla_test_all_input_layouts(),
    249           "Let ClientLibraryTestBase::ComputeAndCompare* test "
    250           "all permutations of *input* layouts. For example, for "
    251           "2 input arguments with 2D shape and 4D shape, the "
    252           "computation will run 2! * 4! times for every possible "
    253           "layouts"),
    254       tensorflow::Flag(
    255           "xla_hlo_profile",
    256           bool_setter_for(&DebugOptions::set_xla_hlo_profile),
    257           flag_values->xla_hlo_profile(),
    258           "Instrument the computation to collect per-HLO cycle counts"),
    259       tensorflow::Flag("xla_dump_computations_to",
    260                        flag_values->mutable_xla_dump_computations_to(),
    261                        "Dump computations that XLA executes into the provided "
    262                        "directory path"),
    263       tensorflow::Flag("xla_dump_executions_to",
    264                        flag_values->mutable_xla_dump_executions_to(),
    265                        "Dump parameters and results of computations that XLA "
    266                        "executes into the provided directory path"),
    267       tensorflow::Flag("xla_backend_extra_options",
    268                        setter_for_xla_backend_extra_options, "",
    269                        "Extra options to pass to a backend; "
    270                        "comma-separated list of 'key=val' strings (=val "
    271                        "may be omitted); no whitespace around commas."),
    272       tensorflow::Flag("xla_reduce_precision", setter_for_xla_reduce_precision,
    273                        "",
    274                        "Directions for adding reduce-precision operations. "
    275                        "Format is 'LOCATION=E,M:OPS;NAMES' where LOCATION is "
    276                        "the class of locations in which to insert the "
    277                        "operations (e.g., 'OP_OUTPUTS'), E and M are the "
    278                        "exponent and matissa bit counts respectively, and "
    279                        "OPS and NAMES are comma-separated (no spaces) lists "
    280                        "of the operation types and names to which to attach "
    281                        "the reduce-precision operations.  The NAMES string "
    282                        "and its preceding ';' may be omitted.  This option "
    283                        "may be repeated to define multiple sets of added "
    284                        "reduce-precision operations."),
    285       tensorflow::Flag(
    286           "xla_gpu_use_cudnn_batchnorm",
    287           bool_setter_for(&DebugOptions::set_xla_gpu_use_cudnn_batchnorm),
    288           flag_values->xla_gpu_use_cudnn_batchnorm(),
    289           "Allows the GPU backend to implement batchnorm HLOs using cudnn, "
    290           "rather than expanding them to a soup of HLOs."),
    291   });
    292   ParseFlagsFromEnv(*flag_objects);
    293 }
    294 
    295 }  // namespace
    296 
    297 void AppendDebugOptionsFlags(std::vector<tensorflow::Flag>* flag_list) {
    298   std::call_once(flags_init, &AllocateFlags);
    299   flag_list->insert(flag_list->end(), flag_objects->begin(),
    300                     flag_objects->end());
    301 }
    302 
    303 xla::DebugOptions GetDebugOptionsFromFlags() {
    304   std::call_once(flags_init, &AllocateFlags);
    305   return *flag_values;
    306 }
    307 
    308 }  // namespace legacy_flags
    309 }  // namespace xla
    310