Home | History | Annotate | Download | only in gpu
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
     17 
     18 #include <memory>
     19 #include <set>
     20 #include <vector>
     21 
     22 #include "tensorflow/compiler/xla/service/call_graph.h"
     23 #include "tensorflow/compiler/xla/service/copy_insertion.h"
     24 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
     25 #include "tensorflow/compiler/xla/service/hlo_computation.h"
     26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
     27 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
     28 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
     29 #include "tensorflow/core/lib/core/status.h"
     30 #include "tensorflow/core/lib/gtl/flatset.h"
     31 #include "tensorflow/core/platform/logging.h"
     32 
     33 namespace xla {
     34 
     35 namespace gpu {
     36 
     37 StatusOr<HloInstruction*> GpuCopyInsertion::FindOrInsertCopy(
     38     HloInstruction* hlo) {
     39   HloInstruction*& copy = hlo_to_copy_map_[hlo];
     40   if (copy == nullptr) {
     41     TF_ASSIGN_OR_RETURN(copy, hlo->parent()->DeepCopyInstruction(hlo));
     42   }
     43   return copy;
     44 }
     45 
     46 StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
     47   CopyInsertion generic_copy_insertion;
     48 
     49   TF_ASSIGN_OR_RETURN(bool changed, generic_copy_insertion.Run(module));
     50 
     51   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloDataflowAnalysis> dataflow,
     52                       HloDataflowAnalysis::Run(*module));
     53 
     54   // Make sure all operands of a library call are in memory instead of constants
     55   // in IR.
     56   for (HloInstruction* hlo :
     57        module->entry_computation()->MakeInstructionPostOrder()) {
     58     // Inserts a copy of hlo->operand(n) if it's a constant.
     59     auto copy_operand_if_constant = [&](int64 n) -> Status {
     60       HloInstruction* operand = hlo->mutable_operand(n);
     61       TF_RET_CHECK(ShapeUtil::IsArray(operand->shape()));
     62       const auto& values = dataflow->GetValueSet(operand).values();
     63       if (std::any_of(values.begin(), values.end(), [](const HloValue* value) {
     64             return value->defining_instruction()->opcode() ==
     65                    HloOpcode::kConstant;
     66           })) {
     67         TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand));
     68         TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(n, copy));
     69         changed = true;
     70       }
     71       return Status::OK();
     72     };
     73 
     74     if (IsCustomCallToDnnBatchNorm(*hlo)) {
     75       // The epsilon and feature_index operands to a CUDNN batchnorm op don't
     76       // need to be materialized in memory -- in fact, they must be constants.
     77       // These are the last two operands of all three batchnorm ops.
     78       for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
     79         TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
     80       }
     81     } else if (IsCustomCallToDnnConvolution(*hlo)) {
     82       // The last two arguments to a CUDNN convolution are two HLO constants for
     83       // cudnn algorithm and tensor_ops_enabled flag, which shouldn't be copied.
     84       for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
     85         TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
     86       }
     87     } else if (ImplementedAsLibraryCall(*hlo)) {
     88       // For all other library calls, materialize all the operands into memory.
     89       for (int64 i = 0; i < hlo->operand_count(); ++i) {
     90         TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
     91       }
     92     }
     93   }
     94 
     95   // Init values of while and conditional nodes cannot be constants. Insert
     96   // copies for any constants found at the operands of these nodes.
     97   tensorflow::gtl::FlatSet<HloInstruction*> inserted_copies;
     98   for (HloComputation* computation : module->computations()) {
     99     for (HloInstruction* instruction : computation->instructions()) {
    100       if (instruction->opcode() != HloOpcode::kWhile &&
    101           instruction->opcode() != HloOpcode::kConditional) {
    102         continue;
    103       }
    104       for (auto operand : instruction->operands()) {
    105         // Skip the operands that have already been replaced with a copy in a
    106         // previous iteration (which is possible when a constant is used as an
    107         // operand in multiple places).
    108         if (ContainsKey(inserted_copies, operand)) {
    109           continue;
    110         }
    111         for (auto& pair : dataflow->GetInstructionValueSet(operand)) {
    112           const HloValueSet& value_set = pair.second;
    113           for (const HloValue* value : value_set.values()) {
    114             if (value->defining_instruction()->IsConstant() &&
    115                 !ContainsKey(hlo_to_copy_map_, value->defining_instruction())) {
    116               HloInstruction* constant = value->defining_instruction();
    117               TF_ASSIGN_OR_RETURN(HloInstruction * copy,
    118                                   FindOrInsertCopy(constant));
    119               TF_RETURN_IF_ERROR(constant->ReplaceAllUsesWith(copy));
    120               inserted_copies.insert(copy);
    121               changed = true;
    122             }
    123           }
    124         }
    125       }
    126     }
    127   }
    128 
    129   // The GPU backend needs additional copies added due to deficiencies in
    130   // buffer assignment.
    131   TF_ASSIGN_OR_RETURN(bool buffer_assignment_changed,
    132                       CopyInsertion::AddCopiesForBufferAssignment(module));
    133 
    134   return changed || buffer_assignment_changed;
    135 }
    136 
    137 }  // namespace gpu
    138 }  // namespace xla
    139