/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"

#include <algorithm>
#include <numeric>
#include <vector>

#include "tensorflow/compiler/xla/layout_util.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/computation_layout.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
#include "tensorflow/compiler/xla/service/hlo_computation.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
#include "tensorflow/compiler/xla/shape_layout.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/strings/strcat.h"

namespace xla {
namespace gpu {
namespace {

using LayoutAssignmentTest = HloTestBase;
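
// Each test below builds a small HLO module by hand, runs GpuLayoutAssignment
// on it directly, and checks the layouts the pass assigned.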

TEST_F(LayoutAssignmentTest, Elementwise) {
  Shape ashape = ShapeUtil::MakeShape(F32, {42, 12});
  Shape ashape_in_row_major(ashape);
  Shape ashape_in_col_major(ashape);
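  // XLA layouts list dimensions in minor-to-major order, so {1, 0} is
  // row-major and {0, 1} is column-major for this rank-2 shape.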
  *ashape_in_row_major.mutable_layout() = LayoutUtil::MakeLayout({1, 0});
  *ashape_in_col_major.mutable_layout() = LayoutUtil::MakeLayout({0, 1});

  // Enumerate all possible combinations of layouts.
  for (const Shape& lhs_shape_with_layout :
       {ashape_in_row_major, ashape_in_col_major}) {
    for (const Shape& rhs_shape_with_layout :
         {ashape_in_row_major, ashape_in_col_major}) {
      for (const Shape& result_shape_with_layout :
           {ashape_in_row_major, ashape_in_col_major}) {
        // GpuLayoutAssignment should assign the same layout to "add" and its
        // two operands.
        auto builder = HloComputation::Builder(TestName());
        auto x = builder.AddInstruction(
            HloInstruction::CreateParameter(0, ashape, "x"));
        auto y = builder.AddInstruction(
            HloInstruction::CreateParameter(1, ashape, "y"));
        auto add = builder.AddInstruction(
            HloInstruction::CreateBinary(ashape, HloOpcode::kAdd, x, y));
        auto module = CreateNewModule();
        HloComputation* computation =
            module->AddEntryComputation(builder.Build(add));

        ComputationLayout computation_layout(
            computation->ComputeProgramShape());
        *computation_layout.mutable_parameter_layout(0) =
            ShapeLayout(lhs_shape_with_layout);
        *computation_layout.mutable_parameter_layout(1) =
            ShapeLayout(rhs_shape_with_layout);
        *computation_layout.mutable_result_layout() =
            ShapeLayout(result_shape_with_layout);

        GpuLayoutAssignment layout_assignment(&computation_layout);
        EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());

        for (const HloInstruction* operand : add->operands()) {
          EXPECT_TRUE(LayoutUtil::Equal(add->shape().layout(),
                                        operand->shape().layout()));
        }
      }
    }
  }
}

// Returns a list of shapes with all possible layouts of this shape, including
// a shape with no layout.
std::vector<Shape> AllLayoutsOf(const Shape& s) {
  std::vector<int64> layout_vec(s.dimensions_size());
  std::iota(layout_vec.begin(), layout_vec.end(), 0);
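  // layout_vec begins as the identity permutation {0, 1, ..., rank - 1};
  // std::next_permutation below then visits every permutation of it exactly
  // once, in lexicographic order.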

  std::vector<Shape> shapes;
  shapes.push_back(s);
  shapes.back().clear_layout();

  do {
    shapes.push_back(s);
    *shapes.back().mutable_layout() = LayoutUtil::MakeLayout(layout_vec);
  } while (std::next_permutation(layout_vec.begin(), layout_vec.end()));

  return shapes;
}

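// For the rank-4 shapes used below, AllLayoutsOf returns 4! + 1 = 25 shapes,
// so each batchnorm test's nested loops over input and result layouts cover
// 25 * 25 = 625 combinations.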
TEST_F(LayoutAssignmentTest, BatchNormInference) {
  const int64 kFeatureIndex = 1;

  // The shape of the data operand to BatchNormInference and of the output of
  // the BatchNormInference call.
  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});

  // The shape of the scale, offset, mean, and variance inputs to
  // BatchNormInference.  These are rank 1, with as many elements as are in
  // the kFeatureIndex dim of shape.
  Shape aux_shape =
      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});

  for (const Shape& input_shape : AllLayoutsOf(shape)) {
    for (const Shape& result_shape : AllLayoutsOf(shape)) {
      SCOPED_TRACE(tensorflow::strings::StrCat(
          "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
          ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape)));

      auto builder = HloComputation::Builder(TestName());
      auto* operand = builder.AddInstruction(
          HloInstruction::CreateParameter(0, shape, "operand"));
      auto* scale = builder.AddInstruction(
          HloInstruction::CreateParameter(1, aux_shape, "scale"));
      auto* offset = builder.AddInstruction(
          HloInstruction::CreateParameter(2, aux_shape, "offset"));
      auto* mean = builder.AddInstruction(
          HloInstruction::CreateParameter(3, aux_shape, "mean"));
      auto* variance = builder.AddInstruction(
          HloInstruction::CreateParameter(4, aux_shape, "variance"));

      auto* epsilon = builder.AddInstruction(
          HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
      auto* feature_index =
          builder.AddInstruction(HloInstruction::CreateConstant(
              Literal::CreateR0<int64>(kFeatureIndex)));

      auto* batchnorm = builder.AddInstruction(HloInstruction::CreateCustomCall(
          shape,
          {operand, scale, offset, mean, variance, epsilon, feature_index},
          kCudnnBatchNormForwardInferenceCallTarget));

      auto module = CreateNewModule();
      HloComputation* computation =
          module->AddEntryComputation(builder.Build(batchnorm));

      ComputationLayout computation_layout(computation->ComputeProgramShape());

      if (input_shape.has_layout()) {
        *computation_layout.mutable_parameter_layout(0) =
            ShapeLayout(input_shape);
      }

      if (result_shape.has_layout()) {
        *computation_layout.mutable_result_layout() = ShapeLayout(result_shape);
      }

      GpuLayoutAssignment layout_assignment(&computation_layout);
      EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());

      // The first operand to batchnorm should have the same layout as the
      // result.
      EXPECT_TRUE(LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
                                    batchnorm->shape().layout()))
          << batchnorm->ToString();
    }
  }
}

TEST_F(LayoutAssignmentTest, BatchNormTraining) {
  const int64 kFeatureIndex = 1;

  // The shape of the data operand to BatchNormTraining.
  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});

  // The shape of the offset and scale inputs to BatchNormTraining.  These are
  // rank 1, with as many elements as are in the kFeatureIndex dim of shape.
  Shape offset_scale_shape =
      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});

  // Shape of the output of our BatchNormTraining op.
  Shape batchnorm_shape = ShapeUtil::MakeTupleShape(
      {shape, offset_scale_shape, offset_scale_shape});

  // Enumerate all combinations of shapes.
  for (const Shape& input_shape : AllLayoutsOf(shape)) {
    for (const Shape& result_shape : AllLayoutsOf(shape)) {
      SCOPED_TRACE(tensorflow::strings::StrCat(
          "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
          ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape)));

      auto builder = HloComputation::Builder(TestName());
      auto* operand = builder.AddInstruction(
          HloInstruction::CreateParameter(0, shape, "operand"));
      auto* scale = builder.AddInstruction(
          HloInstruction::CreateParameter(1, offset_scale_shape, "scale"));
      auto* offset = builder.AddInstruction(
          HloInstruction::CreateParameter(2, offset_scale_shape, "offset"));

      auto* epsilon = builder.AddInstruction(
          HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
      auto* feature_index =
          builder.AddInstruction(HloInstruction::CreateConstant(
              Literal::CreateR0<int64>(kFeatureIndex)));

      auto* batchnorm = builder.AddInstruction(HloInstruction::CreateCustomCall(
          batchnorm_shape, {operand, scale, offset, epsilon, feature_index},
          kCudnnBatchNormForwardTrainingCallTarget));

      auto module = CreateNewModule();
      HloComputation* computation =
          module->AddEntryComputation(builder.Build(batchnorm));

      ComputationLayout computation_layout(computation->ComputeProgramShape());

      if (input_shape.has_layout()) {
        *computation_layout.mutable_parameter_layout(0) =
            ShapeLayout(input_shape);
      }

      if (result_shape.has_layout()) {
        *computation_layout.mutable_result_layout() =
            ShapeLayout(ShapeUtil::MakeTupleShape(
                {result_shape, offset_scale_shape, offset_scale_shape}));
      }

      GpuLayoutAssignment layout_assignment(&computation_layout);
      EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());

      // The first operand to batchnorm should have the same layout as the
      // first element of the result tuple.
      EXPECT_TRUE(
          LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
                            batchnorm->shape().tuple_shapes(0).layout()))
          << batchnorm->ToString();
    }
  }
}

TEST_F(LayoutAssignmentTest, BatchNormGrad) {
  const int64 kFeatureIndex = 1;

  // The shape of the data operand to BatchNormGrad.
  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});

  // The shape of the scale, mean, and variance inputs to BatchNormGrad.
  // These are rank 1, with as many elements as are in the kFeatureIndex dim
  // of shape.
  Shape scale_shape =
      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});

  // Shape of the output of our BatchNormGrad op.
  Shape batchnorm_shape =
      ShapeUtil::MakeTupleShape({shape, scale_shape, scale_shape});

  // Enumerate all combinations of shapes plus whether we're constraining param
  // 0 or param 4.
  for (const Shape& input_shape : AllLayoutsOf(shape)) {
    for (const Shape& result_shape : AllLayoutsOf(shape)) {
      for (int constrained_param_no : {0, 4}) {
        SCOPED_TRACE(tensorflow::strings::StrCat(
            "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
            ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape),
            ", constrained_param_no=", constrained_param_no));

        auto builder = HloComputation::Builder(TestName());
        auto* operand = builder.AddInstruction(
            HloInstruction::CreateParameter(0, shape, "operand"));
        auto* scale = builder.AddInstruction(
            HloInstruction::CreateParameter(1, scale_shape, "scale"));
        auto* mean = builder.AddInstruction(
            HloInstruction::CreateParameter(2, scale_shape, "mean"));
        auto* var = builder.AddInstruction(
            HloInstruction::CreateParameter(3, scale_shape, "var"));
        auto* grad_offset = builder.AddInstruction(
            HloInstruction::CreateParameter(4, shape, "grad_offset"));

        auto* epsilon = builder.AddInstruction(
            HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
        auto* feature_index =
            builder.AddInstruction(HloInstruction::CreateConstant(
                Literal::CreateR0<int64>(kFeatureIndex)));

        auto* batchnorm =
            builder.AddInstruction(HloInstruction::CreateCustomCall(
                batchnorm_shape,
                {operand, scale, mean, var, grad_offset, epsilon,
                 feature_index},
                kCudnnBatchNormBackwardCallTarget));

        auto module = CreateNewModule();
        HloComputation* computation =
            module->AddEntryComputation(builder.Build(batchnorm));

        ComputationLayout computation_layout(
            computation->ComputeProgramShape());

        if (input_shape.has_layout()) {
          *computation_layout.mutable_parameter_layout(constrained_param_no) =
              ShapeLayout(input_shape);
        }

        if (result_shape.has_layout()) {
          *computation_layout.mutable_result_layout() =
              ShapeLayout(ShapeUtil::MakeTupleShape(
                  {result_shape, scale_shape, scale_shape}));
        }

        GpuLayoutAssignment layout_assignment(&computation_layout);
        EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());

        // The first and fourth operands to the batchnorm call should have the
        // same layout as the first element of the result tuple.
        EXPECT_TRUE(
            LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
                              batchnorm->shape().tuple_shapes(0).layout()))
            << batchnorm->ToString();
        EXPECT_TRUE(
            LayoutUtil::Equal(batchnorm->operand(4)->shape().layout(),
                              batchnorm->shape().tuple_shapes(0).layout()))
            << batchnorm->ToString();
      }
    }
  }
}

}  // namespace
}  // namespace gpu
}  // namespace xla