/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <vector>

#include "tensorflow/contrib/coder/kernels/range_coder.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/testlib.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {
namespace {
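// Draws a sample from [0, n) with an approximately log-uniform distribution:
// outcome 0 and each dyadic bucket [2^k, 2^(k+1)) are equally likely, and
// values within a bucket are uniform. Samples falling at or above n are
// rejected and redrawn.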
int LogUniform(random::SimplePhilox* gen, uint32 n) {
  CHECK_GT(n, 0);

  // Split [0, n) into {0}, [1, 2), [2, 4), [4, 8), ..., [2^(m-1), n).
  const int m = Log2Ceiling(n);

  int outcome;
  do {
    // Uniform() consumes at least 32 bits per call, so this implementation is
    // somewhat wasteful. Since it is used only in tests, we do not refine it
    // further.
    const int k = gen->Uniform(m + 1) - 1;
    // If k == -1, then sample from {0}.
    // If k == 0, then sample from [1, 2).
    // If k == 1, then sample from [2, 4), ... and so on.
    if (k < 1) {
      outcome = k + 1;
    } else {
      outcome = (1 << k) + gen->Uniform(1 << k);
    }
  } while (n <= outcome);
  return outcome;
}

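// Computes row-major (C-order) strides for `shape`. For example, shape
// {2, 3, 4} yields strides {12, 4, 1}: advancing one step along axis 0 moves
// 12 elements in the flattened tensor.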
std::vector<int64> ComputeStrides(const TensorShape& shape) {
  std::vector<int64> stride(shape.dims());
  int64 current = 1;
  for (int i = shape.dims() - 1; i >= 0; --i) {
    stride[i] = current;
    current *= shape.dim_size(i);
  }
  return stride;
}

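// Test fixture that runs the RangeEncode/RangeDecode kernels directly and
// provides helpers for building random data together with a matching,
// possibly broadcastable, CDF tensor.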
class RangeCoderOpsTest : public OpsTestBase {
 protected:
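  // Builds and runs a RangeEncode node with the given `precision` attribute.
  // `input` should be {data, cdf}; on success, `output` receives the encoded
  // string produced by the kernel.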
  Status RunEncodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("encode", "RangeEncode")
                           .Input(tensorflow::FakeInput(DT_INT16))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

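  // Builds and runs a RangeDecode node with the given `precision` attribute.
  // `input` should be {encoded, shape, cdf}; on success, `output` receives
  // the decoded DT_INT16 tensor.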
  Status RunDecodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("decode", "RangeDecode")
                           .Input(tensorflow::FakeInput(DT_STRING))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

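  // Round-trip check: encodes `data` against `cdf`, decodes the result back,
  // and expects the decoded tensor to match the original bit for bit.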
  void TestEncodeAndDecode(int precision, const Tensor& data,
                           const Tensor& cdf) {
    Tensor encoded;
    TF_ASSERT_OK(RunEncodeOp(precision, {data, cdf}, &encoded));

    const TensorShape& data_shape = data.shape();
    Tensor shape{DT_INT32, {data_shape.dims()}};
    for (int i = 0; i < data_shape.dims(); ++i) {
      shape.flat<int32>()(i) = data_shape.dim_size(i);
    }

    Tensor decoded;
    TF_ASSERT_OK(RunDecodeOp(precision, {encoded, shape, cdf}, &decoded));

    EXPECT_EQ(decoded.dtype(), data.dtype());
    EXPECT_EQ(decoded.shape(), data.shape());
    EXPECT_EQ(decoded.tensor_data(), data.tensor_data());
  }

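  // Fills `maxvalue_tensor` with values drawn uniformly from
  // [min_maxvalue, max_maxvalue).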
  void PopulateMaxValues(random::SimplePhilox* gen, Tensor* maxvalue_tensor,
                         int min_maxvalue, int max_maxvalue) {
    const int range = max_maxvalue - min_maxvalue;
    TTypes<int16>::Flat flat = maxvalue_tensor->flat<int16>();

    for (int64 i = 0; i < flat.size(); ++i) {
      flat(i) = min_maxvalue + gen->Uniform(range);
    }
  }

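  // Fills `data_tensor` with random values and builds a matching unnormalized
  // CDF in `cdf_tensor`. `cdf_tensor` must have the same shape as
  // `maxvalue_tensor` plus one trailing axis of histogram bins; size-one axes
  // of the CDF broadcast against the corresponding data axes (their strides
  // are zeroed below).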
  void BuildCdf(random::SimplePhilox* gen, Tensor* data_tensor,
                Tensor* cdf_tensor, const Tensor& maxvalue_tensor) {
    CHECK(TensorShapeUtils::StartsWith(cdf_tensor->shape(),
                                       maxvalue_tensor.shape()));
    CHECK_EQ(cdf_tensor->dims(), maxvalue_tensor.dims() + 1);
    const int64 chip_size = cdf_tensor->dim_size(cdf_tensor->dims() - 1);

    std::vector<int64> data_stride = ComputeStrides(data_tensor->shape());
    std::vector<int64> cdf_stride = ComputeStrides(cdf_tensor->shape());

    for (int i = 0; i < cdf_tensor->dims(); ++i) {
      if (cdf_tensor->dim_size(i) == 1) {
        cdf_stride[i] = 0;
      }
    }

    Tensor histogram_tensor{DT_INT32, cdf_tensor->shape()};
    TTypes<int16>::Flat data = data_tensor->flat<int16>();
    TTypes<int32>::Flat histogram = histogram_tensor.flat<int32>();
    TTypes<int16>::ConstFlat maxvalue = maxvalue_tensor.flat<int16>();
    histogram.setZero();

    for (int64 index = 0; index < data.size(); ++index) {
      int64 temp = index;
      int64 offset = 0;
      for (int dim = 0; dim < data_stride.size(); ++dim) {
        const int64 coord = temp / data_stride[dim];
        offset += coord * cdf_stride[dim];
        temp -= coord * data_stride[dim];
      }
      ASSERT_EQ(temp, 0);

      const int64 maxvalue_offset = offset / chip_size;
      CHECK_EQ(maxvalue_offset * chip_size, offset);
      CHECK_LT(maxvalue(maxvalue_offset) + 1, chip_size);
      const int value = LogUniform(gen, maxvalue(maxvalue_offset));
      data(index) = value;
      histogram(offset + value + 1) += 1;
    }

    cdf_tensor->flat_inner_dims<int32, 2>() =
        histogram_tensor.flat_inner_dims<int32, 2>().cumsum(1);
  }
};

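// Encodes with a CDF that is materialized (tiled) to the full data shape, so
// the op does not need to broadcast anything.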
TEST_F(RangeCoderOpsTest, NoBroadcast) {
  constexpr int kPrecision = 14;
  constexpr int kMaxValue = 10;

  Tensor data{DT_INT16, {1, 32, 32, 16}};
  Tensor temp{DT_INT32, {1, 1, 1, 1, kMaxValue + 2}};
  Tensor maxvalue{DT_INT16, {1, 1, 1, 1}};
  maxvalue.flat<int16>()(0) = kMaxValue;

  ASSERT_LE(data.shape().num_elements(), 1 << kPrecision);

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  BuildCdf(&gen, &data, &temp, maxvalue);

  const Eigen::array<int32, 5> broadcast = {1, 32, 32, 16, 1};

  Tensor cdf{DT_INT32, {1, 32, 32, 16, kMaxValue + 2}};
  cdf.tensor<int32, 5>() = temp.tensor<int32, 5>().broadcast(broadcast);

  TestEncodeAndDecode(kPrecision, data, cdf);
}

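// Exercises broadcasting of the CDF along a single data axis: first axis 1,
// then axis 2.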
TEST_F(RangeCoderOpsTest, Broadcast1Axis) {
  constexpr int kPrecision = 9;
  constexpr int kDimensionSize = 1 << kPrecision;
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor data{DT_INT16, {1, kDimensionSize, kDimensionSize}};

  Tensor maxvalue{DT_INT16, {kDimensionSize}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  {
    // Axis 1.
    Tensor maxvalue1;
    ASSERT_TRUE(maxvalue1.CopyFrom(maxvalue, {1, 1, kDimensionSize}));

    Tensor cdf{DT_INT32, {1, 1, kDimensionSize, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue1);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }

  {
    // Axis 2.
    Tensor maxvalue2;
    ASSERT_TRUE(maxvalue2.CopyFrom(maxvalue, {1, kDimensionSize, 1}));

    Tensor cdf{DT_INT32, {1, kDimensionSize, 1, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue2);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }
}

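// Exercises broadcasting of the CDF along two data axes at once.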
TEST_F(RangeCoderOpsTest, Broadcast2Axes) {
  constexpr int kPrecision = 13;
  constexpr int kDimensionSize1 = 1 << (kPrecision / 2);
  constexpr int kDimensionSize2 = 1 << (kPrecision - kPrecision / 2);
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor maxvalue{DT_INT16, {2, 1, 1, 7}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  Tensor data{DT_INT16, {2, kDimensionSize1, kDimensionSize2, 7}};
  Tensor cdf{DT_INT32, {2, 1, 1, 7, kMaxMaxValue + 2}};
  BuildCdf(&gen, &data, &cdf, maxvalue);
  TestEncodeAndDecode(kPrecision, data, cdf);
}

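// Both ops should reject a `cdf` that does not have exactly one more axis
// than `data`, or whose last dimension is not larger than one.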
TEST_F(RangeCoderOpsTest, InvalidCdfShape) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 3}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 3});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  cdf = Tensor{DT_INT32, {3, 3, 1}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
}

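// Checks that the RangeDecode shape function infers the output shape from
// the constant `shape` input.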
TEST_F(RangeCoderOpsTest, DecoderShapeFn) {
  Tensor encoded_tensor{DT_STRING, {}};
  Tensor shape_tensor{DT_INT32, {3}};
  Tensor cdf_tensor{DT_INT32, {4, 6, 8, 2}};

  shape_tensor.flat<int32>().setValues({4, 6, 8});

  Graph g{OpRegistry::Global()};
  Node* encoded = test::graph::Constant(&g, encoded_tensor);
  Node* shape = test::graph::Constant(&g, shape_tensor);
  Node* cdf = test::graph::Constant(&g, cdf_tensor);
  Node* decode;
  TF_ASSERT_OK(NodeBuilder("range_decode", "RangeDecode", g.op_registry())
                   .Input(encoded)
                   .Input(shape)
                   .Input(cdf)
                   .Attr("precision", 10)
                   .Finalize(&g, &decode));

  ShapeRefiner refiner{g.versions().producer(), g.op_registry()};
  TF_ASSERT_OK(refiner.AddNode(encoded));
  TF_ASSERT_OK(refiner.AddNode(shape));
  TF_ASSERT_OK(refiner.AddNode(cdf));
  TF_ASSERT_OK(refiner.AddNode(decode));

  auto* context = refiner.GetContext(decode);
  ASSERT_NE(context, nullptr);

  ASSERT_EQ(context->num_outputs(), 1);
  auto shape_handle = context->output(0);

  ASSERT_EQ(context->Rank(shape_handle), 3);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 0)), 4);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 1)), 6);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 2)), 8);
}

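// Both ops should reject CDF shapes that cannot be broadcast to the data
// shape, as well as broadcast patterns the kernels report as irregular.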
TEST_F(RangeCoderOpsTest, InvalidBroadcast) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 2, 2}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  data = Tensor{DT_INT16, {3, 1}};
  cdf = Tensor{DT_INT32, {3, 3, 2}};
  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 1});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  std::vector<int64> shape_vector = {2, 2, 2, 2, 2, 2, 2, 2, 2};
  data = Tensor{DT_INT16, TensorShape{shape_vector}};
  cdf = Tensor{DT_INT32, {2, 1, 2, 1, 2, 1, 2, 1, 2, 2}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }

  shape = Tensor{DT_INT32, {static_cast<int64>(shape_vector.size())}};
  for (int i = 0; i < shape_vector.size(); ++i) {
    shape.flat<int32>()(i) = shape_vector[i];
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }
}

// Benchmark -------------------------------------------------------------

// This function creates a RangeEncode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeEncodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 70;

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  cdf.flat<int32>() = h.cumsum(0);

  Tensor data{DT_INT16, shape};
  TTypes<int16>::Flat d = data.flat<int16>();
  for (int64 i = 0; i < d.size(); ++i) {
    d(i) = LogUniform(&gen, kAlphabetSize - 1);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_encode", "RangeEncode", g->op_registry())
                  .Input(test::graph::Constant(g, data))
                  .Input(test::graph::Constant(g, cdf))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}

// This function creates a RangeDecode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeDecodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 200;
  const int64 num_elements = shape.num_elements();

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf_tensor{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  TTypes<int32>::Flat cdf = cdf_tensor.flat<int32>();
  cdf = h.cumsum(0);

  Tensor string_tensor{DT_STRING, TensorShape{}};
  string& sink = string_tensor.scalar<string>()();

  RangeEncoder encoder{precision};
  for (int64 i = 0; i < num_elements; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    encoder.Encode(cdf(value), cdf(value + 1), &sink);
  }
  encoder.Finalize(&sink);

  Tensor shape_tensor{DT_INT32, {shape.dims()}};
  for (int i = 0; i < shape.dims(); ++i) {
    shape_tensor.flat<int32>()(i) = shape.dim_size(i);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_decode", "RangeDecode", g->op_registry())
                  .Input(test::graph::Constant(g, string_tensor))
                  .Input(test::graph::Constant(g, shape_tensor))
                  .Input(test::graph::Constant(g, cdf_tensor))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}

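// Runs `g` single-threaded and reports throughput as items (symbols)
// processed.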
void RunTensorFlowBenchmark(int iters, Graph* g, int64 num_elements) {
  SessionOptions opts;
  opts.config.set_intra_op_parallelism_threads(1);
  opts.config.set_inter_op_parallelism_threads(1);

  testing::UseRealTime();
  test::Benchmark("cpu", g, &opts).Run(iters);

  const int64 num_items = static_cast<int64>(iters) * num_elements;
  testing::ItemsProcessed(num_items);
}

void BM_RangeEncodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeEncodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeEncodeFullBroadcast)->Arg(32)->Arg(64);

void BM_RangeDecodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeDecodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeDecodeFullBroadcast)->Arg(32)->Arg(64);

}  // namespace
}  // namespace tensorflow