/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <vector>

#include "tensorflow/contrib/coder/kernels/range_coder.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/testlib.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {
namespace {
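// Samples an integer in [0, n) with an approximately log-uniform
// distribution: the bucket {0} and each dyadic bucket [2^k, 2^(k+1)) are
// chosen with roughly equal probability, then a value is drawn uniformly
// from the chosen bucket (rejecting outcomes >= n).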
int LogUniform(random::SimplePhilox* gen, uint32 n) {
  CHECK_GT(n, 0);

  // Split [0, n) into {0}, [1, 2), [2, 4), [4, 8), ..., [2^(m-1), n).
  const int m = Log2Ceiling(n);

  int outcome;
  do {
    // Uniform() consumes at least 32 bits per call, so this is a somewhat
    // wasteful implementation. Since it is only used in tests, we do not
    // refine it further.
    const int k = gen->Uniform(m + 1) - 1;
    // If k == -1, then sample from {0}.
    // If k == 0, then sample from [1, 2).
    // If k == 1, then sample from [2, 4), ... and so on.
    if (k < 1) {
      outcome = k + 1;
    } else {
      outcome = (1 << k) + gen->Uniform(1 << k);
    }
  } while (n <= outcome);
  return outcome;
}

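// Returns the row-major strides of `shape`: stride[i] is the number of flat
// elements spanned by one step along axis i.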
std::vector<int64> ComputeStrides(const TensorShape& shape) {
  std::vector<int64> stride(shape.dims());
  int64 current = 1;
  for (int i = shape.dims() - 1; i >= 0; --i) {
    stride[i] = current;
    current *= shape.dim_size(i);
  }
  return stride;
}

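// Test fixture that runs the RangeEncode and RangeDecode kernels, and builds
// the random data and CDF tensors that the tests feed into them.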
class RangeCoderOpsTest : public OpsTestBase {
 protected:
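  // Runs the RangeEncode op on (data, cdf) inputs and stores the encoded
  // string tensor in *output.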
  Status RunEncodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("encode", "RangeEncode")
                           .Input(tensorflow::FakeInput(DT_INT16))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

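  // Runs the RangeDecode op on (encoded, shape, cdf) inputs and stores the
  // decoded tensor in *output.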
  Status RunDecodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("decode", "RangeDecode")
                           .Input(tensorflow::FakeInput(DT_STRING))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

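  // Encodes `data` with `cdf`, decodes the result, and checks that the round
  // trip reproduces `data` exactly.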
  void TestEncodeAndDecode(int precision, const Tensor& data,
                           const Tensor& cdf) {
    Tensor encoded;
    TF_ASSERT_OK(RunEncodeOp(precision, {data, cdf}, &encoded));

    const TensorShape& data_shape = data.shape();
    Tensor shape{DT_INT32, {data_shape.dims()}};
    for (int i = 0; i < data_shape.dims(); ++i) {
      shape.flat<int32>()(i) = data_shape.dim_size(i);
    }

    Tensor decoded;
    TF_ASSERT_OK(RunDecodeOp(precision, {encoded, shape, cdf}, &decoded));

    EXPECT_EQ(decoded.dtype(), data.dtype());
    EXPECT_EQ(decoded.shape(), data.shape());
    EXPECT_EQ(decoded.tensor_data(), data.tensor_data());
  }

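  // Fills `maxvalue_tensor` with random values in [min_maxvalue,
  // max_maxvalue).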
  void PopulateMaxValues(random::SimplePhilox* gen, Tensor* maxvalue_tensor,
                         int min_maxvalue, int max_maxvalue) {
    const int range = max_maxvalue - min_maxvalue;
    TTypes<int16>::Flat flat = maxvalue_tensor->flat<int16>();

    for (int64 i = 0; i < flat.size(); ++i) {
      flat(i) = min_maxvalue + gen->Uniform(range);
    }
  }

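  // Fills `data_tensor` with log-uniform random values bounded by
  // `maxvalue_tensor`, and builds the matching (possibly broadcast) unscaled
  // CDF in `cdf_tensor` by accumulating a histogram over the sampled data.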
  void BuildCdf(random::SimplePhilox* gen, Tensor* data_tensor,
                Tensor* cdf_tensor, const Tensor& maxvalue_tensor) {
    CHECK(TensorShapeUtils::StartsWith(cdf_tensor->shape(),
                                       maxvalue_tensor.shape()));
    CHECK_EQ(cdf_tensor->dims(), maxvalue_tensor.dims() + 1);
    const int64 chip_size = cdf_tensor->dim_size(cdf_tensor->dims() - 1);

    std::vector<int64> data_stride = ComputeStrides(data_tensor->shape());
    std::vector<int64> cdf_stride = ComputeStrides(cdf_tensor->shape());

    for (int i = 0; i < cdf_tensor->dims(); ++i) {
      if (cdf_tensor->dim_size(i) == 1) {
        cdf_stride[i] = 0;
      }
    }

    Tensor histogram_tensor{DT_INT32, cdf_tensor->shape()};
    TTypes<int16>::Flat data = data_tensor->flat<int16>();
    TTypes<int32>::Flat histogram = histogram_tensor.flat<int32>();
    TTypes<int16>::ConstFlat maxvalue = maxvalue_tensor.flat<int16>();
    histogram.setZero();

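    // Map each flat data index to its offset in the (possibly broadcast) CDF
    // tensor via the zeroed-out strides, draw a sample bounded by the
    // maxvalue of that chip, and accumulate the histogram.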
    for (int64 index = 0; index < data.size(); ++index) {
      int64 temp = index;
      int64 offset = 0;
      for (int dim = 0; dim < data_stride.size(); ++dim) {
        const int64 coord = temp / data_stride[dim];
        offset += coord * cdf_stride[dim];
        temp -= coord * data_stride[dim];
      }
      ASSERT_EQ(temp, 0);

      const int64 maxvalue_offset = offset / chip_size;
      CHECK_EQ(maxvalue_offset * chip_size, offset);
      CHECK_LT(maxvalue(maxvalue_offset) + 1, chip_size);
      const int value = LogUniform(gen, maxvalue(maxvalue_offset));
      data(index) = value;
      histogram(offset + value + 1) += 1;
    }

    cdf_tensor->flat_inner_dims<int32, 2>() =
        histogram_tensor.flat_inner_dims<int32, 2>().cumsum(1);
  }
};

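// Round trip with a fully materialized CDF: the shared CDF chip is broadcast
// to the full data shape before being fed to the ops, so the kernels do no
// broadcasting themselves.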
TEST_F(RangeCoderOpsTest, NoBroadcast) {
  constexpr int kPrecision = 14;
  constexpr int kMaxValue = 10;

  Tensor data{DT_INT16, {1, 32, 32, 16}};
  Tensor temp{DT_INT32, {1, 1, 1, 1, kMaxValue + 2}};
  Tensor maxvalue{DT_INT16, {1, 1, 1, 1}};
  maxvalue.flat<int16>()(0) = kMaxValue;

  ASSERT_LE(data.shape().num_elements(), 1 << kPrecision);

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  BuildCdf(&gen, &data, &temp, maxvalue);

  const Eigen::array<int32, 5> broadcast = {1, 32, 32, 16, 1};

  Tensor cdf{DT_INT32, {1, 32, 32, 16, kMaxValue + 2}};
  cdf.tensor<int32, 5>() = temp.tensor<int32, 5>().broadcast(broadcast);

  TestEncodeAndDecode(kPrecision, data, cdf);
}

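// Round trip with a CDF that is broadcast along one axis of the data.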
TEST_F(RangeCoderOpsTest, Broadcast1Axis) {
  constexpr int kPrecision = 9;
  constexpr int kDimensionSize = 1 << kPrecision;
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor data{DT_INT16, {1, kDimensionSize, kDimensionSize}};

  Tensor maxvalue{DT_INT16, {kDimensionSize}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  {
    // Axis 1.
    Tensor maxvalue1;
    ASSERT_TRUE(maxvalue1.CopyFrom(maxvalue, {1, 1, kDimensionSize}));

    Tensor cdf{DT_INT32, {1, 1, kDimensionSize, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue1);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }

  {
    // Axis 2.
    Tensor maxvalue2;
    ASSERT_TRUE(maxvalue2.CopyFrom(maxvalue, {1, kDimensionSize, 1}));

    Tensor cdf{DT_INT32, {1, kDimensionSize, 1, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue2);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }
}

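// Round trip with a CDF that is broadcast along two axes of the data.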
TEST_F(RangeCoderOpsTest, Broadcast2Axes) {
  constexpr int kPrecision = 13;
  constexpr int kDimensionSize1 = 1 << (kPrecision / 2);
  constexpr int kDimensionSize2 = 1 << (kPrecision - kPrecision / 2);
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor maxvalue{DT_INT16, {2, 1, 1, 7}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  Tensor data{DT_INT16, {2, kDimensionSize1, kDimensionSize2, 7}};
  Tensor cdf{DT_INT32, {2, 1, 1, 7, kMaxMaxValue + 2}};
  BuildCdf(&gen, &data, &cdf, maxvalue);
  TestEncodeAndDecode(kPrecision, data, cdf);
}

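// Both ops should reject a CDF whose rank or last dimension is incompatible
// with the data.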
TEST_F(RangeCoderOpsTest, InvalidCdfShape) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 3}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 3});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  cdf = Tensor{DT_INT32, {3, 3, 1}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
}

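// The RangeDecode shape function should infer the output shape from the
// constant `shape` input.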
TEST_F(RangeCoderOpsTest, DecoderShapeFn) {
  Tensor encoded_tensor{DT_STRING, {}};
  Tensor shape_tensor{DT_INT32, {3}};
  Tensor cdf_tensor{DT_INT32, {4, 6, 8, 2}};

  shape_tensor.flat<int32>().setValues({4, 6, 8});

  Graph g{OpRegistry::Global()};
  Node* encoded = test::graph::Constant(&g, encoded_tensor);
  Node* shape = test::graph::Constant(&g, shape_tensor);
  Node* cdf = test::graph::Constant(&g, cdf_tensor);
  Node* decode;
  TF_ASSERT_OK(NodeBuilder("range_decode", "RangeDecode", g.op_registry())
                   .Input(encoded)
                   .Input(shape)
                   .Input(cdf)
                   .Attr("precision", 10)
                   .Finalize(&g, &decode));

  ShapeRefiner refiner{g.versions().producer(), g.op_registry()};
  TF_ASSERT_OK(refiner.AddNode(encoded));
  TF_ASSERT_OK(refiner.AddNode(shape));
  TF_ASSERT_OK(refiner.AddNode(cdf));
  TF_ASSERT_OK(refiner.AddNode(decode));

  auto* context = refiner.GetContext(decode);
  ASSERT_NE(context, nullptr);

  ASSERT_EQ(context->num_outputs(), 1);
  auto shape_handle = context->output(0);

  ASSERT_EQ(context->Rank(shape_handle), 3);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 0)), 4);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 1)), 6);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 2)), 8);
}

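// Both ops should reject CDF shapes that cannot be broadcast to the data
// shape.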
TEST_F(RangeCoderOpsTest, InvalidBroadcast) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 2, 2}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  data = Tensor{DT_INT16, {3, 1}};
  cdf = Tensor{DT_INT32, {3, 3, 2}};
  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 1});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  std::vector<int64> shape_vector = {2, 2, 2, 2, 2, 2, 2, 2, 2};
  data = Tensor{DT_INT16, TensorShape{shape_vector}};
  cdf = Tensor{DT_INT32, {2, 1, 2, 1, 2, 1, 2, 1, 2, 2}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }

  shape = Tensor{DT_INT32, {static_cast<int64>(shape_vector.size())}};
  for (int i = 0; i < shape_vector.size(); ++i) {
    shape.flat<int32>()(i) = shape_vector[i];
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }
}

// Benchmark -------------------------------------------------------------

// This function creates a RangeEncode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeEncodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 70;

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  cdf.flat<int32>() = h.cumsum(0);

  Tensor data{DT_INT16, shape};
  TTypes<int16>::Flat d = data.flat<int16>();
  for (int64 i = 0; i < d.size(); ++i) {
    d(i) = LogUniform(&gen, kAlphabetSize - 1);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_encode", "RangeEncode", g->op_registry())
                  .Input(test::graph::Constant(g, data))
                  .Input(test::graph::Constant(g, cdf))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}

// This function creates a RangeDecode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeDecodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 200;
  const int64 num_elements = shape.num_elements();

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf_tensor{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  TTypes<int32>::Flat cdf = cdf_tensor.flat<int32>();
  cdf = h.cumsum(0);

  Tensor string_tensor{DT_STRING, TensorShape{}};
  string& sink = string_tensor.scalar<string>()();

  RangeEncoder encoder{precision};
  for (int64 i = 0; i < num_elements; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    encoder.Encode(cdf(value), cdf(value + 1), &sink);
  }
  encoder.Finalize(&sink);

  Tensor shape_tensor{DT_INT32, {shape.dims()}};
  for (int i = 0; i < shape.dims(); ++i) {
    shape_tensor.flat<int32>()(i) = shape.dim_size(i);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_decode", "RangeDecode", g->op_registry())
                  .Input(test::graph::Constant(g, string_tensor))
                  .Input(test::graph::Constant(g, shape_tensor))
                  .Input(test::graph::Constant(g, cdf_tensor))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}

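// Runs graph `g` in a single-threaded CPU session and reports the total
// number of coded elements processed.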
void RunTensorFlowBenchmark(int iters, Graph* g, int64 num_elements) {
  SessionOptions opts;
  opts.config.set_intra_op_parallelism_threads(1);
  opts.config.set_inter_op_parallelism_threads(1);

  testing::UseRealTime();
  test::Benchmark("cpu", g, &opts).Run(iters);

  const int64 num_items = static_cast<int64>(iters) * num_elements;
  testing::ItemsProcessed(num_items);
}

void BM_RangeEncodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeEncodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeEncodeFullBroadcast)->Arg(32)->Arg(64);

void BM_RangeDecodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeDecodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeDecodeFullBroadcast)->Arg(32)->Arg(64);

}  // namespace
}  // namespace tensorflow