1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include <memory> 17 #include <vector> 18 19 #include "tensorflow/contrib/coder/kernels/range_coder.h" 20 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" 21 #include "tensorflow/core/common_runtime/shape_refiner.h" 22 #include "tensorflow/core/framework/fake_input.h" 23 #include "tensorflow/core/framework/graph.pb.h" 24 #include "tensorflow/core/framework/node_def.pb.h" 25 #include "tensorflow/core/framework/node_def_builder.h" 26 #include "tensorflow/core/framework/op.h" 27 #include "tensorflow/core/framework/tensor_types.h" 28 #include "tensorflow/core/framework/types.pb.h" 29 #include "tensorflow/core/framework/versions.pb.h" 30 #include "tensorflow/core/graph/graph.h" 31 #include "tensorflow/core/graph/node_builder.h" 32 #include "tensorflow/core/graph/testlib.h" 33 #include "tensorflow/core/kernels/ops_testutil.h" 34 #include "tensorflow/core/lib/core/bits.h" 35 #include "tensorflow/core/lib/core/status_test_util.h" 36 #include "tensorflow/core/lib/gtl/array_slice.h" 37 #include "tensorflow/core/lib/random/random.h" 38 #include "tensorflow/core/lib/random/simple_philox.h" 39 #include "tensorflow/core/platform/test.h" 40 #include "tensorflow/core/platform/test_benchmark.h" 41 #include "tensorflow/core/public/session.h" 42 #include "tensorflow/core/public/session_options.h" 43 44 namespace tensorflow { 45 namespace { 46 int LogUniform(random::SimplePhilox* gen, uint32 n) { 47 CHECK_GT(n, 0); 48 49 // Split [0, n) into {0}, [1, 2), [2, 4), [4, 8), ..., [2^(m-1), n). 50 const int m = Log2Ceiling(n); 51 52 int outcome; 53 do { 54 // Uniform() consumes at least 32 bits per call, therefore this is somewhat 55 // wasteful implementation. Since this is used only for test, we do not 56 // refine this implementation further. 57 const int k = gen->Uniform(m + 1) - 1; 58 // If k == -1, then sample from {0}. 59 // If k == 0, then sample from [1, 2). 60 // If k == 1, then sample from [2, 4), ... and so on. 61 if (k < 1) { 62 outcome = k + 1; 63 } else { 64 outcome = (1 << k) + gen->Uniform(1 << k); 65 } 66 } while (n <= outcome); 67 return outcome; 68 } 69 70 std::vector<int64> ComputeStrides(const TensorShape& shape) { 71 std::vector<int64> stride(shape.dims()); 72 int64 current = 1; 73 for (int i = shape.dims() - 1; i >= 0; --i) { 74 stride[i] = current; 75 current *= shape.dim_size(i); 76 } 77 return stride; 78 } 79 80 class RangeCoderOpsTest : public OpsTestBase { 81 protected: 82 Status RunEncodeOp(int precision, gtl::ArraySlice<Tensor> input, 83 Tensor* output) { 84 TF_RETURN_IF_ERROR(NodeDefBuilder("encode", "RangeEncode") 85 .Input(tensorflow::FakeInput(DT_INT16)) 86 .Input(tensorflow::FakeInput(DT_INT32)) 87 .Attr("precision", precision) 88 .Finalize(node_def())); 89 TF_RETURN_IF_ERROR(InitOp()); 90 91 inputs_.clear(); 92 std::vector<Tensor> copies(input.size()); 93 for (int i = 0; i < input.size(); ++i) { 94 copies[i] = input[i]; 95 inputs_.emplace_back(&copies[i]); 96 } 97 98 TF_RETURN_IF_ERROR(RunOpKernel()); 99 100 *output = *GetOutput(0); 101 inputs_.clear(); 102 103 return Status::OK(); 104 } 105 106 Status RunDecodeOp(int precision, gtl::ArraySlice<Tensor> input, 107 Tensor* output) { 108 TF_RETURN_IF_ERROR(NodeDefBuilder("decode", "RangeDecode") 109 .Input(tensorflow::FakeInput(DT_STRING)) 110 .Input(tensorflow::FakeInput(DT_INT32)) 111 .Input(tensorflow::FakeInput(DT_INT32)) 112 .Attr("precision", precision) 113 .Finalize(node_def())); 114 TF_RETURN_IF_ERROR(InitOp()); 115 116 inputs_.clear(); 117 std::vector<Tensor> copies(input.size()); 118 for (int i = 0; i < input.size(); ++i) { 119 copies[i] = input[i]; 120 inputs_.emplace_back(&copies[i]); 121 } 122 123 TF_RETURN_IF_ERROR(RunOpKernel()); 124 125 *output = *GetOutput(0); 126 inputs_.clear(); 127 128 return Status::OK(); 129 } 130 131 void TestEncodeAndDecode(int precision, const Tensor& data, 132 const Tensor& cdf) { 133 Tensor encoded; 134 TF_ASSERT_OK(RunEncodeOp(precision, {data, cdf}, &encoded)); 135 136 const TensorShape& data_shape = data.shape(); 137 Tensor shape{DT_INT32, {data_shape.dims()}}; 138 for (int i = 0; i < data_shape.dims(); ++i) { 139 shape.flat<int32>()(i) = data_shape.dim_size(i); 140 } 141 142 Tensor decoded; 143 TF_ASSERT_OK(RunDecodeOp(precision, {encoded, shape, cdf}, &decoded)); 144 145 EXPECT_EQ(decoded.dtype(), data.dtype()); 146 EXPECT_EQ(decoded.shape(), data.shape()); 147 EXPECT_EQ(decoded.tensor_data(), data.tensor_data()); 148 } 149 150 void PopulateMaxValues(random::SimplePhilox* gen, Tensor* maxvalue_tensor, 151 int min_maxvalue, int max_maxvalue) { 152 const int range = max_maxvalue - min_maxvalue; 153 TTypes<int16>::Flat flat = maxvalue_tensor->flat<int16>(); 154 155 for (int64 i = 0; i < flat.size(); ++i) { 156 flat(i) = min_maxvalue + gen->Uniform(range); 157 } 158 } 159 160 void BuildCdf(random::SimplePhilox* gen, Tensor* data_tensor, 161 Tensor* cdf_tensor, const Tensor& maxvalue_tensor) { 162 CHECK(TensorShapeUtils::StartsWith(cdf_tensor->shape(), 163 maxvalue_tensor.shape())); 164 CHECK_EQ(cdf_tensor->dims(), maxvalue_tensor.dims() + 1); 165 const int64 chip_size = cdf_tensor->dim_size(cdf_tensor->dims() - 1); 166 167 std::vector<int64> data_stride = ComputeStrides(data_tensor->shape()); 168 std::vector<int64> cdf_stride = ComputeStrides(cdf_tensor->shape()); 169 170 for (int i = 0; i < cdf_tensor->dims(); ++i) { 171 if (cdf_tensor->dim_size(i) == 1) { 172 cdf_stride[i] = 0; 173 } 174 } 175 176 Tensor histogram_tensor{DT_INT32, cdf_tensor->shape()}; 177 TTypes<int16>::Flat data = data_tensor->flat<int16>(); 178 TTypes<int32>::Flat histogram = histogram_tensor.flat<int32>(); 179 TTypes<int16>::ConstFlat maxvalue = maxvalue_tensor.flat<int16>(); 180 histogram.setZero(); 181 182 for (int64 index = 0; index < data.size(); ++index) { 183 int64 temp = index; 184 int64 offset = 0; 185 for (int dim = 0; dim < data_stride.size(); ++dim) { 186 const int64 coord = temp / data_stride[dim]; 187 offset += coord * cdf_stride[dim]; 188 temp -= coord * data_stride[dim]; 189 } 190 ASSERT_EQ(temp, 0); 191 192 const int64 maxvalue_offset = offset / chip_size; 193 CHECK_EQ(maxvalue_offset * chip_size, offset); 194 CHECK_LT(maxvalue(maxvalue_offset) + 1, chip_size); 195 const int value = LogUniform(gen, maxvalue(maxvalue_offset)); 196 data(index) = value; 197 histogram(offset + value + 1) += 1; 198 } 199 200 cdf_tensor->flat_inner_dims<int32, 2>() = 201 histogram_tensor.flat_inner_dims<int32, 2>().cumsum(1); 202 } 203 }; 204 205 TEST_F(RangeCoderOpsTest, NoBroadcast) { 206 constexpr int kPrecision = 14; 207 constexpr int kMaxValue = 10; 208 209 Tensor data{DT_INT16, {1, 32, 32, 16}}; 210 Tensor temp{DT_INT32, {1, 1, 1, 1, kMaxValue + 2}}; 211 Tensor maxvalue{DT_INT16, {1, 1, 1, 1}}; 212 maxvalue.flat<int16>()(0) = kMaxValue; 213 214 ASSERT_LE(data.shape().num_elements(), 1 << kPrecision); 215 216 random::PhiloxRandom philox(random::New64(), random::New64()); 217 random::SimplePhilox gen(&philox); 218 BuildCdf(&gen, &data, &temp, maxvalue); 219 220 const Eigen::array<int32, 5> broadcast = {1, 32, 32, 16, 1}; 221 222 Tensor cdf{DT_INT32, {1, 32, 32, 16, kMaxValue + 2}}; 223 cdf.tensor<int32, 5>() = temp.tensor<int32, 5>().broadcast(broadcast); 224 225 TestEncodeAndDecode(kPrecision, data, cdf); 226 } 227 228 TEST_F(RangeCoderOpsTest, Broadcast1Axis) { 229 constexpr int kPrecision = 9; 230 constexpr int kDimensionSize = 1 << kPrecision; 231 constexpr int kMinMaxValue = 10; 232 constexpr int kMaxMaxValue = 64; 233 234 random::PhiloxRandom philox(random::New64(), random::New64()); 235 random::SimplePhilox gen(&philox); 236 Tensor data{DT_INT16, {1, kDimensionSize, kDimensionSize}}; 237 238 Tensor maxvalue{DT_INT16, {kDimensionSize}}; 239 PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue); 240 241 { 242 // Axis 1. 243 Tensor maxvalue1; 244 ASSERT_TRUE(maxvalue1.CopyFrom(maxvalue, {1, 1, kDimensionSize})); 245 246 Tensor cdf{DT_INT32, {1, 1, kDimensionSize, kMaxMaxValue + 2}}; 247 BuildCdf(&gen, &data, &cdf, maxvalue1); 248 TestEncodeAndDecode(kPrecision, data, cdf); 249 } 250 251 { 252 // Axis 2. 253 Tensor maxvalue2; 254 ASSERT_TRUE(maxvalue2.CopyFrom(maxvalue, {1, kDimensionSize, 1})); 255 256 Tensor cdf{DT_INT32, {1, kDimensionSize, 1, kMaxMaxValue + 2}}; 257 BuildCdf(&gen, &data, &cdf, maxvalue2); 258 TestEncodeAndDecode(kPrecision, data, cdf); 259 } 260 } 261 262 TEST_F(RangeCoderOpsTest, Broadcast2Axes) { 263 constexpr int kPrecision = 13; 264 constexpr int kDimensionSize1 = 1 << (kPrecision / 2); 265 constexpr int kDimensionSize2 = 1 << (kPrecision - kPrecision / 2); 266 constexpr int kMinMaxValue = 10; 267 constexpr int kMaxMaxValue = 64; 268 269 random::PhiloxRandom philox(random::New64(), random::New64()); 270 random::SimplePhilox gen(&philox); 271 Tensor maxvalue{DT_INT16, {2, 1, 1, 7}}; 272 PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue); 273 274 Tensor data{DT_INT16, {2, kDimensionSize1, kDimensionSize2, 7}}; 275 Tensor cdf{DT_INT32, {2, 1, 1, 7, kMaxMaxValue + 2}}; 276 BuildCdf(&gen, &data, &cdf, maxvalue); 277 TestEncodeAndDecode(kPrecision, data, cdf); 278 } 279 280 TEST_F(RangeCoderOpsTest, InvalidCdfShape) { 281 Tensor data{DT_INT16, {3, 3}}; 282 Tensor cdf{DT_INT32, {3, 3}}; 283 284 Tensor unused; 285 { 286 const Status status = RunEncodeOp(10, {data, cdf}, &unused); 287 EXPECT_FALSE(status.ok()); 288 EXPECT_NE(status.error_message().find("`cdf` should have one more axis"), 289 string::npos); 290 } 291 292 Tensor empty{DT_STRING, {}}; 293 Tensor shape{DT_INT32, {2}}; 294 shape.vec<int32>().setValues({3, 3}); 295 { 296 const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused); 297 EXPECT_FALSE(status.ok()); 298 EXPECT_NE(status.error_message().find("`cdf` should have one more axis"), 299 string::npos); 300 } 301 302 cdf = Tensor{DT_INT32, {3, 3, 1}}; 303 { 304 const Status status = RunEncodeOp(10, {data, cdf}, &unused); 305 EXPECT_FALSE(status.ok()); 306 EXPECT_NE( 307 status.error_message().find("last dimension of `cdf` should be > 1"), 308 string::npos); 309 } 310 { 311 const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused); 312 EXPECT_FALSE(status.ok()); 313 EXPECT_NE( 314 status.error_message().find("last dimension of `cdf` should be > 1"), 315 string::npos); 316 } 317 } 318 319 TEST_F(RangeCoderOpsTest, DecoderShapeFn) { 320 Tensor encoded_tensor{DT_STRING, {}}; 321 Tensor shape_tensor{DT_INT32, {3}}; 322 Tensor cdf_tensor{DT_INT32, {4, 6, 8, 2}}; 323 324 shape_tensor.flat<int32>().setValues({4, 6, 8}); 325 326 Graph g{OpRegistry::Global()}; 327 Node* encoded = test::graph::Constant(&g, encoded_tensor); 328 Node* shape = test::graph::Constant(&g, shape_tensor); 329 Node* cdf = test::graph::Constant(&g, cdf_tensor); 330 Node* decode; 331 TF_ASSERT_OK(NodeBuilder("range_decode", "RangeDecode", g.op_registry()) 332 .Input(encoded) 333 .Input(shape) 334 .Input(cdf) 335 .Attr("precision", 10) 336 .Finalize(&g, &decode)); 337 338 ShapeRefiner refiner{g.versions().producer(), g.op_registry()}; 339 TF_ASSERT_OK(refiner.AddNode(encoded)); 340 TF_ASSERT_OK(refiner.AddNode(shape)); 341 TF_ASSERT_OK(refiner.AddNode(cdf)); 342 TF_ASSERT_OK(refiner.AddNode(decode)); 343 344 auto* context = refiner.GetContext(decode); 345 ASSERT_NE(context, nullptr); 346 347 ASSERT_EQ(context->num_outputs(), 1); 348 auto shape_handle = context->output(0); 349 350 ASSERT_EQ(context->Rank(shape_handle), 3); 351 EXPECT_EQ(context->Value(context->Dim(shape_handle, 0)), 4); 352 EXPECT_EQ(context->Value(context->Dim(shape_handle, 1)), 6); 353 EXPECT_EQ(context->Value(context->Dim(shape_handle, 2)), 8); 354 } 355 356 TEST_F(RangeCoderOpsTest, InvalidBroadcast) { 357 Tensor data{DT_INT16, {3, 3}}; 358 Tensor cdf{DT_INT32, {3, 2, 2}}; 359 360 Tensor unused; 361 { 362 const Status status = RunEncodeOp(10, {data, cdf}, &unused); 363 EXPECT_FALSE(status.ok()); 364 EXPECT_NE(status.error_message().find("Cannot broadcast shape"), 365 string::npos); 366 } 367 368 data = Tensor{DT_INT16, {3, 1}}; 369 cdf = Tensor{DT_INT32, {3, 3, 2}}; 370 Tensor empty{DT_STRING, {}}; 371 Tensor shape{DT_INT32, {2}}; 372 shape.vec<int32>().setValues({3, 1}); 373 { 374 const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused); 375 EXPECT_FALSE(status.ok()); 376 EXPECT_NE(status.error_message().find("Cannot broadcast shape"), 377 string::npos); 378 } 379 380 std::vector<int64> shape_vector = {2, 2, 2, 2, 2, 2, 2, 2, 2}; 381 data = Tensor{DT_INT16, TensorShape{shape_vector}}; 382 cdf = Tensor{DT_INT32, {2, 1, 2, 1, 2, 1, 2, 1, 2, 2}}; 383 { 384 const Status status = RunEncodeOp(10, {data, cdf}, &unused); 385 EXPECT_FALSE(status.ok()); 386 EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos); 387 } 388 389 shape = Tensor{DT_INT32, {static_cast<int64>(shape_vector.size())}}; 390 for (int i = 0; i < shape_vector.size(); ++i) { 391 shape.flat<int32>()(i) = shape_vector[i]; 392 } 393 { 394 const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused); 395 EXPECT_FALSE(status.ok()); 396 EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos); 397 } 398 } 399 400 // Benchmark ------------------------------------------------------------- 401 402 // This function creates RangeEncode graph with CDF built from a separate data 403 // sample. 404 Graph* CreateRangeEncodeFullBroadcastGraph(const TensorShape& shape, 405 int precision) { 406 CHECK_EQ(shape.dims(), 4); 407 408 constexpr int kAlphabetSize = 70; 409 410 Tensor histogram{DT_INT32, {kAlphabetSize + 1}}; 411 TTypes<int32>::Vec h = histogram.vec<int32>(); 412 h.setConstant(1); 413 h(0) = 0; 414 415 random::PhiloxRandom philox(random::New64(), random::New64()); 416 random::SimplePhilox gen(&philox); 417 for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) { 418 const int value = LogUniform(&gen, kAlphabetSize - 1); 419 h(value + 1) += 1; 420 } 421 422 Tensor cdf{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}}; 423 cdf.flat<int32>() = h.cumsum(0); 424 425 Tensor data{DT_INT16, shape}; 426 TTypes<int16>::Flat d = data.flat<int16>(); 427 for (int64 i = 0; i < d.size(); ++i) { 428 d(i) = LogUniform(&gen, kAlphabetSize - 1); 429 } 430 431 Graph* g = new Graph(OpRegistry::Global()); 432 TF_CHECK_OK(NodeBuilder("range_encode", "RangeEncode", g->op_registry()) 433 .Input(test::graph::Constant(g, data)) 434 .Input(test::graph::Constant(g, cdf)) 435 .Attr("precision", precision) 436 .Finalize(g, nullptr)); 437 return g; 438 } 439 440 // This function creates RangeDecode graph with CDF built from a separate data 441 // sample. 442 Graph* CreateRangeDecodeFullBroadcastGraph(const TensorShape& shape, 443 int precision) { 444 CHECK_EQ(shape.dims(), 4); 445 446 constexpr int kAlphabetSize = 200; 447 const int64 num_elements = shape.num_elements(); 448 449 Tensor histogram{DT_INT32, {kAlphabetSize + 1}}; 450 TTypes<int32>::Vec h = histogram.vec<int32>(); 451 h.setConstant(1); 452 h(0) = 0; 453 454 random::PhiloxRandom philox(random::New64(), random::New64()); 455 random::SimplePhilox gen(&philox); 456 for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) { 457 const int value = LogUniform(&gen, kAlphabetSize - 1); 458 h(value + 1) += 1; 459 } 460 461 Tensor cdf_tensor{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}}; 462 TTypes<int32>::Flat cdf = cdf_tensor.flat<int32>(); 463 cdf = h.cumsum(0); 464 465 Tensor string_tensor{DT_STRING, TensorShape{}}; 466 string& sink = string_tensor.scalar<string>()(); 467 468 RangeEncoder encoder{precision}; 469 for (int64 i = 0; i < num_elements; ++i) { 470 const int value = LogUniform(&gen, kAlphabetSize - 1); 471 encoder.Encode(cdf(value), cdf(value + 1), &sink); 472 } 473 encoder.Finalize(&sink); 474 475 Tensor shape_tensor{DT_INT32, {shape.dims()}}; 476 for (int i = 0; i < shape.dims(); ++i) { 477 shape_tensor.flat<int32>()(i) = shape.dim_size(i); 478 } 479 480 Graph* g = new Graph(OpRegistry::Global()); 481 TF_CHECK_OK(NodeBuilder("range_decode", "RangeDecode", g->op_registry()) 482 .Input(test::graph::Constant(g, string_tensor)) 483 .Input(test::graph::Constant(g, shape_tensor)) 484 .Input(test::graph::Constant(g, cdf_tensor)) 485 .Attr("precision", precision) 486 .Finalize(g, nullptr)); 487 return g; 488 } 489 490 void RunTensorFlowBenchmark(int iters, Graph* g, int64 num_elements) { 491 SessionOptions opts; 492 opts.config.set_intra_op_parallelism_threads(1); 493 opts.config.set_inter_op_parallelism_threads(1); 494 495 testing::UseRealTime(); 496 test::Benchmark("cpu", g, &opts).Run(iters); 497 498 const int64 num_items = static_cast<int64>(iters) * num_elements; 499 testing::ItemsProcessed(num_items); 500 } 501 502 void BM_RangeEncodeFullBroadcast(int iters, int code_size) { 503 constexpr int kPrecision = 14; 504 const TensorShape shape = {1, code_size, code_size, 256}; 505 Graph* g = CreateRangeEncodeFullBroadcastGraph(shape, kPrecision); 506 RunTensorFlowBenchmark(iters, g, shape.num_elements()); 507 } 508 509 BENCHMARK(BM_RangeEncodeFullBroadcast)->Arg(32)->Arg(64); 510 511 void BM_RangeDecodeFullBroadcast(int iters, int code_size) { 512 constexpr int kPrecision = 14; 513 const TensorShape shape = {1, code_size, code_size, 256}; 514 Graph* g = CreateRangeDecodeFullBroadcastGraph(shape, kPrecision); 515 RunTensorFlowBenchmark(iters, g, shape.num_elements()); 516 } 517 518 BENCHMARK(BM_RangeDecodeFullBroadcast)->Arg(32)->Arg(64); 519 520 } // namespace 521 } // namespace tensorflow 522