/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <vector>

#include "tensorflow/contrib/coder/kernels/range_coder.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/testlib.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {
namespace {
// Draws a sample from [0, n) whose bit length is roughly uniform.
int LogUniform(random::SimplePhilox* gen, uint32 n) {
  CHECK_GT(n, 0);

  // Split [0, n) into {0}, [1, 2), [2, 4), [4, 8), ..., [2^(m-1), n).
  const int m = Log2Ceiling(n);

  int outcome;
  do {
    // Uniform() consumes at least 32 bits per call, so this implementation is
    // somewhat wasteful. Since it is used only in tests, we do not refine it
    // further.
    const int k = gen->Uniform(m + 1) - 1;
    // If k == -1, then sample from {0}.
    // If k == 0, then sample from [1, 2).
    // If k == 1, then sample from [2, 4), ... and so on.
    if (k < 1) {
      outcome = k + 1;
    } else {
      outcome = (1 << k) + gen->Uniform(1 << k);
    }
  } while (n <= outcome);
  return outcome;
}
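
// A small sanity-check sketch for the helper above; it assumes only the
// contract documented there, namely that LogUniform() returns a value in
// [0, n). The bound of 10 is an arbitrary illustrative choice. For n = 10,
// m = Log2Ceiling(10) = 4, so k is drawn from {-1, 0, 1, 2, 3} and the last
// bucket [8, 16) is trimmed to [8, 10) by the rejection loop.
TEST(LogUniformTest, OutcomeIsAlwaysInRange) {
  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int trial = 0; trial < 1000; ++trial) {
    const int outcome = LogUniform(&gen, 10);
    EXPECT_GE(outcome, 0);
    EXPECT_LT(outcome, 10);
  }
}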

// Computes row-major strides for `shape` (last axis fastest); for example,
// shape {2, 3, 4} yields strides {12, 4, 1}.
std::vector<int64> ComputeStrides(const TensorShape& shape) {
  std::vector<int64> stride(shape.dims());
  int64 current = 1;
  for (int i = shape.dims() - 1; i >= 0; --i) {
    stride[i] = current;
    current *= shape.dim_size(i);
  }
  return stride;
}

class RangeCoderOpsTest : public OpsTestBase {
 protected:
  Status RunEncodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("encode", "RangeEncode")
                           .Input(tensorflow::FakeInput(DT_INT16))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

  Status RunDecodeOp(int precision, gtl::ArraySlice<Tensor> input,
                     Tensor* output) {
    TF_RETURN_IF_ERROR(NodeDefBuilder("decode", "RangeDecode")
                           .Input(tensorflow::FakeInput(DT_STRING))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Input(tensorflow::FakeInput(DT_INT32))
                           .Attr("precision", precision)
                           .Finalize(node_def()));
    TF_RETURN_IF_ERROR(InitOp());

    inputs_.clear();
    std::vector<Tensor> copies(input.size());
    for (int i = 0; i < input.size(); ++i) {
      copies[i] = input[i];
      inputs_.emplace_back(&copies[i]);
    }

    TF_RETURN_IF_ERROR(RunOpKernel());

    *output = *GetOutput(0);
    inputs_.clear();

    return Status::OK();
  }

  // Encodes `data` with `cdf`, decodes the result, and checks that the round
  // trip reproduces `data` exactly.
  void TestEncodeAndDecode(int precision, const Tensor& data,
                           const Tensor& cdf) {
    Tensor encoded;
    TF_ASSERT_OK(RunEncodeOp(precision, {data, cdf}, &encoded));

    const TensorShape& data_shape = data.shape();
    Tensor shape{DT_INT32, {data_shape.dims()}};
    for (int i = 0; i < data_shape.dims(); ++i) {
      shape.flat<int32>()(i) = data_shape.dim_size(i);
    }

    Tensor decoded;
    TF_ASSERT_OK(RunDecodeOp(precision, {encoded, shape, cdf}, &decoded));

    EXPECT_EQ(decoded.dtype(), data.dtype());
    EXPECT_EQ(decoded.shape(), data.shape());
    EXPECT_EQ(decoded.tensor_data(), data.tensor_data());
  }

  void PopulateMaxValues(random::SimplePhilox* gen, Tensor* maxvalue_tensor,
                         int min_maxvalue, int max_maxvalue) {
    const int range = max_maxvalue - min_maxvalue;
    TTypes<int16>::Flat flat = maxvalue_tensor->flat<int16>();

    for (int64 i = 0; i < flat.size(); ++i) {
      flat(i) = min_maxvalue + gen->Uniform(range);
    }
  }

  // Fills `data_tensor` with LogUniform samples and accumulates the matching
  // unnormalized CDF along the last axis of `cdf_tensor`, one CDF chip per
  // element of `maxvalue_tensor`.
  void BuildCdf(random::SimplePhilox* gen, Tensor* data_tensor,
                Tensor* cdf_tensor, const Tensor& maxvalue_tensor) {
    CHECK(TensorShapeUtils::StartsWith(cdf_tensor->shape(),
                                       maxvalue_tensor.shape()));
    CHECK_EQ(cdf_tensor->dims(), maxvalue_tensor.dims() + 1);
    const int64 chip_size = cdf_tensor->dim_size(cdf_tensor->dims() - 1);

    std::vector<int64> data_stride = ComputeStrides(data_tensor->shape());
    std::vector<int64> cdf_stride = ComputeStrides(cdf_tensor->shape());

    for (int i = 0; i < cdf_tensor->dims(); ++i) {
      if (cdf_tensor->dim_size(i) == 1) {
        cdf_stride[i] = 0;
      }
    }

    Tensor histogram_tensor{DT_INT32, cdf_tensor->shape()};
    TTypes<int16>::Flat data = data_tensor->flat<int16>();
    TTypes<int32>::Flat histogram = histogram_tensor.flat<int32>();
    TTypes<int16>::ConstFlat maxvalue = maxvalue_tensor.flat<int16>();
    histogram.setZero();

    for (int64 index = 0; index < data.size(); ++index) {
      int64 temp = index;
      int64 offset = 0;
      for (int dim = 0; dim < data_stride.size(); ++dim) {
        const int64 coord = temp / data_stride[dim];
        offset += coord * cdf_stride[dim];
        temp -= coord * data_stride[dim];
      }
      ASSERT_EQ(temp, 0);

      const int64 maxvalue_offset = offset / chip_size;
      CHECK_EQ(maxvalue_offset * chip_size, offset);
      CHECK_LT(maxvalue(maxvalue_offset) + 1, chip_size);
      const int value = LogUniform(gen, maxvalue(maxvalue_offset));
      data(index) = value;
      histogram(offset + value + 1) += 1;
    }

    cdf_tensor->flat_inner_dims<int32, 2>() =
        histogram_tensor.flat_inner_dims<int32, 2>().cumsum(1);
  }
};
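
// Note on the layout BuildCdf() produces for the tests below: each chip along
// the last axis of `cdf` is an unnormalized CDF, with cdf[..., 0] == 0 and
// cdf[..., j] equal to the number of generated samples with value < j, so the
// final entry of a chip equals the number of data elements mapped to it.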

TEST_F(RangeCoderOpsTest, NoBroadcast) {
  constexpr int kPrecision = 14;
  constexpr int kMaxValue = 10;

  Tensor data{DT_INT16, {1, 32, 32, 16}};
  Tensor temp{DT_INT32, {1, 1, 1, 1, kMaxValue + 2}};
  Tensor maxvalue{DT_INT16, {1, 1, 1, 1}};
  maxvalue.flat<int16>()(0) = kMaxValue;

  ASSERT_LE(data.shape().num_elements(), 1 << kPrecision);

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  BuildCdf(&gen, &data, &temp, maxvalue);

  const Eigen::array<int32, 5> broadcast = {1, 32, 32, 16, 1};

  Tensor cdf{DT_INT32, {1, 32, 32, 16, kMaxValue + 2}};
  cdf.tensor<int32, 5>() = temp.tensor<int32, 5>().broadcast(broadcast);

  TestEncodeAndDecode(kPrecision, data, cdf);
}

TEST_F(RangeCoderOpsTest, Broadcast1Axis) {
  constexpr int kPrecision = 9;
  constexpr int kDimensionSize = 1 << kPrecision;
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor data{DT_INT16, {1, kDimensionSize, kDimensionSize}};

  Tensor maxvalue{DT_INT16, {kDimensionSize}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  {
    // Axis 1.
    Tensor maxvalue1;
    ASSERT_TRUE(maxvalue1.CopyFrom(maxvalue, {1, 1, kDimensionSize}));

    Tensor cdf{DT_INT32, {1, 1, kDimensionSize, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue1);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }

  {
    // Axis 2.
    Tensor maxvalue2;
    ASSERT_TRUE(maxvalue2.CopyFrom(maxvalue, {1, kDimensionSize, 1}));

    Tensor cdf{DT_INT32, {1, kDimensionSize, 1, kMaxMaxValue + 2}};
    BuildCdf(&gen, &data, &cdf, maxvalue2);
    TestEncodeAndDecode(kPrecision, data, cdf);
  }
}
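
// The broadcast tests above and below exercise the case where `cdf` has size
// one along some data axes, so a single CDF chip is shared by every element
// along those axes; BuildCdf() models this by zeroing the corresponding CDF
// strides. NoBroadcast instead materializes the chip to the full data shape.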

TEST_F(RangeCoderOpsTest, Broadcast2Axes) {
  constexpr int kPrecision = 13;
  constexpr int kDimensionSize1 = 1 << (kPrecision / 2);
  constexpr int kDimensionSize2 = 1 << (kPrecision - kPrecision / 2);
  constexpr int kMinMaxValue = 10;
  constexpr int kMaxMaxValue = 64;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  Tensor maxvalue{DT_INT16, {2, 1, 1, 7}};
  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);

  Tensor data{DT_INT16, {2, kDimensionSize1, kDimensionSize2, 7}};
  Tensor cdf{DT_INT32, {2, 1, 1, 7, kMaxMaxValue + 2}};
  BuildCdf(&gen, &data, &cdf, maxvalue);
  TestEncodeAndDecode(kPrecision, data, cdf);
}

TEST_F(RangeCoderOpsTest, InvalidCdfShape) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 3}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 3});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
              string::npos);
  }

  cdf = Tensor{DT_INT32, {3, 3, 1}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(
        status.error_message().find("last dimension of `cdf` should be > 1"),
        string::npos);
  }
}
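
// As checked above, both ops require `cdf` to have exactly one more axis than
// the data and a last dimension greater than one: a CDF chip with n + 1
// entries describes n symbol intervals, so at least two entries are needed.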

TEST_F(RangeCoderOpsTest, DecoderShapeFn) {
  Tensor encoded_tensor{DT_STRING, {}};
  Tensor shape_tensor{DT_INT32, {3}};
  Tensor cdf_tensor{DT_INT32, {4, 6, 8, 2}};

  shape_tensor.flat<int32>().setValues({4, 6, 8});

  Graph g{OpRegistry::Global()};
  Node* encoded = test::graph::Constant(&g, encoded_tensor);
  Node* shape = test::graph::Constant(&g, shape_tensor);
  Node* cdf = test::graph::Constant(&g, cdf_tensor);
  Node* decode;
  TF_ASSERT_OK(NodeBuilder("range_decode", "RangeDecode", g.op_registry())
                   .Input(encoded)
                   .Input(shape)
                   .Input(cdf)
                   .Attr("precision", 10)
                   .Finalize(&g, &decode));

  ShapeRefiner refiner{g.versions().producer(), g.op_registry()};
  TF_ASSERT_OK(refiner.AddNode(encoded));
  TF_ASSERT_OK(refiner.AddNode(shape));
  TF_ASSERT_OK(refiner.AddNode(cdf));
  TF_ASSERT_OK(refiner.AddNode(decode));

  auto* context = refiner.GetContext(decode);
  ASSERT_NE(context, nullptr);

  ASSERT_EQ(context->num_outputs(), 1);
  auto shape_handle = context->output(0);

  ASSERT_EQ(context->Rank(shape_handle), 3);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 0)), 4);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 1)), 6);
  EXPECT_EQ(context->Value(context->Dim(shape_handle, 2)), 8);
}

TEST_F(RangeCoderOpsTest, InvalidBroadcast) {
  Tensor data{DT_INT16, {3, 3}};
  Tensor cdf{DT_INT32, {3, 2, 2}};

  Tensor unused;
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  data = Tensor{DT_INT16, {3, 1}};
  cdf = Tensor{DT_INT32, {3, 3, 2}};
  Tensor empty{DT_STRING, {}};
  Tensor shape{DT_INT32, {2}};
  shape.vec<int32>().setValues({3, 1});
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
              string::npos);
  }

  std::vector<int64> shape_vector = {2, 2, 2, 2, 2, 2, 2, 2, 2};
  data = Tensor{DT_INT16, TensorShape{shape_vector}};
  cdf = Tensor{DT_INT32, {2, 1, 2, 1, 2, 1, 2, 1, 2, 2}};
  {
    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }

  shape = Tensor{DT_INT32, {static_cast<int64>(shape_vector.size())}};
  for (int i = 0; i < shape_vector.size(); ++i) {
    shape.flat<int32>()(i) = shape_vector[i];
  }
  {
    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
    EXPECT_FALSE(status.ok());
    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
  }
}

// Benchmark -------------------------------------------------------------

// This function creates a RangeEncode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeEncodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 70;

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  cdf.flat<int32>() = h.cumsum(0);

  Tensor data{DT_INT16, shape};
  TTypes<int16>::Flat d = data.flat<int16>();
  for (int64 i = 0; i < d.size(); ++i) {
    d(i) = LogUniform(&gen, kAlphabetSize - 1);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_encode", "RangeEncode", g->op_registry())
                  .Input(test::graph::Constant(g, data))
                  .Input(test::graph::Constant(g, cdf))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}
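
// Note on the histogram built above: every symbol starts with count 1 so that
// no interval is empty, and the (1 << precision) - kAlphabetSize additional
// samples bring the total mass to exactly 1 << precision, which lines up with
// the op's "precision" attribute. The same construction is used below.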

// This function creates a RangeDecode graph with a CDF built from a separate
// data sample.
Graph* CreateRangeDecodeFullBroadcastGraph(const TensorShape& shape,
                                           int precision) {
  CHECK_EQ(shape.dims(), 4);

  constexpr int kAlphabetSize = 200;
  const int64 num_elements = shape.num_elements();

  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
  TTypes<int32>::Vec h = histogram.vec<int32>();
  h.setConstant(1);
  h(0) = 0;

  random::PhiloxRandom philox(random::New64(), random::New64());
  random::SimplePhilox gen(&philox);
  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    h(value + 1) += 1;
  }

  Tensor cdf_tensor{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
  TTypes<int32>::Flat cdf = cdf_tensor.flat<int32>();
  cdf = h.cumsum(0);

  Tensor string_tensor{DT_STRING, TensorShape{}};
  string& sink = string_tensor.scalar<string>()();

  RangeEncoder encoder{precision};
  for (int64 i = 0; i < num_elements; ++i) {
    const int value = LogUniform(&gen, kAlphabetSize - 1);
    encoder.Encode(cdf(value), cdf(value + 1), &sink);
  }
  encoder.Finalize(&sink);

  Tensor shape_tensor{DT_INT32, {shape.dims()}};
  for (int i = 0; i < shape.dims(); ++i) {
    shape_tensor.flat<int32>()(i) = shape.dim_size(i);
  }

  Graph* g = new Graph(OpRegistry::Global());
  TF_CHECK_OK(NodeBuilder("range_decode", "RangeDecode", g->op_registry())
                  .Input(test::graph::Constant(g, string_tensor))
                  .Input(test::graph::Constant(g, shape_tensor))
                  .Input(test::graph::Constant(g, cdf_tensor))
                  .Attr("precision", precision)
                  .Finalize(g, nullptr));
  return g;
}

// Runs `g` in a single-threaded CPU session and reports throughput as tensor
// elements processed.
void RunTensorFlowBenchmark(int iters, Graph* g, int64 num_elements) {
  SessionOptions opts;
  opts.config.set_intra_op_parallelism_threads(1);
  opts.config.set_inter_op_parallelism_threads(1);

  testing::UseRealTime();
  test::Benchmark("cpu", g, &opts).Run(iters);

  const int64 num_items = static_cast<int64>(iters) * num_elements;
  testing::ItemsProcessed(num_items);
}

void BM_RangeEncodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeEncodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeEncodeFullBroadcast)->Arg(32)->Arg(64);

void BM_RangeDecodeFullBroadcast(int iters, int code_size) {
  constexpr int kPrecision = 14;
  const TensorShape shape = {1, code_size, code_size, 256};
  Graph* g = CreateRangeDecodeFullBroadcastGraph(shape, kPrecision);
  RunTensorFlowBenchmark(iters, g, shape.num_elements());
}

BENCHMARK(BM_RangeDecodeFullBroadcast)->Arg(32)->Arg(64);

}  // namespace
}  // namespace tensorflow