Home | History | Annotate | Download | only in util
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/util/tensor_slice_writer.h"
     17 
     18 #include <array>
     19 
     20 #include "tensorflow/core/framework/tensor_shape.pb.h"
     21 #include "tensorflow/core/framework/versions.pb.h"
     22 #include "tensorflow/core/lib/core/status_test_util.h"
     23 #include "tensorflow/core/lib/core/stringpiece.h"
     24 #include "tensorflow/core/lib/io/path.h"
     25 #include "tensorflow/core/platform/logging.h"
     26 #include "tensorflow/core/platform/protobuf.h"
     27 #include "tensorflow/core/platform/test.h"
     28 #include "tensorflow/core/public/version.h"
     29 #include "tensorflow/core/util/saved_tensor_slice_util.h"
     30 #include "tensorflow/core/util/tensor_slice_reader.h"
     31 
     32 namespace tensorflow {
     33 
     34 namespace checkpoint {
     35 
     36 class TensorSliceWriteTestHelper {
     37  public:
     38   static void CheckEntries(const string& fname);
     39   static void GetData(TensorSliceReader::Table* table, const string& name,
     40                       const TensorSlice& slice, SavedSlice* ss);
     41 };
     42 
     43 namespace {
     44 
     45 // Testing that an array is what is expected
     46 void ExpectIdenticalFloatArrays(const float* expected, int size,
     47                                 const float* actual) {
     48   // TODO(yangke): copy some of the Dump* functions over
     49   //  LOG(INFO) << "Expected = " << DumpFloatArray(expected, size);
     50   //  LOG(INFO) << "Actual   = " << DumpFloatArray(actual, size);
     51   for (int i = 0; i < size; ++i) {
     52     EXPECT_NEAR(expected[i], actual[i], 1e-6);
     53   }
     54 }
     55 
     56 template <typename T, typename U>
     57 void ExpectIdenticalIntArrays(const T* expected, int size, const U* actual) {
     58   for (int i = 0; i < size; ++i) {
     59     EXPECT_EQ(expected[i], static_cast<T>(actual[i]));
     60   }
     61 }
     62 
     63 // Nifty routine to get the size of an array
     64 template <typename T, unsigned SIZE>
     65 inline size_t ArraySize(const T (&v)[SIZE]) {
     66   return SIZE;
     67 }
     68 
     69 // A simple test on writing a few tensor slices
     70 // TODO(yangke): refactor into smaller tests: will do as we add more stuff to
     71 // the writer.
     72 TEST(TensorSliceWriteTest, SimpleWrite) {
     73   const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");
     74 
     75   TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);
     76 
     77   // Add some int32 tensor slices
     78   {
     79     TensorShape shape({5, 10});
     80     TensorSlice slice = TensorSlice::ParseOrDie("-:0,1");
     81     const int32 data[] = {0, 1, 2, 3, 4};
     82     TF_CHECK_OK(writer.Add("test", shape, slice, data));
     83   }
     84 
     85   // Two slices share the same tensor name
     86   {
     87     TensorShape shape({5, 10});
     88     TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
     89     const int32 data[] = {10, 11, 12, 13, 14};
     90     TF_CHECK_OK(writer.Add("test", shape, slice, data));
     91   }
     92 
     93   // Another slice from a different float tensor -- it has a different name and
     94   // should be inserted in front of the previous tensor
     95   {
     96     TensorShape shape({3, 2});
     97     TensorSlice slice = TensorSlice::ParseOrDie("-:-");
     98     const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
     99     TF_CHECK_OK(writer.Add("AA", shape, slice, data));
    100   }
    101 
    102   // A slice with int64 data
    103   {
    104     TensorShape shape({5, 10});
    105     TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    106     const int64 data[] = {10, 11, 12, 13, 14};
    107     TF_CHECK_OK(writer.Add("int64", shape, slice, data));
    108   }
    109 
    110   // A slice with int16 data
    111   {
    112     TensorShape shape({5, 10});
    113     TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    114     const int16 data[] = {10, 11, 12, 13, 14};
    115     TF_CHECK_OK(writer.Add("int16", shape, slice, data));
    116   }
    117 
    118   TF_CHECK_OK(writer.Finish());
    119 
    120   // Now we examine the checkpoint file manually.
    121   TensorSliceWriteTestHelper::CheckEntries(filename);
    122 }
    123 
    124 }  // namespace
    125 
    126 void TensorSliceWriteTestHelper::GetData(TensorSliceReader::Table* table,
    127                                          const string& name,
    128                                          const TensorSlice& slice,
    129                                          SavedSlice* ss) {
    130   string key = EncodeTensorNameSlice(name, slice);
    131   string value;
    132   EXPECT_TRUE(table->Get(key, &value));
    133   SavedTensorSlices sts;
    134   EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
    135   EXPECT_FALSE(sts.has_meta());
    136   *ss = sts.data();
    137   EXPECT_EQ(name, ss->name());
    138   TensorSlice slice2(ss->slice());
    139   EXPECT_EQ(slice.DebugString(), slice2.DebugString());
    140 }
    141 
    142 void TensorSliceWriteTestHelper::CheckEntries(const string& fname) {
    143   TensorSliceReader::Table* tptr;
    144   TF_CHECK_OK(OpenTableTensorSliceReader(fname, &tptr));
    145   std::unique_ptr<TensorSliceReader::Table> table(tptr);
    146   CHECK_NOTNULL(table.get());
    147 
    148   // We expect a block of SavedTensorSlices
    149   string value;
    150   ASSERT_TRUE(table->Get(kSavedTensorSlicesKey, &value));
    151   {
    152     SavedTensorSlices sts;
    153     EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
    154     // We also expect two entries for the tensors
    155     EXPECT_TRUE(sts.has_meta());
    156     EXPECT_EQ(4, sts.meta().tensor_size());
    157     // We should have written nontrivial version information
    158     EXPECT_LT(0, TF_CHECKPOINT_VERSION);
    159     EXPECT_EQ(TF_CHECKPOINT_VERSION, sts.meta().versions().producer());
    160     EXPECT_EQ(TF_CHECKPOINT_VERSION_MIN_CONSUMER,
    161               sts.meta().versions().min_consumer());
    162     // We don't expect any data in the first block.
    163     EXPECT_FALSE(sts.has_data());
    164     // The two tensors should be stored in the same order as they are first
    165     // created.
    166     {
    167       // The two slices of the "test" tensor
    168       const SavedSliceMeta& ssm = sts.meta().tensor(0);
    169       EXPECT_EQ("test", ssm.name());
    170       EXPECT_EQ(
    171           "dim { size: 5 } "
    172           "dim { size: 10 }",
    173           ssm.shape().ShortDebugString());
    174       EXPECT_EQ(DT_INT32, ssm.type());
    175       EXPECT_EQ(2, ssm.slice_size());
    176       TensorSlice s0(ssm.slice(0));
    177       TensorSlice s1(ssm.slice(1));
    178       EXPECT_EQ("-:0,1", s0.DebugString());
    179       EXPECT_EQ("-:3,1", s1.DebugString());
    180     }
    181     {
    182       // The "AA" tensor
    183       const SavedSliceMeta& ssm = sts.meta().tensor(1);
    184       EXPECT_EQ("AA", ssm.name());
    185       EXPECT_EQ(
    186           "dim { size: 3 } "
    187           "dim { size: 2 }",
    188           ssm.shape().ShortDebugString());
    189       EXPECT_EQ(DT_FLOAT, ssm.type());
    190       EXPECT_EQ(1, ssm.slice_size());
    191       TensorSlice s0(ssm.slice(0));
    192       EXPECT_EQ("-:-", s0.DebugString());
    193     }
    194     {
    195       // The "int64" tensor
    196       const SavedSliceMeta& ssm = sts.meta().tensor(2);
    197       EXPECT_EQ("int64", ssm.name());
    198       EXPECT_EQ(
    199           "dim { size: 5 } "
    200           "dim { size: 10 }",
    201           ssm.shape().ShortDebugString());
    202       EXPECT_EQ(DT_INT64, ssm.type());
    203       EXPECT_EQ(1, ssm.slice_size());
    204       TensorSlice s0(ssm.slice(0));
    205       EXPECT_EQ("-:3,1", s0.DebugString());
    206     }
    207     {
    208       // The "int16" tensor
    209       const SavedSliceMeta& ssm = sts.meta().tensor(3);
    210       EXPECT_EQ("int16", ssm.name());
    211       EXPECT_EQ(
    212           "dim { size: 5 } "
    213           "dim { size: 10 }",
    214           ssm.shape().ShortDebugString());
    215       EXPECT_EQ(DT_INT16, ssm.type());
    216       EXPECT_EQ(1, ssm.slice_size());
    217       TensorSlice s0(ssm.slice(0));
    218       EXPECT_EQ("-:3,1", s0.DebugString());
    219     }
    220   }
    221 
    222   // We expect 5 blocks of tensor data
    223   {
    224     // Block 1: we expect it to be the full slice of the "AA" tensor
    225     SavedSlice ss;
    226     GetData(table.get(), "AA", TensorSlice(2), &ss);
    227     const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
    228     EXPECT_EQ(ArraySize(data), ss.data().float_val_size());
    229     ExpectIdenticalFloatArrays(data, ArraySize(data),
    230                                ss.data().float_val().data());
    231   }
    232 
    233   {
    234     // Block 2: we expect it to be the first slice of the "test" tensor
    235     SavedSlice ss;
    236     GetData(table.get(), "test", TensorSlice({{0, -1}, {0, 1}}), &ss);
    237     const int32 data[] = {0, 1, 2, 3, 4};
    238     EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    239     ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
    240   }
    241 
    242   {
    243     // Block 3: we expect it to be the second slice of the "test" tensor
    244     SavedSlice ss;
    245     GetData(table.get(), "test", TensorSlice({{0, -1}, {3, 1}}), &ss);
    246     const int32 data[] = {10, 11, 12, 13, 14};
    247     EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    248     ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
    249   }
    250 
    251   {
    252     // Block 4: we expect it to be the slice of the "int64" tensor
    253     SavedSlice ss;
    254     GetData(table.get(), "int64", TensorSlice({{0, -1}, {3, 1}}), &ss);
    255     const int64 data[] = {10, 11, 12, 13, 14};
    256     EXPECT_EQ(ArraySize(data), ss.data().int64_val_size());
    257     ExpectIdenticalIntArrays(data, ArraySize(data),
    258                              ss.data().int64_val().data());
    259   }
    260 
    261   {
    262     // Block 5: we expect it to be the slice of the "int16" tensor
    263     SavedSlice ss;
    264     GetData(table.get(), "int16", TensorSlice({{0, -1}, {3, 1}}), &ss);
    265     const int16 data[] = {10, 11, 12, 13, 14};
    266     EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    267     ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
    268   }
    269 }
    270 
    271 template <typename DT>
    272 size_t BytesPerElementHelper(DT value) {
    273   SavedSlice ss;
    274   std::array<DT, 1> lo_data;
    275   std::fill(lo_data.begin(), lo_data.end(), value);
    276   TF_EXPECT_OK(
    277       TensorSliceWriter::SaveData(lo_data.data(), lo_data.size(), &ss));
    278   size_t lo_byte_size = ss.ByteSizeLong();
    279 
    280   std::array<DT, 1001> hi_data;
    281   std::fill(hi_data.begin(), hi_data.end(), value);
    282   TF_EXPECT_OK(
    283       TensorSliceWriter::SaveData(hi_data.data(), hi_data.size(), &ss));
    284   size_t hi_byte_size = ss.ByteSizeLong();
    285 
    286   return (hi_byte_size - lo_byte_size) / (hi_data.size() - lo_data.size());
    287 }
    288 
    289 TEST(TensorSliceWriteTest, CheckpointSize) {
    290   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
    291             BytesPerElementHelper<bool>(false));
    292   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
    293             BytesPerElementHelper<bool>(true));
    294   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_FLOAT),
    295             BytesPerElementHelper<float>(-1.0));
    296   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_DOUBLE),
    297             BytesPerElementHelper<double>(-1.0));
    298   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX64),
    299             BytesPerElementHelper<complex64>(-1.0));
    300   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX128),
    301             BytesPerElementHelper<complex128>(-1.0));
    302   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT32),
    303             BytesPerElementHelper<int32>(-1));
    304   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT64),
    305             BytesPerElementHelper<int64>(-1));
    306   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT16),
    307             BytesPerElementHelper<uint16>(std::numeric_limits<uint16>::max()));
    308   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT8),
    309             BytesPerElementHelper<uint8>(std::numeric_limits<uint8>::max()));
    310   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT8),
    311             BytesPerElementHelper<int8>(-1));
    312   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT16),
    313             BytesPerElementHelper<int16>(-1));
    314   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT8),
    315             BytesPerElementHelper<qint8>(-1));
    316   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QUINT8),
    317             BytesPerElementHelper<quint8>(std::numeric_limits<uint8>::max()));
    318   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT32),
    319             BytesPerElementHelper<qint32>(-1));
    320   EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_HALF),
    321             BytesPerElementHelper<Eigen::half>(Eigen::half(-1.0)));
    322 }
    323 
    324 TEST(TensorSliceWriteTest, SizeErrors) {
    325   const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");
    326 
    327   TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);
    328 
    329   // Add a 300MB int8 tensor slice, which will fail because it expands to 3GB.
    330   {
    331     TensorShape shape({300, 1000000});
    332     TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    333     const std::vector<int8> data(300000000, -1);
    334     Status s = writer.Add("test1", shape, slice, data.data());
    335     EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    336     EXPECT_TRUE(StringPiece(s.error_message())
    337                     .contains("Tensor slice is too large to serialize"));
    338   }
    339 
    340   // Add a large string tensor slice, which will fail.
    341   {
    342     TensorShape shape({256, 1024});
    343     TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    344     const std::vector<string> data(256 * 1024, std::string(8192, 'f'));
    345     Status s = writer.Add("test2", shape, slice, data.data());
    346     EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    347     EXPECT_TRUE(StringPiece(s.error_message())
    348                     .contains("Tensor slice is too large to serialize"));
    349   }
    350 }
    351 
    352 }  // namespace checkpoint
    353 
    354 }  // namespace tensorflow
    355