/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/util/tensor_slice_writer.h"

#include <array>

#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/saved_tensor_slice_util.h"
#include "tensorflow/core/util/tensor_slice_reader.h"

namespace tensorflow {

namespace checkpoint {

// Test helper that re-opens the checkpoint table written by TensorSliceWriter
// and inspects its raw key/value entries directly, so the tests can verify
// the on-disk layout (metadata block plus per-slice data blocks) without
// going through TensorSliceReader's higher-level API.
class TensorSliceWriteTestHelper {
 public:
  // Opens `fname` as a table and checks every expected entry written by the
  // SimpleWrite test: the metadata block and the five data blocks.
  static void CheckEntries(const string& fname);
  // Looks up the data block for (`name`, `slice`) in `table`, parses it into
  // `*ss`, and verifies the stored name/slice match what was requested.
  static void GetData(TensorSliceReader::Table* table, const string& name,
                      const TensorSlice& slice, SavedSlice* ss);
};

namespace {

// Testing that an array is what is expected
void ExpectIdenticalFloatArrays(const float* expected, int size,
                                const float* actual) {
  // TODO(yangke): copy some of the Dump* functions over
  // LOG(INFO) << "Expected = " << DumpFloatArray(expected, size);
  // LOG(INFO) << "Actual = " << DumpFloatArray(actual, size);
  for (int i = 0; i < size; ++i) {
    // Element-wise approximate comparison; 1e-6 absorbs float round-trip
    // error through the serialized proto.
    EXPECT_NEAR(expected[i], actual[i], 1e-6);
  }
}

// Element-wise exact comparison of two integer arrays, casting `actual`
// to the expected element type (the proto may store a wider type, e.g.
// int16 values round-trip through an int32 field).
template <typename T, typename U>
void ExpectIdenticalIntArrays(const T* expected, int size, const U* actual) {
  for (int i = 0; i < size; ++i) {
    EXPECT_EQ(expected[i], static_cast<T>(actual[i]));
  }
}

// Nifty routine to get the size of an array
template <typename T, unsigned SIZE>
inline size_t ArraySize(const T (&v)[SIZE]) {
  return SIZE;
}

// A simple test on writing a few tensor slices
// TODO(yangke): refactor into smaller tests: will do as we add more stuff to
// the writer.
TEST(TensorSliceWriteTest, SimpleWrite) {
  const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");

  TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);

  // Add some int32 tensor slices
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:0,1");
    const int32 data[] = {0, 1, 2, 3, 4};
    TF_CHECK_OK(writer.Add("test", shape, slice, data));
  }

  // Two slices share the same tensor name
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int32 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("test", shape, slice, data));
  }

  // Another slice from a different float tensor -- it has a different name and
  // should be inserted in front of the previous tensor
  {
    TensorShape shape({3, 2});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
    TF_CHECK_OK(writer.Add("AA", shape, slice, data));
  }

  // A slice with int64 data
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int64 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("int64", shape, slice, data));
  }

  // A slice with int16 data
  {
    TensorShape shape({5, 10});
    TensorSlice slice = TensorSlice::ParseOrDie("-:3,1");
    const int16 data[] = {10, 11, 12, 13, 14};
    TF_CHECK_OK(writer.Add("int16", shape, slice, data));
  }

  TF_CHECK_OK(writer.Finish());

  // Now we examine the checkpoint file manually.
  TensorSliceWriteTestHelper::CheckEntries(filename);
}

}  // namespace

void TensorSliceWriteTestHelper::GetData(TensorSliceReader::Table* table,
                                         const string& name,
                                         const TensorSlice& slice,
                                         SavedSlice* ss) {
  // Data blocks are keyed by an encoding of (tensor name, slice spec).
  string key = EncodeTensorNameSlice(name, slice);
  string value;
  EXPECT_TRUE(table->Get(key, &value));
  SavedTensorSlices sts;
  EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
  // A data block must carry only slice data, never the metadata message.
  EXPECT_FALSE(sts.has_meta());
  *ss = sts.data();
  // Sanity-check that the stored identity matches the lookup key.
  EXPECT_EQ(name, ss->name());
  TensorSlice slice2(ss->slice());
  EXPECT_EQ(slice.DebugString(), slice2.DebugString());
}

void TensorSliceWriteTestHelper::CheckEntries(const string& fname) {
  // Open the raw table; ownership of the table pointer transfers to us.
  TensorSliceReader::Table* tptr;
  TF_CHECK_OK(OpenTableTensorSliceReader(fname, &tptr));
  std::unique_ptr<TensorSliceReader::Table> table(tptr);
  CHECK_NOTNULL(table.get());

  // We expect a block of SavedTensorSlices
  string value;
  ASSERT_TRUE(table->Get(kSavedTensorSlicesKey, &value));
  {
    SavedTensorSlices sts;
    EXPECT_TRUE(ParseProtoUnlimited(&sts, value));
    // We also expect two entries for the tensors
    EXPECT_TRUE(sts.has_meta());
    EXPECT_EQ(4, sts.meta().tensor_size());
    // We should have written nontrivial version information
    EXPECT_LT(0, TF_CHECKPOINT_VERSION);
    EXPECT_EQ(TF_CHECKPOINT_VERSION, sts.meta().versions().producer());
    EXPECT_EQ(TF_CHECKPOINT_VERSION_MIN_CONSUMER,
              sts.meta().versions().min_consumer());
    // We don't expect any data in the first block.
    EXPECT_FALSE(sts.has_data());
    // The two tensors should be stored in the same order as they are first
    // created.
    {
      // The two slices of the "test" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(0);
      EXPECT_EQ("test", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT32, ssm.type());
      EXPECT_EQ(2, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      TensorSlice s1(ssm.slice(1));
      EXPECT_EQ("-:0,1", s0.DebugString());
      EXPECT_EQ("-:3,1", s1.DebugString());
    }
    {
      // The "AA" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(1);
      EXPECT_EQ("AA", ssm.name());
      EXPECT_EQ(
          "dim { size: 3 } "
          "dim { size: 2 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_FLOAT, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:-", s0.DebugString());
    }
    {
      // The "int64" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(2);
      EXPECT_EQ("int64", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT64, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:3,1", s0.DebugString());
    }
    {
      // The "int16" tensor
      const SavedSliceMeta& ssm = sts.meta().tensor(3);
      EXPECT_EQ("int16", ssm.name());
      EXPECT_EQ(
          "dim { size: 5 } "
          "dim { size: 10 }",
          ssm.shape().ShortDebugString());
      EXPECT_EQ(DT_INT16, ssm.type());
      EXPECT_EQ(1, ssm.slice_size());
      TensorSlice s0(ssm.slice(0));
      EXPECT_EQ("-:3,1", s0.DebugString());
    }
  }

  // We expect 5 blocks of tensor data
  {
    // Block 1: we expect it to be the full slice of the "AA" tensor
    SavedSlice ss;
    GetData(table.get(), "AA", TensorSlice(2), &ss);
    const float data[] = {1.2, 1.3, 1.4, 2.1, 2.2, 2.3};
    EXPECT_EQ(ArraySize(data), ss.data().float_val_size());
    ExpectIdenticalFloatArrays(data, ArraySize(data),
                               ss.data().float_val().data());
  }

  {
    // Block 2: we expect it to be the first slice of the "test" tensor
    SavedSlice ss;
    GetData(table.get(), "test", TensorSlice({{0, -1}, {0, 1}}), &ss);
    const int32 data[] = {0, 1, 2, 3, 4};
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }

  {
    // Block 3: we expect it to be the second slice of the "test" tensor
    SavedSlice ss;
    GetData(table.get(), "test", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int32 data[] = {10, 11, 12, 13, 14};
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }

  {
    // Block 4: we expect it to be the slice of the "int64" tensor
    SavedSlice ss;
    GetData(table.get(), "int64", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int64 data[] = {10, 11, 12, 13, 14};
    EXPECT_EQ(ArraySize(data), ss.data().int64_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data),
                             ss.data().int64_val().data());
  }

  {
    // Block 5: we expect it to be the slice of the "int16" tensor
    SavedSlice ss;
    GetData(table.get(), "int16", TensorSlice({{0, -1}, {3, 1}}), &ss);
    const int16 data[] = {10, 11, 12, 13, 14};
    // Note: int16 values are stored in the int_val (int32) proto field.
    EXPECT_EQ(ArraySize(data), ss.data().int_val_size());
    ExpectIdenticalIntArrays(data, ArraySize(data), ss.data().int_val().data());
  }
}

// Measures the marginal serialized size per element for type DT: serializes a
// 1-element slice and a 1001-element slice of `value` and divides the byte
// difference by the element-count difference. Using a difference (rather than
// a single slice's size) cancels out the fixed per-message overhead, so the
// result can be compared against MaxBytesPerElement.
template <typename DT>
size_t BytesPerElementHelper(DT value) {
  SavedSlice ss;
  std::array<DT, 1> lo_data;
  std::fill(lo_data.begin(), lo_data.end(), value);
  TF_EXPECT_OK(
      TensorSliceWriter::SaveData(lo_data.data(), lo_data.size(), &ss));
  size_t lo_byte_size = ss.ByteSizeLong();

  std::array<DT, 1001> hi_data;
  std::fill(hi_data.begin(), hi_data.end(), value);
  TF_EXPECT_OK(
      TensorSliceWriter::SaveData(hi_data.data(), hi_data.size(), &ss));
  size_t hi_byte_size = ss.ByteSizeLong();

  return (hi_byte_size - lo_byte_size) / (hi_data.size() - lo_data.size());
}

// Verifies that MaxBytesPerElement matches the actual measured serialized
// size per element for each dtype. The chosen values (e.g. -1 for signed
// types, max for unsigned) exercise the largest varint encodings, so the
// measured size is the worst case the writer must budget for.
TEST(TensorSliceWriteTest, CheckpointSize) {
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
            BytesPerElementHelper<bool>(false));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_BOOL),
            BytesPerElementHelper<bool>(true));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_FLOAT),
            BytesPerElementHelper<float>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_DOUBLE),
            BytesPerElementHelper<double>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX64),
            BytesPerElementHelper<complex64>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_COMPLEX128),
            BytesPerElementHelper<complex128>(-1.0));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT32),
            BytesPerElementHelper<int32>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT64),
            BytesPerElementHelper<int64>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT16),
            BytesPerElementHelper<uint16>(std::numeric_limits<uint16>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_UINT8),
            BytesPerElementHelper<uint8>(std::numeric_limits<uint8>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT8),
            BytesPerElementHelper<int8>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_INT16),
            BytesPerElementHelper<int16>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT8),
            BytesPerElementHelper<qint8>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QUINT8),
            BytesPerElementHelper<quint8>(std::numeric_limits<uint8>::max()));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_QINT32),
            BytesPerElementHelper<qint32>(-1));
  EXPECT_EQ(TensorSliceWriter::MaxBytesPerElement(DT_HALF),
            BytesPerElementHelper<Eigen::half>(Eigen::half(-1.0)));
}

// Verifies that Add() rejects slices whose serialized form would be too
// large, returning INVALID_ARGUMENT rather than producing a corrupt or
// oversized checkpoint.
TEST(TensorSliceWriteTest, SizeErrors) {
  const string filename = io::JoinPath(testing::TmpDir(), "checkpoint");

  TensorSliceWriter writer(filename, CreateTableTensorSliceBuilder);

  // Add a 300MB int8 tensor slice, which will fail because it expands to 3GB.
  {
    TensorShape shape({300, 1000000});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const std::vector<int8> data(300000000, -1);
    Status s = writer.Add("test1", shape, slice, data.data());
    EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    EXPECT_TRUE(StringPiece(s.error_message())
                    .contains("Tensor slice is too large to serialize"));
  }

  // Add a large string tensor slice, which will fail.
  {
    TensorShape shape({256, 1024});
    TensorSlice slice = TensorSlice::ParseOrDie("-:-");
    const std::vector<string> data(256 * 1024, std::string(8192, 'f'));
    Status s = writer.Add("test2", shape, slice, data.data());
    EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
    EXPECT_TRUE(StringPiece(s.error_message())
                    .contains("Tensor slice is too large to serialize"));
  }
}

}  // namespace checkpoint

}  // namespace tensorflow