1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Tests for layers.feature_column_ops.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import os 22 23 import numpy as np 24 25 from tensorflow.contrib import layers 26 from tensorflow.contrib.layers.python.layers import feature_column 27 from tensorflow.contrib.layers.python.layers import feature_column_ops 28 from tensorflow.core.example import example_pb2 29 from tensorflow.core.example import feature_pb2 30 from tensorflow.python.feature_column import feature_column as fc_core 31 from tensorflow.python.framework import constant_op 32 from tensorflow.python.framework import dtypes 33 from tensorflow.python.framework import ops 34 from tensorflow.python.framework import sparse_tensor 35 from tensorflow.python.ops import array_ops 36 from tensorflow.python.ops import gradients_impl 37 from tensorflow.python.ops import init_ops 38 from tensorflow.python.ops import lookup_ops 39 from tensorflow.python.ops import partitioned_variables 40 from tensorflow.python.ops import random_ops 41 from tensorflow.python.ops import variable_scope 42 from tensorflow.python.ops import variables as variables_lib 43 from tensorflow.python.platform import test 44 45 46 class 
TransformerTest(test.TestCase): 47 48 def testRealValuedColumnIsIdentityTransformation(self): 49 real_valued = feature_column.real_valued_column("price") 50 features = {"price": constant_op.constant([[20.], [110], [-3]])} 51 output = feature_column_ops._Transformer(features).transform(real_valued) 52 with self.test_session(): 53 self.assertAllEqual(output.eval(), [[20.], [110], [-3]]) 54 55 def testSparseRealValuedColumnIdentityTransformation(self): 56 sparse_real_valued = feature_column._real_valued_var_len_column( 57 "rating", is_sparse=True) 58 rating_tensor = sparse_tensor.SparseTensor( 59 values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1]) 60 features = {"rating": rating_tensor} 61 output = feature_column_ops._Transformer(features).transform( 62 sparse_real_valued) 63 with self.test_session(): 64 self.assertAllEqual(output.values.eval(), rating_tensor.values.eval()) 65 self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval()) 66 self.assertAllEqual(output.dense_shape.eval(), 67 rating_tensor.dense_shape.eval()) 68 69 def testSparseRealValuedColumnWithTransformation(self): 70 71 def square_fn(x): 72 return x**2 73 74 sparse_real_valued = feature_column._real_valued_var_len_column( 75 "rating", normalizer=square_fn, is_sparse=True) 76 rating_tensor = sparse_tensor.SparseTensor( 77 values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1]) 78 features = {"rating": rating_tensor} 79 output_dict = feature_column_ops.transform_features(features, 80 [sparse_real_valued]) 81 self.assertTrue(sparse_real_valued in output_dict) 82 output = output_dict[sparse_real_valued] 83 with self.test_session(): 84 self.assertArrayNear(output.values.eval(), [4.0, 25.0], 1e-5) 85 self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval()) 86 self.assertAllEqual(output.dense_shape.eval(), 87 rating_tensor.dense_shape.eval()) 88 89 def testBucketizedColumn(self): 90 bucket = feature_column.bucketized_column( 91 
feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 92 # buckets 2, 3, 0 93 features = {"price": constant_op.constant([[20.], [110], [-3]])} 94 95 # Test transform features. 96 output = feature_column_ops.transform_features( 97 features=features, feature_columns=[bucket]) 98 self.assertEqual(len(output), 1) 99 self.assertIn(bucket, output) 100 with self.test_session(): 101 self.assertAllEqual(output[bucket].eval(), [[2], [3], [0]]) 102 103 def testBucketizedColumnWithMultiDimensions(self): 104 bucket = feature_column.bucketized_column( 105 feature_column.real_valued_column("price", 2), 106 boundaries=[0., 10., 100.]) 107 # buckets 2, 3, 0 108 features = { 109 "price": constant_op.constant([[20., 110], [110., 20], [-3, -3]]) 110 } 111 output = feature_column_ops._Transformer(features).transform(bucket) 112 with self.test_session(): 113 self.assertAllEqual(output.eval(), [[2, 3], [3, 2], [0, 0]]) 114 115 def testCachedTransformation(self): 116 bucket = feature_column.bucketized_column( 117 feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 118 # buckets 2, 3, 0 119 features = {"price": constant_op.constant([[20.], [110], [-3]])} 120 transformer = feature_column_ops._Transformer(features) 121 with self.test_session() as sess: 122 transformer.transform(bucket) 123 num_of_ops = len(sess.graph.get_operations()) 124 # Verify that the second call to transform the same feature 125 # doesn't increase the number of ops. 126 transformer.transform(bucket) 127 self.assertEqual(num_of_ops, len(sess.graph.get_operations())) 128 129 def testSparseColumnWithHashBucket(self): 130 hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10) 131 wire_tensor = sparse_tensor.SparseTensor( 132 values=["omar", "stringer", "marlo"], 133 indices=[[0, 0], [1, 0], [1, 1]], 134 dense_shape=[2, 2]) 135 features = {"wire": wire_tensor} 136 # Test transform features. 
137 output = feature_column_ops.transform_features( 138 features=features, feature_columns=[hashed_sparse]) 139 self.assertEqual(len(output), 1) 140 self.assertIn(hashed_sparse, output) 141 with self.test_session(): 142 self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64) 143 self.assertTrue( 144 all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval())) 145 self.assertAllEqual(output[hashed_sparse].indices.eval(), 146 wire_tensor.indices.eval()) 147 self.assertAllEqual(output[hashed_sparse].dense_shape.eval(), 148 wire_tensor.dense_shape.eval()) 149 150 def testSparseIntColumnWithHashBucket(self): 151 """Tests a sparse column with int values.""" 152 hashed_sparse = feature_column.sparse_column_with_hash_bucket( 153 "wire", 10, dtype=dtypes.int64) 154 wire_tensor = sparse_tensor.SparseTensor( 155 values=[101, 201, 301], 156 indices=[[0, 0], [1, 0], [1, 1]], 157 dense_shape=[2, 2]) 158 features = {"wire": wire_tensor} 159 # Test transform features. 160 output = feature_column_ops.transform_features( 161 features=features, feature_columns=[hashed_sparse]) 162 self.assertEqual(len(output), 1) 163 self.assertIn(hashed_sparse, output) 164 with self.test_session(): 165 self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64) 166 self.assertTrue( 167 all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval())) 168 self.assertAllEqual(output[hashed_sparse].indices.eval(), 169 wire_tensor.indices.eval()) 170 self.assertAllEqual(output[hashed_sparse].dense_shape.eval(), 171 wire_tensor.dense_shape.eval()) 172 173 def testSparseColumnWithHashBucketWithDenseInputTensor(self): 174 hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10) 175 wire_tensor = constant_op.constant( 176 [["omar", "stringer"], ["marlo", "rick"]]) 177 features = {"wire": wire_tensor} 178 output = feature_column_ops._Transformer(features).transform(hashed_sparse) 179 180 with self.test_session(): 181 # While the input is a dense Tensor, the 
output should be a SparseTensor. 182 self.assertIsInstance(output, sparse_tensor.SparseTensor) 183 self.assertEqual(output.values.dtype, dtypes.int64) 184 self.assertTrue(all(x < 10 and x >= 0 for x in output.values.eval())) 185 self.assertAllEqual(output.indices.eval(), 186 [[0, 0], [0, 1], [1, 0], [1, 1]]) 187 self.assertAllEqual(output.dense_shape.eval(), [2, 2]) 188 189 def testEmbeddingColumn(self): 190 wire_tensor = sparse_tensor.SparseTensor( 191 values=["omar", "stringer", "marlo"], 192 indices=[[0, 0], [1, 0], [1, 1]], 193 dense_shape=[2, 2]) 194 features = {"wire": wire_tensor} 195 hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10) 196 wire_embedding = feature_column.embedding_column(hashed_sparse, 10) 197 198 # Test transform features. 199 output = feature_column_ops.transform_features( 200 features=features, feature_columns=[hashed_sparse, wire_embedding]) 201 # Check that features dict haven't changed 202 self.assertEqual({"wire": wire_tensor}, features) 203 self.assertEqual(len(output), 2) 204 self.assertIn(hashed_sparse, output) 205 self.assertIn(wire_embedding, output) 206 with self.test_session(): 207 self.assertAllEqual(output[wire_embedding].indices.eval(), 208 wire_tensor.indices.eval()) 209 self.assertAllEqual(output[wire_embedding].dense_shape.eval(), [2, 2]) 210 self.assertAllEqual(output[wire_embedding].values.eval(), 211 output[hashed_sparse].values.eval()) 212 213 def testSparseColumnWithKeys(self): 214 keys_sparse = feature_column.sparse_column_with_keys( 215 "wire", ["marlo", "omar", "stringer"]) 216 wire_tensor = sparse_tensor.SparseTensor( 217 values=["omar", "stringer", "marlo"], 218 indices=[[0, 0], [1, 0], [1, 1]], 219 dense_shape=[2, 2]) 220 features = {"wire": wire_tensor} 221 # Test transform features. 
222 output = feature_column_ops.transform_features( 223 features=features, feature_columns=[keys_sparse]) 224 self.assertEqual(len(output), 1) 225 self.assertIn(keys_sparse, output) 226 with self.test_session(): 227 lookup_ops.tables_initializer().run() 228 self.assertEqual(output[keys_sparse].values.dtype, dtypes.int64) 229 self.assertAllEqual(output[keys_sparse].values.eval(), [1, 2, 0]) 230 self.assertAllEqual(output[keys_sparse].indices.eval(), 231 wire_tensor.indices.eval()) 232 self.assertAllEqual(output[keys_sparse].dense_shape.eval(), 233 wire_tensor.dense_shape.eval()) 234 235 def testSparseColumnWithKeysWithDenseInputTensor(self): 236 keys_sparse = feature_column.sparse_column_with_keys( 237 "wire", ["marlo", "omar", "stringer", "rick"]) 238 wire_tensor = constant_op.constant( 239 [["omar", "stringer"], ["marlo", "rick"]]) 240 241 features = {"wire": wire_tensor} 242 output = feature_column_ops._Transformer(features).transform(keys_sparse) 243 244 with self.test_session(): 245 lookup_ops.tables_initializer().run() 246 # While the input is a dense Tensor, the output should be a SparseTensor. 247 self.assertIsInstance(output, sparse_tensor.SparseTensor) 248 self.assertEqual(output.dtype, dtypes.int64) 249 self.assertAllEqual(output.values.eval(), [1, 2, 0, 3]) 250 self.assertAllEqual(output.indices.eval(), 251 [[0, 0], [0, 1], [1, 0], [1, 1]]) 252 self.assertAllEqual(output.dense_shape.eval(), [2, 2]) 253 254 def testSparseColumnWithHashBucket_IsIntegerized(self): 255 hashed_sparse = feature_column.sparse_column_with_integerized_feature( 256 "wire", 10) 257 wire_tensor = sparse_tensor.SparseTensor( 258 values=[100, 1, 25], 259 indices=[[0, 0], [1, 0], [1, 1]], 260 dense_shape=[2, 2]) 261 features = {"wire": wire_tensor} 262 # Test transform features. 
263 output = feature_column_ops.transform_features( 264 features=features, feature_columns=[hashed_sparse]) 265 self.assertEqual(len(output), 1) 266 self.assertIn(hashed_sparse, output) 267 with self.test_session(): 268 self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int32) 269 self.assertTrue( 270 all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval())) 271 self.assertAllEqual(output[hashed_sparse].indices.eval(), 272 wire_tensor.indices.eval()) 273 self.assertAllEqual(output[hashed_sparse].dense_shape.eval(), 274 wire_tensor.dense_shape.eval()) 275 276 def testSparseColumnWithHashBucketWithDenseInputTensor_IsIntegerized(self): 277 hashed_sparse = feature_column.sparse_column_with_integerized_feature( 278 "wire", 10) 279 # wire_tensor = tf.SparseTensor(values=[100, 1, 25], 280 # indices=[[0, 0], [1, 0], [1, 1]], 281 # dense_shape=[2, 2]) 282 wire_tensor = constant_op.constant([[100, 0], [1, 25]]) 283 features = {"wire": wire_tensor} 284 output = feature_column_ops._Transformer(features).transform(hashed_sparse) 285 with self.test_session(): 286 # While the input is a dense Tensor, the output should be a SparseTensor. 
287 self.assertIsInstance(output, sparse_tensor.SparseTensor) 288 self.assertEqual(output.values.dtype, dtypes.int32) 289 self.assertTrue(all(x < 10 and x >= 0 for x in output.values.eval())) 290 self.assertAllEqual(output.indices.eval(), 291 [[0, 0], [0, 1], [1, 0], [1, 1]]) 292 self.assertAllEqual(output.dense_shape.eval(), [2, 2]) 293 294 def testWeightedSparseColumn(self): 295 ids = feature_column.sparse_column_with_keys("ids", 296 ["marlo", "omar", "stringer"]) 297 ids_tensor = sparse_tensor.SparseTensor( 298 values=["stringer", "stringer", "marlo"], 299 indices=[[0, 0], [1, 0], [1, 1]], 300 dense_shape=[2, 2]) 301 weighted_ids = feature_column.weighted_sparse_column(ids, "weights") 302 weights_tensor = sparse_tensor.SparseTensor( 303 values=[10.0, 20.0, 30.0], 304 indices=[[0, 0], [1, 0], [1, 1]], 305 dense_shape=[2, 2]) 306 features = {"ids": ids_tensor, "weights": weights_tensor} 307 # Test transform features. 308 output = feature_column_ops.transform_features( 309 features=features, feature_columns=[weighted_ids]) 310 self.assertEqual(len(output), 1) 311 self.assertIn(weighted_ids, output) 312 313 with self.test_session(): 314 lookup_ops.tables_initializer().run() 315 self.assertAllEqual(output[weighted_ids][0].dense_shape.eval(), 316 ids_tensor.dense_shape.eval()) 317 self.assertAllEqual(output[weighted_ids][0].indices.eval(), 318 ids_tensor.indices.eval()) 319 self.assertAllEqual(output[weighted_ids][0].values.eval(), [2, 2, 0]) 320 self.assertAllEqual(output[weighted_ids][1].dense_shape.eval(), 321 weights_tensor.dense_shape.eval()) 322 self.assertAllEqual(output[weighted_ids][1].indices.eval(), 323 weights_tensor.indices.eval()) 324 self.assertEqual(output[weighted_ids][1].values.dtype, dtypes.float32) 325 self.assertAllEqual(output[weighted_ids][1].values.eval(), 326 weights_tensor.values.eval()) 327 328 def testSparseColumnWithVocabulary(self): 329 vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt") 330 with open(vocabulary_file, "w") 
as f: 331 f.write("\n".join(["marlo", "omar", "stringer"]) + "\n") 332 vocab_sparse = feature_column.sparse_column_with_vocabulary_file( 333 "wire", vocabulary_file, vocab_size=3) 334 wire_tensor = sparse_tensor.SparseTensor( 335 values=["omar", "stringer", "marlo"], 336 indices=[[0, 0], [1, 0], [1, 1]], 337 dense_shape=[2, 2]) 338 features = {"wire": wire_tensor} 339 output = feature_column_ops.transform_features( 340 features=features, feature_columns=[vocab_sparse]) 341 self.assertEqual(len(output), 1) 342 self.assertIn(vocab_sparse, output) 343 with self.test_session(): 344 lookup_ops.tables_initializer().run() 345 self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64) 346 self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0]) 347 self.assertAllEqual(output[vocab_sparse].indices.eval(), 348 wire_tensor.indices.eval()) 349 self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), 350 wire_tensor.dense_shape.eval()) 351 352 def testSparseColumnWithVocabularyWithDenseInputTensor(self): 353 vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt") 354 with open(vocabulary_file, "w") as f: 355 f.write("\n".join(["marlo", "omar", "stringer"]) + "\n") 356 vocab_sparse = feature_column.sparse_column_with_vocabulary_file( 357 "wire", vocabulary_file, vocab_size=3) 358 wire_tensor = constant_op.constant( 359 [["omar", "stringer"], ["marlo", "omar"]]) 360 features = {"wire": wire_tensor} 361 output = feature_column_ops.transform_features( 362 features=features, feature_columns=[vocab_sparse]) 363 self.assertEqual(len(output), 1) 364 self.assertIn(vocab_sparse, output) 365 with self.test_session(): 366 lookup_ops.tables_initializer().run() 367 self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64) 368 self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1]) 369 self.assertAllEqual(output[vocab_sparse].indices.eval(), 370 [[0, 0], [0, 1], [1, 0], [1, 1]]) 371 
self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), [2, 2]) 372 373 def testSparseIntColumnWithVocabulary(self): 374 """Tests a sparse integer column with vocabulary.""" 375 vocabulary_file = os.path.join(self.get_temp_dir(), "courses.txt") 376 with open(vocabulary_file, "w") as f: 377 f.write("\n".join(["101", "201", "301"]) + "\n") 378 vocab_sparse = feature_column.sparse_column_with_vocabulary_file( 379 "wire", vocabulary_file, vocab_size=3, dtype=dtypes.int64) 380 wire_tensor = sparse_tensor.SparseTensor( 381 values=[201, 301, 101], 382 indices=[[0, 0], [1, 0], [1, 1]], 383 dense_shape=[2, 2]) 384 features = {"wire": wire_tensor} 385 output = feature_column_ops.transform_features( 386 features=features, feature_columns=[vocab_sparse]) 387 self.assertEqual(len(output), 1) 388 self.assertIn(vocab_sparse, output) 389 with self.test_session(): 390 lookup_ops.tables_initializer().run() 391 self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64) 392 self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0]) 393 self.assertAllEqual(output[vocab_sparse].indices.eval(), 394 wire_tensor.indices.eval()) 395 self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), 396 wire_tensor.dense_shape.eval()) 397 398 def testSparseIntColumnWithVocabularyWithDenseInputTensor(self): 399 """Tests a sparse integer column with vocabulary.""" 400 vocabulary_file = os.path.join(self.get_temp_dir(), "courses.txt") 401 with open(vocabulary_file, "w") as f: 402 f.write("\n".join(["101", "201", "301"]) + "\n") 403 vocab_sparse = feature_column.sparse_column_with_vocabulary_file( 404 "wire", vocabulary_file, vocab_size=3, dtype=dtypes.int64) 405 wire_tensor = constant_op.constant([[201, 301], [101, 201]]) 406 features = {"wire": wire_tensor} 407 output = feature_column_ops.transform_features( 408 features=features, feature_columns=[vocab_sparse]) 409 self.assertEqual(len(output), 1) 410 self.assertIn(vocab_sparse, output) 411 with self.test_session(): 412 
lookup_ops.tables_initializer().run() 413 self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64) 414 self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1]) 415 self.assertAllEqual(output[vocab_sparse].indices.eval(), 416 [[0, 0], [0, 1], [1, 0], [1, 1]]) 417 self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), [2, 2]) 418 419 def testCrossColumn(self): 420 language = feature_column.sparse_column_with_hash_bucket( 421 "language", hash_bucket_size=3) 422 country = feature_column.sparse_column_with_hash_bucket( 423 "country", hash_bucket_size=5) 424 country_language = feature_column.crossed_column( 425 [language, country], hash_bucket_size=15) 426 features = { 427 "language": 428 sparse_tensor.SparseTensor( 429 values=["english", "spanish"], 430 indices=[[0, 0], [1, 0]], 431 dense_shape=[2, 1]), 432 "country": 433 sparse_tensor.SparseTensor( 434 values=["US", "SV"], 435 indices=[[0, 0], [1, 0]], 436 dense_shape=[2, 1]) 437 } 438 # Test transform features. 439 output = feature_column_ops.transform_features( 440 features=features, feature_columns=[country_language]) 441 self.assertEqual(len(output), 1) 442 self.assertIn(country_language, output) 443 with self.test_session(): 444 self.assertEqual(output[country_language].values.dtype, dtypes.int64) 445 self.assertTrue( 446 all(x < 15 and x >= 0 for x in output[country_language].values.eval( 447 ))) 448 449 def testCrossWithBucketizedColumn(self): 450 price_bucket = feature_column.bucketized_column( 451 feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 452 country = feature_column.sparse_column_with_hash_bucket( 453 "country", hash_bucket_size=5) 454 country_price = feature_column.crossed_column( 455 [country, price_bucket], hash_bucket_size=15) 456 features = { 457 "price": 458 constant_op.constant([[20.]]), 459 "country": 460 sparse_tensor.SparseTensor( 461 values=["US", "SV"], 462 indices=[[0, 0], [0, 1]], 463 dense_shape=[1, 2]) 464 } 465 # Test transform 
features. 466 output = feature_column_ops.transform_features( 467 features=features, feature_columns=[country_price]) 468 self.assertEqual(len(output), 1) 469 self.assertIn(country_price, output) 470 with self.test_session(): 471 self.assertEqual(output[country_price].values.dtype, dtypes.int64) 472 self.assertTrue( 473 all(x < 15 and x >= 0 for x in output[country_price].values.eval())) 474 475 def testCrossWithMultiDimensionBucketizedColumn(self): 476 country = feature_column.sparse_column_with_hash_bucket( 477 "country", hash_bucket_size=5) 478 price_bucket = feature_column.bucketized_column( 479 feature_column.real_valued_column("price", 2), 480 boundaries=[0., 10., 100.]) 481 country_price = feature_column.crossed_column( 482 [country, price_bucket], hash_bucket_size=1000) 483 484 with ops.Graph().as_default(): 485 features = { 486 "price": 487 constant_op.constant([[20., 210.], [110., 50.], [-3., -30.]]), 488 "country": 489 sparse_tensor.SparseTensor( 490 values=["US", "SV", "US"], 491 indices=[[0, 0], [1, 0], [2, 0]], 492 dense_shape=[3, 2]) 493 } 494 output, column_to_variable, _ = ( 495 feature_column_ops.weighted_sum_from_feature_columns( 496 features, [country_price], num_outputs=1)) 497 498 weights = column_to_variable[country_price][0] 499 grad = array_ops.squeeze( 500 gradients_impl.gradients(output, weights)[0].values) 501 with self.test_session(): 502 variables_lib.global_variables_initializer().run() 503 self.assertEqual(len(grad.eval()), 6) 504 505 # Test transform features. 
506 output = feature_column_ops.transform_features( 507 features=features, feature_columns=[country_price]) 508 self.assertEqual(len(output), 1) 509 self.assertIn(country_price, output) 510 511 def testCrossWithCrossedColumn(self): 512 price_bucket = feature_column.bucketized_column( 513 feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 514 country = feature_column.sparse_column_with_hash_bucket( 515 "country", hash_bucket_size=5) 516 country_price = feature_column.crossed_column( 517 [country, price_bucket], hash_bucket_size=15) 518 wire = feature_column.sparse_column_with_hash_bucket("wire", 10) 519 wire_country_price = feature_column.crossed_column( 520 [wire, country_price], hash_bucket_size=15) 521 features = { 522 "price": 523 constant_op.constant([[20.]]), 524 "country": 525 sparse_tensor.SparseTensor( 526 values=["US", "SV"], 527 indices=[[0, 0], [0, 1]], 528 dense_shape=[1, 2]), 529 "wire": 530 sparse_tensor.SparseTensor( 531 values=["omar", "stringer", "marlo"], 532 indices=[[0, 0], [0, 1], [0, 2]], 533 dense_shape=[1, 3]) 534 } 535 # Test transform features. 
536 output = feature_column_ops.transform_features( 537 features=features, feature_columns=[wire_country_price]) 538 self.assertEqual(len(output), 1) 539 self.assertIn(wire_country_price, output) 540 with self.test_session(): 541 self.assertEqual(output[wire_country_price].values.dtype, dtypes.int64) 542 self.assertTrue( 543 all(x < 15 and x >= 0 for x in output[wire_country_price].values.eval( 544 ))) 545 546 def testIfFeatureTableContainsTransformationReturnIt(self): 547 any_column = feature_column.sparse_column_with_hash_bucket("sparse", 10) 548 features = {any_column: "any-thing-even-not-a-tensor"} 549 output = feature_column_ops._Transformer(features).transform(any_column) 550 self.assertEqual(output, "any-thing-even-not-a-tensor") 551 552 553 class CreateInputLayersForDNNsTest(test.TestCase): 554 555 def testFeatureColumnDictFails(self): 556 real_valued = feature_column.real_valued_column("price") 557 features = {"price": constant_op.constant([[20.], [110], [-3]])} 558 with self.assertRaisesRegexp( 559 ValueError, 560 "Expected feature_columns to be iterable, found dict"): 561 feature_column_ops.input_from_feature_columns( 562 features, {"feature": real_valued}) 563 564 def testSparseTensorRealValuedColumn(self): 565 var_len_sparse_real_valued_column = ( 566 feature_column._real_valued_var_len_column("rating", is_sparse=True)) 567 features = { 568 "ids": 569 sparse_tensor.SparseTensor( 570 values=["c", "b", "a"], 571 indices=[[0, 0], [1, 0], [2, 0]], 572 dense_shape=[3, 1]), 573 "income": 574 constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]), 575 "rating": 576 sparse_tensor.SparseTensor( 577 values=[3.5, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1]) 578 } 579 with self.assertRaisesRegexp( 580 ValueError, 581 "dd"): 582 feature_column_ops.input_from_feature_columns( 583 features, [var_len_sparse_real_valued_column]) 584 585 def testAllDNNColumns(self): 586 sparse_column = feature_column.sparse_column_with_keys( 587 "ids", ["a", "b", "c", 
"unseen"]) 588 real_valued_column = feature_column.real_valued_column("income", 2) 589 one_hot_column = feature_column.one_hot_column(sparse_column) 590 embedding_column = feature_column.embedding_column(sparse_column, 10) 591 features = { 592 "ids": 593 sparse_tensor.SparseTensor( 594 values=["c", "b", "a"], 595 indices=[[0, 0], [1, 0], [2, 0]], 596 dense_shape=[3, 1]), 597 "income": 598 constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]), 599 } 600 columns = [one_hot_column, embedding_column, real_valued_column] 601 output = feature_column_ops.input_from_feature_columns(features, columns) 602 output_core = fc_core.input_layer(features, columns) 603 with self.test_session(): 604 variables_lib.global_variables_initializer().run() 605 lookup_ops.tables_initializer().run() 606 self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10]) 607 # Verify cross compatibility: Core builder output should equal to contrib. 608 self.assertAllEqual(output.eval().shape, output_core.eval().shape) 609 610 def testAllDNNColumnsWithColumnwiseOutputs(self): 611 sparse_column = feature_column.sparse_column_with_keys( 612 "ids", ["a", "b", "c", "unseen"]) 613 real_valued_column = feature_column.real_valued_column("income", 2) 614 one_hot_column = feature_column.one_hot_column(sparse_column) 615 embedding_column = feature_column.embedding_column(sparse_column, 10) 616 features = { 617 "ids": 618 sparse_tensor.SparseTensor( 619 values=["c", "b", "a"], 620 indices=[[0, 0], [1, 0], [2, 0]], 621 dense_shape=[3, 1]), 622 "income": 623 constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]), 624 } 625 columns = [one_hot_column, embedding_column, real_valued_column] 626 cols_to_outs = {} 627 feature_column_ops.input_from_feature_columns( 628 features, columns, cols_to_outs=cols_to_outs) 629 with self.test_session(): 630 variables_lib.global_variables_initializer().run() 631 lookup_ops.tables_initializer().run() 632 for column in columns: 633 self.assertTrue(column in 
cols_to_outs) 634 635 def testRealValuedColumn(self): 636 real_valued = feature_column.real_valued_column("price") 637 features = {"price": constant_op.constant([[20.], [110], [-3]])} 638 output = feature_column_ops.input_from_feature_columns(features, 639 [real_valued]) 640 with self.test_session(): 641 self.assertAllClose(output.eval(), features["price"].eval()) 642 # Verify cross compatibility: Core builder output should equal to contrib. 643 self.assertAllClose(output.eval(), 644 fc_core.input_layer(features, [real_valued]).eval()) 645 646 def testRealValuedColumnWithMultiDimensions(self): 647 real_valued = feature_column.real_valued_column("price", 2) 648 features = { 649 "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]]) 650 } 651 output = feature_column_ops.input_from_feature_columns(features, 652 [real_valued]) 653 with self.test_session(): 654 self.assertAllClose(output.eval(), features["price"].eval()) 655 # Verify cross compatibility: Core builder output should equal to contrib. 
656 self.assertAllClose(output.eval(), 657 fc_core.input_layer(features, [real_valued]).eval()) 658 659 def testRealValuedColumnDense(self): 660 var_len_real_valued = feature_column._real_valued_var_len_column( 661 "rating", default_value=-1) 662 rating = np.array([[0., 1., 2., -1.], 663 [3., 4., 5., 6.]]) 664 features = {"rating": constant_op.constant(rating)} 665 with self.test_session() as sess: 666 output = sess.run(feature_column_ops.input_from_feature_columns( 667 features, [var_len_real_valued])) 668 self.assertAllClose(rating, output) 669 670 def testRealValuedColumnTypeConversion(self): 671 var_len_real_valued = feature_column._real_valued_var_len_column( 672 "rating", default_value=-1) 673 rating = np.array([[0, 1, 2, -1], 674 [3, 4, 5, 6]]) 675 features = {"rating": constant_op.constant(rating, dtype=dtypes.int64)} 676 with self.test_session() as sess: 677 output = sess.run(feature_column_ops.input_from_feature_columns( 678 features, [var_len_real_valued])) 679 self.assertAllClose(rating.astype(np.float32), output) 680 681 def testRealValuedColumnWithNormalizer(self): 682 real_valued = feature_column.real_valued_column( 683 "price", normalizer=lambda x: x - 2) 684 features = {"price": constant_op.constant([[20.], [110], [-3]])} 685 output = feature_column_ops.input_from_feature_columns(features, 686 [real_valued]) 687 with self.test_session(): 688 self.assertAllClose(output.eval(), features["price"].eval() - 2) 689 # Verify cross compatibility: Core builder output should equal to contrib. 
690 self.assertAllClose(output.eval(), 691 fc_core.input_layer(features, [real_valued]).eval()) 692 693 def testRealValuedColumnWithMultiDimensionsAndNormalizer(self): 694 real_valued = feature_column.real_valued_column( 695 "price", 2, normalizer=lambda x: x - 2) 696 features = { 697 "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]]) 698 } 699 output = feature_column_ops.input_from_feature_columns(features, 700 [real_valued]) 701 with self.test_session(): 702 self.assertAllClose(output.eval(), features["price"].eval() - 2) 703 # Verify cross compatibility: Core builder output should equal to contrib. 704 self.assertAllClose(output.eval(), 705 fc_core.input_layer(features, [real_valued]).eval()) 706 707 def testBucketizedColumnWithNormalizerSucceedsForDNN(self): 708 bucket = feature_column.bucketized_column( 709 feature_column.real_valued_column( 710 "price", normalizer=lambda x: x - 15), 711 boundaries=[0., 10., 100.]) 712 # buckets 2, 3, 0 713 features = {"price": constant_op.constant([[20.], [110], [-3]])} 714 output = feature_column_ops.input_from_feature_columns(features, [bucket]) 715 expected = [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]] 716 with self.test_session(): 717 self.assertAllClose(output.eval(), expected) 718 self.assertAllClose(output.eval(), 719 fc_core.input_layer(features, [bucket]).eval()) 720 721 def testBucketizedColumnWithMultiDimensionsSucceedsForDNN(self): 722 bucket = feature_column.bucketized_column( 723 feature_column.real_valued_column("price", 2), 724 boundaries=[0., 10., 100.]) 725 # buckets [2, 3], [3, 2], [0, 0]. 
dimension = 2 726 features = { 727 "price": constant_op.constant([[20., 200], [110, 50], [-3, -3]]) 728 } 729 output = feature_column_ops.input_from_feature_columns(features, [bucket]) 730 expected = [[0, 0, 1, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 1, 0], 731 [1, 0, 0, 0, 1, 0, 0, 0]] 732 with self.test_session(): 733 self.assertAllClose(output.eval(), expected) 734 self.assertAllClose(output.eval(), 735 fc_core.input_layer(features, [bucket]).eval()) 736 737 def testOneHotColumnFromWeightedSparseColumnSucceedsForDNN(self): 738 ids_column = feature_column.sparse_column_with_keys( 739 "ids", ["a", "b", "c", "unseen"]) 740 ids_tensor = sparse_tensor.SparseTensor( 741 values=["c", "b", "a", "c"], 742 indices=[[0, 0], [1, 0], [2, 0], [2, 1]], 743 dense_shape=[3, 2]) 744 weighted_ids_column = feature_column.weighted_sparse_column(ids_column, 745 "weights") 746 weights_tensor = sparse_tensor.SparseTensor( 747 values=[10.0, 20.0, 30.0, 40.0], 748 indices=[[0, 0], [1, 0], [2, 0], [2, 1]], 749 dense_shape=[3, 2]) 750 features = {"ids": ids_tensor, "weights": weights_tensor} 751 one_hot_column = feature_column.one_hot_column(weighted_ids_column) 752 output = feature_column_ops.input_from_feature_columns(features, 753 [one_hot_column]) 754 output_core = fc_core.input_layer(features, [one_hot_column]) 755 with self.test_session(): 756 variables_lib.global_variables_initializer().run() 757 lookup_ops.tables_initializer().run() 758 self.assertAllEqual([[0, 0, 10., 0], [0, 20., 0, 0], [30., 0, 40., 0]], 759 output.eval()) 760 # Verify cross compatibility: Core builder output should equal to contrib. 
      self.assertAllEqual(output.eval(), output_core.eval())

  def testOneHotColumnFromSparseColumnWithKeysSucceedsForDNN(self):
    """One-hot of a keyed sparse column produces a 1 at each seen id."""
    ids_column = feature_column.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b", "a"],
        indices=[[0, 0], [1, 0], [2, 0]],
        dense_shape=[3, 1])
    one_hot_sparse = feature_column.one_hot_column(ids_column)
    features = {"ids": ids_tensor}
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [one_hot_sparse])
    output_core = fc_core.input_layer(features, [one_hot_sparse])

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]],
                          output.eval())
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())

  def testOneHotColumnFromMultivalentSparseColumnWithKeysSucceedsForDNN(self):
    """Multivalent rows set a 1 for every distinct id appearing in the row."""
    ids_column = feature_column.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b", "a", "c"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        dense_shape=[3, 2])
    one_hot_sparse = feature_column.one_hot_column(ids_column)
    features = {"ids": ids_tensor}
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [one_hot_sparse])
    output_core = fc_core.input_layer(features, [one_hot_sparse])

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      # Row 2 contains both "a" and "c", so two slots are hot.
      self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
                          output.eval())
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())

  def testOneHotColumnFromSparseColumnWithIntegerizedFeaturePassesForDNN(self):
    """One-hot over integerized ids uses the ids directly as bucket indices."""
    ids_column = feature_column.sparse_column_with_integerized_feature(
        "ids", bucket_size=4)
    one_hot_sparse = feature_column.one_hot_column(ids_column)
    features = {
        "ids":
            sparse_tensor.SparseTensor(
                values=[2, 1, 0, 2],
                indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
                dense_shape=[3, 2])
    }
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [one_hot_sparse])
    output_core = fc_core.input_layer(features, [one_hot_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
                          output.eval())
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())

  def testOneHotColumnFromSparseColumnWithHashBucketSucceedsForDNN(self):
    """One-hot of a hashed sparse column has width == hash bucket count."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("feat", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["a", "b", "c1", "c2"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        dense_shape=[3, 2])
    features = {"feat": wire_tensor}
    one_hot_sparse = feature_column.one_hot_column(hashed_sparse)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [one_hot_sparse])
    output_core = fc_core.input_layer(features, [one_hot_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      # Exact hot slots depend on the hash; only the shape is checked.
      self.assertAllEqual([3, 10], output.eval().shape)
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())

  def testEmbeddingColumnSucceedsForDNN(self):
    """An embedding over a hashed sparse column yields [batch, dimension]."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo", "xx", "yy"],
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [3, 0]],
        dense_shape=[4, 2])
    features = {"wire": wire_tensor}
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    output_core = fc_core.input_layer(features, [embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(output.eval().shape, [4, 10])
      # Verify cross compatibility: Core builder output should equal to contrib.
      # Embeddings are initialized randomly, so only shapes are compared.
      self.assertAllEqual(output.eval().shape, output_core.eval().shape)

  def testScatteredEmbeddingColumnSucceedsForDNN(self):
    """Gradients through a scattered (hashed) embedding match the core impl."""
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo", "omar"],
        indices=[[0, 0], [1, 0], [1, 1], [2, 0]],
        dense_shape=[3, 2])

    features = {"wire": wire_tensor}
    # Big enough hash space so that hopefully there is no collision
    embedded_sparse = feature_column.scattered_embedding_column(
        "wire", 1000, 3, layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
    output = feature_column_ops.input_from_feature_columns(
        features, [embedded_sparse], weight_collections=["my_collection"])
    weights = ops.get_collection("my_collection")
    grad = gradients_impl.gradients(output, weights)
    # Calculates the tensors calculated by FC core libs. Later, the values will
    # be compared with the contrib version.
    output_core = fc_core.input_layer(
        features, [embedded_sparse], weight_collections=["my_collection_core"])
    weights_core = ops.get_collection("my_collection_core")
    grad_core = gradients_impl.gradients(output_core, weights_core)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      gradient_values = []
      gradient_values_core = []
      # Collect the gradient from the different partitions (one in this test)
      for p in range(len(grad)):
        gradient_values.extend(grad[p].values.eval())
        gradient_values_core.extend(grad_core[p].values.eval())
      gradient_values.sort()
      gradient_values_core.sort()
      # "omar" appears twice (gradient 2 per embedding component); the other
      # two values appear once each, averaged across the 2-wide rows (0.5).
      self.assertAllEqual(gradient_values, [0.5] * 6 + [2] * 3)
      self.assertAllEqual(gradient_values, gradient_values_core)

  def testEmbeddingColumnWithInitializerSucceedsForDNN(self):
    """A constant initializer makes every embedding component deterministic."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    init_value = 133.7
    embeded_sparse = feature_column.embedding_column(
        hashed_sparse,
        10,
        initializer=init_ops.constant_initializer(init_value))
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    output_core = fc_core.input_layer(features, [embeded_sparse])

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      output_eval = output.eval()
      self.assertAllEqual(output_eval.shape, [2, 10])
      # Every looked-up embedding equals the constant initializer value.
      self.assertAllClose(output_eval, np.tile(init_value, [2, 10]))
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())

  def testEmbeddingColumnWithMultipleInitializersFails(self):
    """Two embedding columns over the same source with different initializers
    must be rejected, since they would share one variable."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    embedded_sparse = feature_column.embedding_column(
        hashed_sparse,
        10,
        initializer=init_ops.truncated_normal_initializer(
            mean=42, stddev=1337))
    embedded_sparse_alternate = feature_column.embedding_column(
        hashed_sparse,
        10,
        initializer=init_ops.truncated_normal_initializer(
            mean=1337, stddev=42))

    # Makes sure that trying to use different initializers with the same
    # embedding column explicitly fails.
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError,
          "Duplicate feature column key found for column: wire_embedding"):
        feature_column_ops.input_from_feature_columns(
            features, [embedded_sparse, embedded_sparse_alternate])

  def testEmbeddingColumnWithWeightedSparseColumnSucceedsForDNN(self):
    """Tests DNN input with embedded weighted sparse column."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    embeded_sparse = feature_column.embedding_column(weighted_ids, 10)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    output_core = fc_core.input_layer(features, [embeded_sparse])

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(output.eval().shape, [2, 10])
      # Verify cross compatibility: Core builder output should equal to contrib.
      # Embeddings are randomly initialized, so only shapes are compared.
      self.assertAllEqual(output.eval().shape, output_core.eval().shape)

  def testEmbeddingColumnWithIntegerWeightedSparseColumnSucceedsForDNN(self):
    """Same as the previous test, but with integer weights."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(
        ids, "weights", dtype=dtypes.int32)
    weights_tensor = sparse_tensor.SparseTensor(
        values=constant_op.constant([10, 20, 30], dtype=dtypes.int32),
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    embeded_sparse = feature_column.embedding_column(weighted_ids, 10)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(output.eval().shape, [2, 10])

  def testEmbeddingColumnWithCrossedColumnSucceedsForDNN(self):
    """An embedding over a crossed column is a valid DNN input."""
    a = feature_column.sparse_column_with_hash_bucket(
        "aaa", hash_bucket_size=100)
    b = feature_column.sparse_column_with_hash_bucket(
        "bbb", hash_bucket_size=100)
    crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    embeded_sparse = feature_column.embedding_column(crossed, 10)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(output.eval().shape, [2, 10])

  def testSparseColumnFailsForDNN(self):
    """A raw sparse column cannot be fed to a DNN input layer directly."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError, "Error creating input layer for column: wire"):
        variables_lib.global_variables_initializer().run()
        feature_column_ops.input_from_feature_columns(features, [hashed_sparse])

  def testWeightedSparseColumnFailsForDNN(self):
    """A raw weighted sparse column is likewise rejected by the input layer."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError,
          "Error creating input layer for column: ids_weighted_by_weights"):
        lookup_ops.tables_initializer().run()
        feature_column_ops.input_from_feature_columns(features, [weighted_ids])

  def testCrossedColumnFailsForDNN(self):
    """A raw crossed column is likewise rejected by the input layer."""
    a = feature_column.sparse_column_with_hash_bucket(
        "aaa", hash_bucket_size=100)
    b = feature_column.sparse_column_with_hash_bucket(
        "bbb", hash_bucket_size=100)
    crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError, "Error creating input layer for column: aaa_X_bbb"):
        variables_lib.global_variables_initializer().run()
        feature_column_ops.input_from_feature_columns(features, [crossed])

  def testDeepColumnsSucceedForDNN(self):
    """Real-valued, bucketized and embedding columns concatenate widthwise."""
    real_valued = feature_column.real_valued_column("income", 3)
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "income":
            constant_op.constant([[20., 10, -5], [110, 0, -7], [-3, 30, 50]]),
        "price":
            constant_op.constant([[20., 200], [110, 2], [-20, -30]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    embeded_sparse = feature_column.embedding_column(
        hashed_sparse, 10, initializer=init_ops.constant_initializer(133.7))
    output = feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      # size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding) = 21
      self.assertAllEqual(output.eval().shape, [3, 21])

  def testEmbeddingColumnForDNN(self):
    """A ones-initialized sum-combined embedding counts ids per example."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[3, 2])
    features = {"wire": wire_tensor}
    embeded_sparse = feature_column.embedding_column(
        hashed_sparse,
        1,
        combiner="sum",
        initializer=init_ops.ones_initializer())
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      # score: (number of values)
      self.assertAllEqual(output.eval(), [[1.], [2.], [0.]])

  def testEmbeddingColumnWithMaxNormForDNN(self):
    """max_norm clips each ones-embedding from norm 1 down to 0.5."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[3, 2])
    features = {"wire": wire_tensor}
    embedded_sparse = feature_column.embedding_column(
        hashed_sparse,
        1,
        combiner="sum",
        initializer=init_ops.ones_initializer(),
        max_norm=0.5)
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embedded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      # score: (number of values * 0.5)
      self.assertAllClose(output.eval(), [[0.5], [1.], [0.]])

  def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
    """With ones embeddings and sum combiner, output is the sum of weights."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[3, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[3, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    embeded_sparse = feature_column.embedding_column(
        weighted_ids,
        1,
        combiner="sum",
        initializer=init_ops.ones_initializer())
    output = feature_column_ops.input_from_feature_columns(features,
                                                           [embeded_sparse])
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      # score: (sum of weights)
      self.assertAllEqual(output.eval(), [[10.], [50.], [0.]])

  def testInputLayerWithCollectionsForDNN(self):
    """Only the embedding contributes a variable to the weight collection."""
    real_valued = feature_column.real_valued_column("price")
    bucket = feature_column.bucketized_column(
        real_valued, boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "price":
            constant_op.constant([[20.], [110], [-3]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"])
    weights = ops.get_collection("my_collection")
    # one variable for embeded sparse
    self.assertEqual(1, len(weights))

  def testInputLayerWithTrainableArgForDNN(self):
    """trainable=False must keep the embedding out of TRAINABLE_VARIABLES."""
    real_valued = feature_column.real_valued_column("price")
    bucket = feature_column.bucketized_column(
        real_valued, boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "price":
            constant_op.constant([[20.], [110], [-3]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"],
        trainable=False)
    # There should not be any trainable variables
    self.assertEqual(0, len(variables_lib.trainable_variables()))

    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"],
        trainable=True)
    # There should be one trainable variable for the embedded sparse column
    self.assertEqual(1, len(variables_lib.trainable_variables()))

  def testInputLayerWithNonTrainableEmbeddingForDNN(self):
    """Per-column trainable flag is honored independently for each embedding."""
    sparse_1 = feature_column.sparse_column_with_hash_bucket("wire_1", 10)
    sparse_2 = feature_column.sparse_column_with_hash_bucket("wire_2", 10)
    features = {
        "wire_1":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
        "wire_2":
            sparse_tensor.SparseTensor(
                values=["jack", "jill"],
                indices=[[0, 0], [1, 0]],
                dense_shape=[4, 1])
    }
    dims_1 = 10
    init_1 = 3.14
    embeded_1 = feature_column.embedding_column(
        sparse_1, dims_1, initializer=init_ops.constant_initializer(init_1),
        trainable=False)
    output_1 = feature_column_ops.input_from_feature_columns(
        features, [embeded_1])
    # There should be no trainable variables for sparse_1
    self.assertEqual(0, len(variables_lib.trainable_variables()))

    dims_2 = 7
    init_2 = 6.14
    embeded_2 = feature_column.embedding_column(
        sparse_2, dims_2, initializer=init_ops.constant_initializer(init_2),
        trainable=True)
    output_2 = feature_column_ops.input_from_feature_columns(
        features, [embeded_2])
    # There should be one trainable variable for sparse_2
    self.assertEqual(1, len(variables_lib.trainable_variables()))

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      output_1_eval = output_1.eval()
      output_2_eval = output_2.eval()
      self.assertAllEqual(output_1_eval.shape, [3, dims_1])
      self.assertAllClose(output_1_eval, np.tile(init_1, [3, dims_1]))
      self.assertAllEqual(output_2_eval.shape, [4, dims_2])
      # Rows 2-3 of wire_2 are empty, so their embeddings are all zeros.
      self.assertAllClose(output_2_eval, np.concatenate(
          (np.tile(init_2, [2, dims_2]), np.tile(0, [2, dims_2]))))


class SequenceInputFromFeatureColumnTest(test.TestCase):

  def testSupportedColumns(self):
    """Unsupported column types must raise with a descriptive message."""
    measurement = feature_column.real_valued_column("measurements")
    country = feature_column.sparse_column_with_hash_bucket("country", 100)
    pets = feature_column.sparse_column_with_hash_bucket("pets", 100)
    ids = feature_column.sparse_column_with_integerized_feature("id", 100)

    country_x_pets = feature_column.crossed_column([country, pets], 100)
    country_x_pets_onehot = feature_column.one_hot_column(country_x_pets)
    bucketized_measurement = feature_column.bucketized_column(measurement,
                                                              [.25, .5, .75])
    embedded_id = feature_column.embedding_column(ids, 100)

    # `_BucketizedColumn` is not supported.
    self.assertRaisesRegexp(
        ValueError,
        "FeatureColumn type _BucketizedColumn is not currently supported",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [measurement, bucketized_measurement])

    # `_CrossedColumn` is not supported.
    self.assertRaisesRegexp(
        ValueError,
        "FeatureColumn type _CrossedColumn is not currently supported",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [embedded_id, country_x_pets])

    # `country_x_pets_onehot` depends on a `_CrossedColumn` which is forbidden.
    self.assertRaisesRegexp(
        ValueError, "Column country_X_pets .* _CrossedColumn",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [embedded_id, country_x_pets_onehot])

  def testRealValuedColumn(self):
    """A 3-d [batch, time, dim] dense input passes through unchanged."""
    batch_size = 4
    sequence_length = 8
    dimension = 3

    np.random.seed(1111)
    measurement_input = np.random.rand(batch_size, sequence_length, dimension)
    measurement_column = feature_column.real_valued_column("measurements")
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)
    self.assertAllClose(measurement_input, model_inputs)

  def testRealValuedVarLenColumn(self):
    """A var-len real-valued column is reshaped to [batch, time, 1]."""
    var_len_real_valued = feature_column._real_valued_var_len_column(
        "rating", default_value=-1)
    rating = np.array([[0., 1., 2., -1.],
                       [3., 4., 5., 6.]])
    features = {"rating": constant_op.constant(rating)}
    with self.test_session() as sess:
      output = sess.run(
          feature_column_ops.sequence_input_from_feature_columns(
              features, [var_len_real_valued]))
    reshaped_rating = np.reshape(rating, [2, 4, 1])
    self.assertAllClose(reshaped_rating, output)

  def testRealValuedColumnWithExtraDimensions(self):
    """Trailing dimensions are flattened into one feature axis."""
    batch_size = 4
    sequence_length = 8
    dimensions = [3, 4, 5]

    np.random.seed(2222)
    measurement_input = np.random.rand(batch_size, sequence_length, *dimensions)
    measurement_column = feature_column.real_valued_column("measurements")
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
    reshaped_measurements = np.reshape(measurement_input, expected_shape)

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)

    self.assertAllClose(reshaped_measurements, model_inputs)

  def testRealValuedColumnWithNormalizer(self):
    """The column's normalizer is applied to sequence inputs."""
    batch_size = 4
    sequence_length = 8
    dimension = 3
    normalizer = lambda x: x - 2

    np.random.seed(3333)
    measurement_input = np.random.rand(batch_size, sequence_length, dimension)
    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)
    self.assertAllClose(normalizer(measurement_input), model_inputs)

  def testRealValuedColumnWithMultiDimensionsAndNormalizer(self):
    """Normalization and flattening compose for multi-dim sequence inputs."""
    batch_size = 4
    sequence_length = 8
    dimensions = [3, 4, 5]
    normalizer = lambda x: x / 2.0

    np.random.seed(1234)
    measurement_input = np.random.rand(batch_size, sequence_length, *dimensions)
    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
    reshaped_measurements = np.reshape(measurement_input, expected_shape)

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)

    self.assertAllClose(normalizer(reshaped_measurements), model_inputs)

  def testOneHotColumnFromSparseColumnWithKeys(self):
    """3-d sparse ids one-hot to [batch, time, vocab]; empty steps are zeros."""
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    ids_column = feature_column.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    one_hot_column = feature_column.one_hot_column(ids_column)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [one_hot_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    expected_input_shape = np.array([4, 3, 4])
    expected_model_input = np.array(
        [[[0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 0]],
         [[1, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0]]],
        dtype=np.float32)

    self.assertAllEqual(expected_input_shape, model_input.shape)
    self.assertAllClose(expected_model_input, model_input)

  def testOneHotColumnFromSparseColumnWithHashBucket(self):
    """Hashed one-hot sequence input has width == hash bucket count."""
    hash_buckets = 10
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    one_hot_column = feature_column.one_hot_column(hashed_ids_column)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [one_hot_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    # Hash placement is opaque, so only the output shape is verified.
    expected_input_shape = np.array([4, 3, hash_buckets])
    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumn(self):
    """3-d sparse ids embed to [batch, time, embedding_dimension]."""
    hash_buckets = 10
    embedding_dimension = 5
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    expected_input_shape = np.array([4, 3, embedding_dimension])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(hashed_ids_column,
                                                      embedding_dimension)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumnWithAutoReshape(self):
    """2-d sparse ids are auto-reshaped into a [batch, time] sequence."""
    hash_buckets = 10
    embedding_dimension = 5
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0], [0, 1],
                 [1, 0], [1, 1], [1, 2],
                 [3, 2]],
        dense_shape=[4, 3])

    expected_input_shape = np.array([4, 3, embedding_dimension])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(hashed_ids_column,
                                                      embedding_dimension)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumnGradient(self):
    """Embedding gradients equal per-id occurrence counts with sum combiner."""
    hash_buckets = 1000
    embedding_dimension = 3
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(
        hashed_ids_column, embedding_dimension, combiner="sum")
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column],
        weight_collections=["my_collection"])
    embedding_weights = ops.get_collection("my_collection")
    gradient_tensor = gradients_impl.gradients(model_input_tensor,
                                               embedding_weights)
    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input, gradients = sess.run([model_input_tensor, gradient_tensor])

    expected_input_shape = [4, 3, embedding_dimension]
    self.assertAllEqual(expected_input_shape, model_input.shape)

    # `ids_tensor` consists of 7 instances of <empty>, 3 occurrences of "b",
    # 2 occurrences of "c" and 1 instance of "a".
    expected_gradient_values = sorted([0., 3., 2., 1.] * embedding_dimension)
    actual_gradient_values = np.sort(gradients[0].values, axis=None)
    self.assertAllClose(expected_gradient_values, actual_gradient_values)

  def testMultipleColumns(self):
    """Mixed dense/one-hot/embedding columns concatenate on the feature axis."""
    batch_size = 4
    sequence_length = 3
    measurement_dimension = 5
    country_hash_size = 10
    max_id = 200
    id_embedding_dimension = 11
    normalizer = lambda x: x / 10.0

    measurement_tensor = random_ops.random_uniform(
        [batch_size, sequence_length, measurement_dimension])
    country_tensor = sparse_tensor.SparseTensor(
        values=["us", "ca",
                "ru", "fr", "ca",
                "mx"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])
    id_tensor = sparse_tensor.SparseTensor(
        values=[2, 5,
                26, 123, 1,
                0],
        indices=[[0, 0, 0], [0, 0, 1],
                 [0, 1, 1], [1, 0, 0], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    columns_to_tensors = {
        "measurements": measurement_tensor,
        "country": country_tensor,
        "id": id_tensor
    }

    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    country_column = feature_column.sparse_column_with_hash_bucket(
        "country", country_hash_size)
    id_column = feature_column.sparse_column_with_integerized_feature("id",
                                                                      max_id)

    onehot_country_column = feature_column.one_hot_column(country_column)
    embedded_id_column = feature_column.embedding_column(id_column,
                                                         id_embedding_dimension)

    model_input_columns = [
        measurement_column, onehot_country_column, embedded_id_column
    ]

    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, model_input_columns)
    self.assertEqual(dtypes.float32, model_input_tensor.dtype)

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    # Per-timestep width is the sum of the three columns' widths.
    expected_input_shape = [
        batch_size, sequence_length,
        measurement_dimension + country_hash_size + id_embedding_dimension
    ]
    self.assertAllEqual(expected_input_shape, model_input.shape)


class WeightedSumTest(test.TestCase):
  """Tests for weighted_sum_from_feature_columns (linear-model logits)."""

  def testFeatureColumnDictFails(self):
    """A dict (instead of an iterable) of feature columns raises ValueError."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    with self.assertRaisesRegexp(
        ValueError,
        "Expected feature_columns to be iterable, found dict"):
      feature_column_ops.weighted_sum_from_feature_columns(
          features, {"feature": hashed_sparse}, num_outputs=5)

  def testSparseColumn(self):
    """A hashed sparse column produces [batch, num_outputs] logits."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [hashed_sparse], num_outputs=5)
    logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())

  def testSparseIntColumn(self):
    """Tests a sparse column with int values."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket(
        "wire", 10, dtype=dtypes.int64)
    wire_tensor = sparse_tensor.SparseTensor(
        values=[101, 201, 301],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [hashed_sparse], num_outputs=5)
    logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())

  def testSparseColumnWithDenseInputTensor(self):
    """A dense string tensor feeding a sparse column is accepted."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = constant_op.constant(
        [["omar", "stringer"], ["marlo", "rick"]])
    features = {"wire": wire_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [hashed_sparse], num_outputs=5)
    logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())

  def testWeightedSparseColumn(self):
    """A keyed sparse id column paired with a sparse weight column."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [weighted_ids], num_outputs=5)
    logits_core = fc_core.linear_model(features, [weighted_ids], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())

  def testWeightedSparseColumnWithDenseInputTensor(self):
    """Weighted sparse column fed from dense id and weight tensors."""
    ids = feature_column.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer", "rick"])
    ids_tensor = constant_op.constant([["omar", "stringer"], ["marlo", "rick"]])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = constant_op.constant([[10.0, 20.0], [30.0, 40.0]])

    features = {"ids": ids_tensor, "weights": weights_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [weighted_ids], num_outputs=5)
    logits_core = fc_core.linear_model(features, [weighted_ids], units=5)

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())

  def testCrossedColumn(self):
    """A crossed column over two hashed sparse columns yields logits."""
    a = feature_column.sparse_column_with_hash_bucket(
        "aaa", hash_bucket_size=100)
    b = feature_column.sparse_column_with_hash_bucket(
        "bbb", hash_bucket_size=100)
    crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [crossed], num_outputs=5)
    logits_core = fc_core.linear_model(features, [crossed], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
1723 self.assertAllEqual(logits.eval(), logits_core.eval()) 1724 1725 def testEmbeddingColumn(self): 1726 hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10) 1727 wire_tensor = sparse_tensor.SparseTensor( 1728 values=["omar", "stringer", "marlo"], 1729 indices=[[0, 0], [1, 0], [1, 1]], 1730 dense_shape=[2, 2]) 1731 features = {"wire": wire_tensor} 1732 embeded_sparse = feature_column.embedding_column(hashed_sparse, 10) 1733 with self.test_session(): 1734 with self.assertRaisesRegexp( 1735 ValueError, "Error creating weighted sum for column: wire_embedding"): 1736 variables_lib.global_variables_initializer().run() 1737 feature_column_ops.weighted_sum_from_feature_columns( 1738 features, [embeded_sparse], num_outputs=5) 1739 1740 def testSparseFeatureColumnWithVocabularyFile(self): 1741 vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt") 1742 with open(vocabulary_file, "w") as f: 1743 f.write("\n".join(["head-on", "matrix", "winter sleep"]) + "\n") 1744 movies = feature_column.sparse_column_with_vocabulary_file( 1745 column_name="movies", vocabulary_file=vocabulary_file, vocab_size=3) 1746 with ops.Graph().as_default(): 1747 features = { 1748 "movies": 1749 sparse_tensor.SparseTensor( 1750 values=["matrix", "head-on", "winter sleep"], 1751 indices=[[0, 0], [0, 1], [1, 0]], 1752 dense_shape=[2, 2]) 1753 } 1754 output, column_to_variable, _ = ( 1755 feature_column_ops.weighted_sum_from_feature_columns( 1756 features, [movies], num_outputs=1)) 1757 logits_core = fc_core.linear_model(features, [movies]) 1758 1759 with self.test_session() as sess: 1760 variables_lib.initialize_all_variables().run() 1761 lookup_ops.tables_initializer().run() 1762 1763 weights = column_to_variable[movies][0] 1764 self.assertEqual(weights.get_shape(), (3, 1)) 1765 sess.run(weights.assign([[0.1], [0.3], [0.5]])) 1766 # score for first example = 0.3 (matrix) + 0.1 (head-on) = 0.4 1767 # score for second example = 0.5 (winter sleep) 1768 
self.assertAllClose(output.eval(), [[0.4], [0.5]]) 1769 # Cross compatibility: Core builder output should equal to contrib. 1770 self.assertAllEqual(output.eval().shape, logits_core.eval().shape) 1771 1772 def testRealValuedColumnWithMultiDimensions(self): 1773 real_valued = feature_column.real_valued_column("price", 2) 1774 features = { 1775 "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]]) 1776 } 1777 logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns( 1778 features, [real_valued], num_outputs=5) 1779 with self.test_session(): 1780 variables_lib.global_variables_initializer().run() 1781 self.assertAllEqual(logits.eval().shape, [3, 5]) 1782 1783 def testBucketizedColumnWithMultiDimensions(self): 1784 bucket = feature_column.bucketized_column( 1785 feature_column.real_valued_column("price", 2), 1786 boundaries=[0., 10., 100.]) 1787 features = { 1788 "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]]) 1789 } 1790 logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns( 1791 features, [bucket], num_outputs=5) 1792 with self.test_session(): 1793 variables_lib.global_variables_initializer().run() 1794 self.assertAllEqual(logits.eval().shape, [3, 5]) 1795 1796 def testAllWideColumns(self): 1797 real_valued = feature_column.real_valued_column("income", 2) 1798 bucket = feature_column.bucketized_column( 1799 feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 1800 hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10) 1801 crossed = feature_column.crossed_column([bucket, hashed_sparse], 100) 1802 features = { 1803 "income": 1804 constant_op.constant([[20., 10], [110, 0], [-3, 30]]), 1805 "price": 1806 constant_op.constant([[20.], [110], [-3]]), 1807 "wire": 1808 sparse_tensor.SparseTensor( 1809 values=["omar", "stringer", "marlo"], 1810 indices=[[0, 0], [1, 0], [2, 0]], 1811 dense_shape=[3, 1]) 1812 } 1813 output, _, _ = feature_column_ops.weighted_sum_from_feature_columns( 
1814 features, [real_valued, bucket, hashed_sparse, crossed], num_outputs=5) 1815 output_core = fc_core.linear_model( 1816 features, [real_valued, bucket, hashed_sparse, crossed], units=5) 1817 with self.test_session(): 1818 variables_lib.global_variables_initializer().run() 1819 self.assertAllEqual(output.eval().shape, [3, 5]) 1820 # Verify cross compatibility: Core builder output should equal to contrib. 1821 self.assertAllEqual(output.eval(), output_core.eval()) 1822 1823 def testPredictions(self): 1824 language = feature_column.sparse_column_with_keys( 1825 column_name="language", keys=["english", "finnish", "hindi"]) 1826 age = feature_column.real_valued_column("age") 1827 with ops.Graph().as_default(): 1828 features = { 1829 "age": 1830 constant_op.constant([[1], [2]]), 1831 "language": 1832 sparse_tensor.SparseTensor( 1833 values=["hindi", "english"], 1834 indices=[[0, 0], [1, 0]], 1835 dense_shape=[2, 1]), 1836 } 1837 output, column_to_variable, bias = ( 1838 feature_column_ops.weighted_sum_from_feature_columns( 1839 features, [age, language], num_outputs=1)) 1840 with self.test_session() as sess: 1841 variables_lib.global_variables_initializer().run() 1842 lookup_ops.tables_initializer().run() 1843 1844 self.assertAllClose(output.eval(), [[0.], [0.]]) 1845 1846 sess.run(bias.assign([0.1])) 1847 self.assertAllClose(output.eval(), [[0.1], [0.1]]) 1848 1849 # score: 0.1 + age*0.1 1850 sess.run(column_to_variable[age][0].assign([[0.2]])) 1851 self.assertAllClose(output.eval(), [[0.3], [0.5]]) 1852 1853 # score: 0.1 + age*0.1 + language_weight[language_index] 1854 sess.run(column_to_variable[language][0].assign([[0.1], [0.3], [0.2]])) 1855 self.assertAllClose(output.eval(), [[0.5], [0.6]]) 1856 1857 def testJointPredictions(self): 1858 country = feature_column.sparse_column_with_keys( 1859 column_name="country", keys=["us", "finland"]) 1860 language = feature_column.sparse_column_with_keys( 1861 column_name="language", keys=["english", "finnish", "hindi"]) 1862 
with ops.Graph().as_default(): 1863 features = { 1864 "country": 1865 sparse_tensor.SparseTensor( 1866 values=["finland", "us"], 1867 indices=[[0, 0], [1, 0]], 1868 dense_shape=[2, 1]), 1869 "language": 1870 sparse_tensor.SparseTensor( 1871 values=["hindi", "english"], 1872 indices=[[0, 0], [1, 0]], 1873 dense_shape=[2, 1]), 1874 } 1875 output, variables, bias = ( 1876 feature_column_ops.joint_weighted_sum_from_feature_columns( 1877 features, [country, language], num_outputs=1)) 1878 # Assert that only a single weight is created. 1879 self.assertEqual(len(variables), 1) 1880 with self.test_session() as sess: 1881 variables_lib.global_variables_initializer().run() 1882 lookup_ops.tables_initializer().run() 1883 1884 self.assertAllClose(output.eval(), [[0.], [0.]]) 1885 1886 sess.run(bias.assign([0.1])) 1887 self.assertAllClose(output.eval(), [[0.1], [0.1]]) 1888 1889 # shape is [5,1] because 1 class and 2 + 3 features. 1890 self.assertEquals(variables[0].get_shape().as_list(), [5, 1]) 1891 1892 # score: bias + country_weight + language_weight 1893 sess.run(variables[0].assign([[0.1], [0.2], [0.3], [0.4], [0.5]])) 1894 self.assertAllClose(output.eval(), [[0.8], [0.5]]) 1895 1896 def testJointPredictionsWeightedFails(self): 1897 language = feature_column.weighted_sparse_column( 1898 feature_column.sparse_column_with_keys( 1899 column_name="language", keys=["english", "finnish", "hindi"]), 1900 "weight") 1901 with ops.Graph().as_default(): 1902 features = { 1903 "weight": 1904 constant_op.constant([[1], [2]]), 1905 "language": 1906 sparse_tensor.SparseTensor( 1907 values=["hindi", "english"], 1908 indices=[[0, 0], [1, 0]], 1909 dense_shape=[2, 1]), 1910 } 1911 with self.assertRaises(AssertionError): 1912 feature_column_ops.joint_weighted_sum_from_feature_columns( 1913 features, [language], num_outputs=1) 1914 1915 def testJointPredictionsRealFails(self): 1916 age = feature_column.real_valued_column("age") 1917 with ops.Graph().as_default(): 1918 features = {"age": 
                      constant_op.constant([[1], [2]]),}
      with self.assertRaises(NotImplementedError):
        feature_column_ops.joint_weighted_sum_from_feature_columns(
            features, [age], num_outputs=1)

  def testPredictionsWithWeightedSparseColumn(self):
    """Scores scale by the weight column attached to the id column."""
    language = feature_column.sparse_column_with_keys(
        column_name="language", keys=["english", "finnish", "hindi"])
    weighted_language = feature_column.weighted_sparse_column(
        sparse_id_column=language, weight_column_name="age")
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["hindi", "english"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 1]),
          "age":
              sparse_tensor.SparseTensor(
                  values=[10.0, 20.0],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 1])
      }
      output, column_to_variable, bias = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [weighted_language], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        self.assertAllClose(output.eval(), [[0.], [0.]])

        sess.run(bias.assign([0.1]))
        self.assertAllClose(output.eval(), [[0.1], [0.1]])

        # score: bias + age*language_weight[index]
        sess.run(column_to_variable[weighted_language][0].assign([[0.1], [0.2],
                                                                  [0.3]]))
        self.assertAllClose(output.eval(), [[3.1], [2.1]])

  def testPredictionsWithMultivalentColumnButNoCross(self):
    """Multiple ids in one example each add their own weight."""
    language = feature_column.sparse_column_with_keys(
        column_name="language", keys=["english", "turkish", "hindi"])
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["hindi", "english"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      output, column_to_variable, bias = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [language], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # score: 0.1 + language_weight['hindi'] + language_weight['english']
        sess.run(bias.assign([0.1]))
        sess.run(column_to_variable[language][0].assign([[0.1], [0.3], [0.2]]))
        self.assertAllClose(output.eval(), [[0.4]])

  def testSparseFeatureColumnWithHashedBucketSize(self):
    """Hash-bucketed sparse column weights have shape [buckets, outputs]."""
    movies = feature_column.sparse_column_with_hash_bucket(
        column_name="movies", hash_bucket_size=15)
    with ops.Graph().as_default():
      features = {
          "movies":
              sparse_tensor.SparseTensor(
                  values=["matrix", "head-on", "winter sleep"],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [movies], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        weights = column_to_variable[movies][0]
        self.assertEqual(weights.get_shape(), (15, 1))
        sess.run(weights.assign(weights + 0.4))
        # score for first example = 0.4 (matrix) + 0.4 (head-on) = 0.8
        # score for second example = 0.4 (winter sleep)
        self.assertAllClose(output.eval(), [[0.8], [0.4]])

  def testCrossUsageInPredictions(self):
    """A crossed column contributes one weight per crossed id."""
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=10)
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 1]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 1])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country_language], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # One cross per example, each carrying the 0.4 weight.
        weights = column_to_variable[country_language][0]
        sess.run(weights.assign(weights + 0.4))
        self.assertAllClose(output.eval(), [[0.4], [0.4]])

  def testCrossColumnByItself(self):
    """Crossing a column with itself produces the full cartesian product."""
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    language_language = feature_column.crossed_column(
        [language, language], hash_bucket_size=10)
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [language_language], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        weights = column_to_variable[language_language][0]
        sess.run(weights.assign(weights + 0.4))
        # There are two features inside language. If we cross it by itself we'll
        # have four crossed features.
        self.assertAllClose(output.eval(), [[1.6]])

  def testMultivalentCrossUsageInPredictions(self):
    """Two ids in each column cross into four weighted features."""
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=10)
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country_language], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        weights = column_to_variable[country_language][0]
        sess.run(weights.assign(weights + 0.4))
        # There are four crosses each with 0.4 weight.
        # score = 0.4 + 0.4 + 0.4 + 0.4
        self.assertAllClose(output.eval(), [[1.6]])

  def testMultivalentCrossUsageInPredictionsWithPartition(self):
    """Partitioned (sharded) weight variables still sum correctly."""
    # bucket size has to be big enough to allow sharding.
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=64 << 19)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=64 << 18)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=64 << 18)
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      # min_slice_size below the bucket sizes forces each weight variable to
      # be split across multiple partitions.
      with variable_scope.variable_scope(
          "weighted_sum_from_feature_columns",
          features.values(),
          partitioner=partitioned_variables.min_max_variable_partitioner(
              max_partitions=10, min_slice_size=((64 << 20) - 1))) as scope:
        output, column_to_variable, _ = (
            feature_column_ops.weighted_sum_from_feature_columns(
                features, [country, language, country_language],
                num_outputs=1,
                scope=scope))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        self.assertEqual(2, len(column_to_variable[country]))
        self.assertEqual(3, len(column_to_variable[language]))
        self.assertEqual(2, len(column_to_variable[country_language]))

        weights = column_to_variable[country_language]
        for partition_variable in weights:
          sess.run(partition_variable.assign(partition_variable + 0.4))
        # There are four crosses each with 0.4 weight.
        # score = 0.4 + 0.4 + 0.4 + 0.4
        self.assertAllClose(output.eval(), [[1.6]])

  def testRealValuedColumnHavingMultiDimensions(self):
    """Each dimension of a multi-dim real column gets its own weight row."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    age = feature_column.real_valued_column("age")
    # The following RealValuedColumn has 3 dimensions.
    incomes = feature_column.real_valued_column("incomes", 3)

    with ops.Graph().as_default():
      features = {
          "age":
              constant_op.constant([[1], [1]]),
          "incomes":
              constant_op.constant([[100., 200., 300.], [10., 20., 30.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country, age, incomes], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # score = incomes . weights = 100*0.1 + 200*0.2 + 300*0.3 = 140, etc.
        incomes_weights = column_to_variable[incomes][0]
        sess.run(incomes_weights.assign([[0.1], [0.2], [0.3]]))
        self.assertAllClose(output.eval(), [[140.], [14.]])

  def testMulticlassWithRealValuedColumnHavingMultiDimensionsAndSparse(self):
    """Multi-output weighted sum over mixed dense/var-len/sparse columns."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    age = feature_column.real_valued_column("age")
    # The following RealValuedColumn has no predefined dimension so it
    # can be missing.
    height = feature_column._real_valued_var_len_column("height",
                                                        default_value=0,
                                                        is_sparse=False)
    # The following RealValuedColumn has 3 dimensions.
    incomes = feature_column.real_valued_column("incomes", 3)
    with ops.Graph().as_default():
      features = {
          "age":
              constant_op.constant([[1], [1]]),
          "incomes":
              constant_op.constant([[100., 200., 300.], [10., 20., 30.]]),
          "height":
              constant_op.constant([[5., 4.], [0., 6.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country, age, height, incomes], num_outputs=5))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # With height weights set, score = sum(height) scaled per class:
        # example 1: (5+4)=9 -> [9, 18, 27, 45, 90]; example 2: 6 -> [6, ...].
        height_weights = column_to_variable[height][0]
        sess.run(
            height_weights.assign(
                [[1., 2., 3., 5., 10.], [1., 2., 3., 5., 10.]]))
        self.assertAllClose(output.eval(), [[9., 18., 27., 45., 90.],
                                            [6., 12., 18., 30., 60.]])

        # Adding incomes weights stacks the incomes dot-product on top.
        incomes_weights = column_to_variable[incomes][0]
        sess.run(
            incomes_weights.assign([[0.01, 0.1, 1., 10., 100.],
                                    [0.02, 0.2, 2., 20., 200.],
                                    [0.03, 0.3, 3., 30., 300.]]))
        self.assertAllClose(
            output.eval(),
            [[14. + 9., 140. + 18., 1400. + 27., 14000. + 45., 140000. + 90.],
             [1.4 + 6., 14. + 12., 140. + 18., 1400. + 30., 14000. + 60.]])

  def testBucketizedColumn(self):
    """Each example's score is the weight of the bucket it falls in."""
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    with ops.Graph().as_default():
      # buckets 2, 3, 0
      features = {"price": constant_op.constant([[20.], [110], [-3]])}
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [bucket], num_outputs=1))
      output_core = fc_core.linear_model(features, [bucket])
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()
        # Cross compatibility: Core builder output should equal to contrib.
        self.assertAllEqual(output.eval(), output_core.eval())

        sess.run(column_to_variable[bucket][0].assign([[0.1], [0.2], [0.3],
                                                       [0.4]]))
        self.assertAllClose(output.eval(), [[0.3], [0.4], [0.1]])

  def testBucketizedColumnHavingMultiDimensions(self):
    """A 2-dim bucketized column keeps one weight row per (dim, bucket)."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    with ops.Graph().as_default():
      # buckets 2, 3, 0
      features = {
          "price":
              constant_op.constant([[20., 210], [110, 50], [-3, -30]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[3, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [bucket, country], num_outputs=1))
      output_core = fc_core.linear_model(features, [bucket, country])
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()
        # Cross compatibility: Core builder output should equal to contrib.
        self.assertAllEqual(output.eval(), output_core.eval())

        # dimension = 2, bucket_size = 4, num_classes = 1
        sess.run(column_to_variable[bucket][0].assign(
            [[0.1], [0.2], [0.3], [0.4], [1], [2], [3], [4]]))
        self.assertAllClose(output.eval(), [[0.3 + 4], [0.4 + 3], [0.1 + 1]])

  def testMulticlassWithBucketizedColumnHavingMultiDimensions(self):
    """Multi-output (5-class) scores from a 2-dim bucketized column."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    with ops.Graph().as_default():
      # buckets 2, 3, 0
      features = {
          "price":
              constant_op.constant([[20., 210], [110, 50], [-3, -30]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[3, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [bucket, country], num_outputs=5))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # dimension = 2, bucket_size = 4, num_classes = 5
        sess.run(column_to_variable[bucket][0].assign(
            [[0.1, 1, 10, 100, 1000], [0.2, 2, 20, 200, 2000],
             [0.3, 3, 30, 300, 3000], [0.4, 4, 40, 400, 4000],
             [5, 50, 500, 5000, 50000], [6, 60, 600, 6000, 60000],
             [7, 70, 700, 7000, 70000], [8, 80, 800, 8000, 80000]]))
        self.assertAllClose(
            output.eval(),
            [[0.3 + 8, 3 + 80, 30 + 800, 300 + 8000, 3000 + 80000],
             [0.4 + 7, 4 + 70, 40 + 700, 400 + 7000, 4000 + 70000],
             [0.1 + 5, 1 + 50, 10 + 500, 100 + 5000, 1000 + 50000]])

  def testCrossWithBucketizedColumn(self):
    """Crossing a sparse column with a bucketized column."""
    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = feature_column.crossed_column(
        [country, price_bucket], hash_bucket_size=10)
    with ops.Graph().as_default():
      features = {
          "price":
              constant_op.constant([[20.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country_price], num_outputs=1))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        weights = column_to_variable[country_price][0]
        sess.run(weights.assign(weights + 0.4))
        # There are two crosses each with 0.4 weight.
        # score = 0.4 + 0.4
        self.assertAllClose(output.eval(), [[0.8]])

  def testCrossWithCrossedColumn(self):
    """A crossed column may itself be a member of another cross."""
    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=10)
    country_language_price = feature_column.crossed_column(
        set([country_language, price_bucket]), hash_bucket_size=15)
    with ops.Graph().as_default():
      features = {
          "price":
              constant_op.constant([[20.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      output, column_to_variable, _ = (
feature_column_ops.weighted_sum_from_feature_columns( 2367 features, [country_language_price], num_outputs=1)) 2368 with self.test_session() as sess: 2369 variables_lib.global_variables_initializer().run() 2370 lookup_ops.tables_initializer().run() 2371 2372 weights = column_to_variable[country_language_price][0] 2373 sess.run(weights.assign(weights + 0.4)) 2374 # There are two crosses each with 0.4 weight. 2375 # score = 0.4 + 0.4 + 0.4 + 0.4 2376 self.assertAllClose(output.eval(), [[1.6]]) 2377 2378 def testIntegerizedColumn(self): 2379 product = feature_column.sparse_column_with_integerized_feature( 2380 "product", bucket_size=5) 2381 with ops.Graph().as_default(): 2382 features = { 2383 "product": 2384 sparse_tensor.SparseTensor( 2385 values=[0, 4, 2], 2386 indices=[[0, 0], [1, 0], [2, 0]], 2387 dense_shape=[3, 1]) 2388 } 2389 output, column_to_variable, _ = ( 2390 feature_column_ops.weighted_sum_from_feature_columns( 2391 features, [product], num_outputs=1)) 2392 with self.test_session() as sess: 2393 variables_lib.global_variables_initializer().run() 2394 lookup_ops.tables_initializer().run() 2395 product_weights = column_to_variable[product][0] 2396 sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]])) 2397 self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]]) 2398 2399 def testIntegerizedColumnWithDenseInputTensor(self): 2400 product = feature_column.sparse_column_with_integerized_feature( 2401 "product", bucket_size=5) 2402 with ops.Graph().as_default(): 2403 features = {"product": constant_op.constant([[0], [4], [2]])} 2404 output, column_to_variable, _ = ( 2405 feature_column_ops.weighted_sum_from_feature_columns( 2406 features, [product], num_outputs=1)) 2407 with self.test_session() as sess: 2408 variables_lib.global_variables_initializer().run() 2409 lookup_ops.tables_initializer().run() 2410 product_weights = column_to_variable[product][0] 2411 sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]])) 2412 
self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]]) 2413 2414 def testIntegerizedColumnWithDenseInputTensor2(self): 2415 product = feature_column.sparse_column_with_integerized_feature( 2416 "product", bucket_size=5) 2417 with ops.Graph().as_default(): 2418 features = {"product": constant_op.constant([[0, 4], [2, 3]])} 2419 output, column_to_variable, _ = ( 2420 feature_column_ops.weighted_sum_from_feature_columns( 2421 features, [product], num_outputs=1)) 2422 with self.test_session() as sess: 2423 variables_lib.global_variables_initializer().run() 2424 lookup_ops.tables_initializer().run() 2425 product_weights = column_to_variable[product][0] 2426 sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]])) 2427 self.assertAllClose(output.eval(), [[0.6], [0.7]]) 2428 2429 def testIntegerizedColumnWithInvalidId(self): 2430 product = feature_column.sparse_column_with_integerized_feature( 2431 "product", bucket_size=5) 2432 with ops.Graph().as_default(): 2433 features = { 2434 "product": 2435 sparse_tensor.SparseTensor( 2436 values=[5, 4, 7], 2437 indices=[[0, 0], [1, 0], [2, 0]], 2438 dense_shape=[3, 1]) 2439 } 2440 output, column_to_variable, _ = ( 2441 feature_column_ops.weighted_sum_from_feature_columns( 2442 features, [product], num_outputs=1)) 2443 with self.test_session() as sess: 2444 variables_lib.global_variables_initializer().run() 2445 lookup_ops.tables_initializer().run() 2446 product_weights = column_to_variable[product][0] 2447 sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]])) 2448 self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]]) 2449 2450 def testMulticlassWithOnlyBias(self): 2451 with ops.Graph().as_default(): 2452 features = {"age": constant_op.constant([[10.], [20.], [30.], [40.]])} 2453 output, _, bias = feature_column_ops.weighted_sum_from_feature_columns( 2454 features, [feature_column.real_valued_column("age")], num_outputs=3) 2455 with self.test_session() as sess: 2456 
variables_lib.global_variables_initializer().run() 2457 lookup_ops.tables_initializer().run() 2458 sess.run(bias.assign([0.1, 0.2, 0.3])) 2459 self.assertAllClose(output.eval(), [[0.1, 0.2, 0.3], [0.1, 0.2, 0.3], 2460 [0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]) 2461 2462 def testMulticlassWithRealValuedColumn(self): 2463 with ops.Graph().as_default(): 2464 column = feature_column.real_valued_column("age") 2465 features = {"age": constant_op.constant([[10.], [20.], [30.], [40.]])} 2466 output, column_to_variable, _ = ( 2467 feature_column_ops.weighted_sum_from_feature_columns( 2468 features, [column], num_outputs=3)) 2469 with self.test_session() as sess: 2470 variables_lib.global_variables_initializer().run() 2471 lookup_ops.tables_initializer().run() 2472 weights = column_to_variable[column][0] 2473 self.assertEqual(weights.get_shape(), (1, 3)) 2474 sess.run(weights.assign([[0.01, 0.03, 0.05]])) 2475 self.assertAllClose(output.eval(), [[0.1, 0.3, 0.5], [0.2, 0.6, 1.0], 2476 [0.3, 0.9, 1.5], [0.4, 1.2, 2.0]]) 2477 2478 def testMulticlassWithSparseColumn(self): 2479 with ops.Graph().as_default(): 2480 column = feature_column.sparse_column_with_keys( 2481 column_name="language", 2482 keys=["english", "arabic", "hindi", "russian", "swahili"]) 2483 features = { 2484 "language": 2485 sparse_tensor.SparseTensor( 2486 values=["hindi", "english", "arabic", "russian"], 2487 indices=[[0, 0], [1, 0], [2, 0], [3, 0]], 2488 dense_shape=[4, 1]) 2489 } 2490 output, column_to_variable, _ = ( 2491 feature_column_ops.weighted_sum_from_feature_columns( 2492 features, [column], num_outputs=3)) 2493 with self.test_session() as sess: 2494 variables_lib.global_variables_initializer().run() 2495 lookup_ops.tables_initializer().run() 2496 weights = column_to_variable[column][0] 2497 self.assertEqual(weights.get_shape(), (5, 3)) 2498 sess.run( 2499 weights.assign([[0.1, 0.4, 0.7], 2500 [0.2, 0.5, 0.8], 2501 [0.3, 0.6, 0.9], 2502 [0.4, 0.7, 1.0], 2503 [0.5, 0.8, 1.1]])) 2504 
self.assertAllClose(output.eval(), [[0.3, 0.6, 0.9], 2505 [0.1, 0.4, 0.7], 2506 [0.2, 0.5, 0.8], 2507 [0.4, 0.7, 1.0]]) 2508 2509 def testMulticlassWithBucketizedColumn(self): 2510 column = feature_column.bucketized_column( 2511 feature_column.real_valued_column("price"), 2512 boundaries=[0., 100., 500., 1000.]) 2513 with ops.Graph().as_default(): 2514 # buckets 0, 2, 1, 2 2515 features = {"price": constant_op.constant([[-3], [110], [20.], [210]])} 2516 output, column_to_variable, _ = ( 2517 feature_column_ops.weighted_sum_from_feature_columns( 2518 features, [column], num_outputs=3)) 2519 with self.test_session() as sess: 2520 variables_lib.global_variables_initializer().run() 2521 lookup_ops.tables_initializer().run() 2522 2523 weights = column_to_variable[column][0] 2524 self.assertEqual(weights.get_shape(), (5, 3)) 2525 sess.run( 2526 weights.assign([[0.1, 0.4, 0.7], 2527 [0.2, 0.5, 0.8], 2528 [0.3, 0.6, 0.9], 2529 [0.4, 0.7, 1.0], 2530 [0.5, 0.8, 1.1]])) 2531 self.assertAllClose(output.eval(), [[0.1, 0.4, 0.7], 2532 [0.3, 0.6, 0.9], 2533 [0.2, 0.5, 0.8], 2534 [0.3, 0.6, 0.9]]) 2535 2536 def testMulticlassWithCrossedColumn(self): 2537 language = feature_column.sparse_column_with_hash_bucket( 2538 "language", hash_bucket_size=3) 2539 country = feature_column.sparse_column_with_hash_bucket( 2540 "country", hash_bucket_size=2) 2541 column = feature_column.crossed_column( 2542 {language, country}, hash_bucket_size=5) 2543 with ops.Graph().as_default(): 2544 features = { 2545 "language": 2546 sparse_tensor.SparseTensor( 2547 values=["english", "spanish", "russian", "swahili"], 2548 indices=[[0, 0], [1, 0], [2, 0], [3, 0]], 2549 dense_shape=[4, 1]), 2550 "country": 2551 sparse_tensor.SparseTensor( 2552 values=["US", "SV", "RU", "KE"], 2553 indices=[[0, 0], [1, 0], [2, 0], [3, 0]], 2554 dense_shape=[4, 1]) 2555 } 2556 output, column_to_variable, _ = ( 2557 feature_column_ops.weighted_sum_from_feature_columns( 2558 features, [column], num_outputs=3)) 2559 with 
self.test_session() as sess: 2560 variables_lib.global_variables_initializer().run() 2561 lookup_ops.tables_initializer().run() 2562 2563 weights = column_to_variable[column][0] 2564 self.assertEqual(weights.get_shape(), (5, 3)) 2565 sess.run( 2566 weights.assign([[0.1, 0.4, 0.7], 2567 [0.2, 0.5, 0.8], 2568 [0.3, 0.6, 0.9], 2569 [0.4, 0.7, 1.0], 2570 [0.5, 0.8, 1.1]])) 2571 self.assertAllClose(array_ops.shape(output).eval(), [4, 3]) 2572 2573 def testMulticlassWithMultivalentColumn(self): 2574 column = feature_column.sparse_column_with_keys( 2575 column_name="language", 2576 keys=["english", "turkish", "hindi", "russian", "swahili"]) 2577 with ops.Graph().as_default(): 2578 features = { 2579 "language": 2580 sparse_tensor.SparseTensor( 2581 values=["hindi", "english", "turkish", "turkish", "english"], 2582 indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]], 2583 dense_shape=[4, 2]) 2584 } 2585 output, column_to_variable, _ = ( 2586 feature_column_ops.weighted_sum_from_feature_columns( 2587 features, [column], num_outputs=3)) 2588 with self.test_session() as sess: 2589 variables_lib.global_variables_initializer().run() 2590 lookup_ops.tables_initializer().run() 2591 2592 weights = column_to_variable[column][0] 2593 self.assertEqual(weights.get_shape(), (5, 3)) 2594 sess.run( 2595 weights.assign([[0.1, 0.4, 0.7], 2596 [0.2, 0.5, 0.8], 2597 [0.3, 0.6, 0.9], 2598 [0.4, 0.7, 1.0], 2599 [0.5, 0.8, 1.1]])) 2600 self.assertAllClose(output.eval(), [[0.4, 1.0, 1.6], 2601 [0.2, 0.5, 0.8], 2602 [0.2, 0.5, 0.8], 2603 [0.1, 0.4, 0.7]]) 2604 2605 def testVariablesAddedToCollection(self): 2606 price_bucket = feature_column.bucketized_column( 2607 feature_column.real_valued_column("price"), boundaries=[0., 10., 100.]) 2608 country = feature_column.sparse_column_with_hash_bucket( 2609 "country", hash_bucket_size=5) 2610 country_price = feature_column.crossed_column( 2611 [country, price_bucket], hash_bucket_size=10) 2612 with ops.Graph().as_default(): 2613 features = { 2614 "price": 
2615 constant_op.constant([[20.]]), 2616 "country": 2617 sparse_tensor.SparseTensor( 2618 values=["US", "SV"], 2619 indices=[[0, 0], [0, 1]], 2620 dense_shape=[1, 2]) 2621 } 2622 feature_column_ops.weighted_sum_from_feature_columns( 2623 features, [country_price, price_bucket], 2624 num_outputs=1, 2625 weight_collections=["my_collection"]) 2626 weights = ops.get_collection("my_collection") 2627 # 3 = bias + price_bucket + country_price 2628 self.assertEqual(3, len(weights)) 2629 2630 2631 class ParseExampleTest(test.TestCase): 2632 2633 def testParseExample(self): 2634 bucket = feature_column.bucketized_column( 2635 feature_column.real_valued_column( 2636 "price", dimension=3), 2637 boundaries=[0., 10., 100.]) 2638 wire_cast = feature_column.sparse_column_with_keys( 2639 "wire_cast", ["marlo", "omar", "stringer"]) 2640 # buckets 2, 3, 0 2641 data = example_pb2.Example(features=feature_pb2.Features(feature={ 2642 "price": 2643 feature_pb2.Feature(float_list=feature_pb2.FloatList( 2644 value=[20., 110, -3])), 2645 "wire_cast": 2646 feature_pb2.Feature(bytes_list=feature_pb2.BytesList( 2647 value=[b"stringer", b"marlo"])), 2648 })) 2649 output = feature_column_ops.parse_feature_columns_from_examples( 2650 serialized=[data.SerializeToString()], 2651 feature_columns=[bucket, wire_cast]) 2652 self.assertIn(bucket, output) 2653 self.assertIn(wire_cast, output) 2654 with self.test_session(): 2655 lookup_ops.tables_initializer().run() 2656 self.assertAllEqual(output[bucket].eval(), [[2, 3, 0]]) 2657 self.assertAllEqual(output[wire_cast].indices.eval(), [[0, 0], [0, 1]]) 2658 self.assertAllEqual(output[wire_cast].values.eval(), [2, 0]) 2659 2660 def testParseSequenceExample(self): 2661 location_keys = ["east_side", "west_side", "nyc"] 2662 embedding_dimension = 10 2663 2664 location = feature_column.sparse_column_with_keys( 2665 "location", keys=location_keys) 2666 location_onehot = feature_column.one_hot_column(location) 2667 wire_cast = 
feature_column.sparse_column_with_keys( 2668 "wire_cast", ["marlo", "omar", "stringer"]) 2669 wire_cast_embedded = feature_column.embedding_column( 2670 wire_cast, dimension=embedding_dimension) 2671 measurements = feature_column.real_valued_column( 2672 "measurements", dimension=2) 2673 2674 context_feature_columns = [location_onehot] 2675 sequence_feature_columns = [wire_cast_embedded, measurements] 2676 2677 sequence_example = example_pb2.SequenceExample( 2678 context=feature_pb2.Features(feature={ 2679 "location": 2680 feature_pb2.Feature(bytes_list=feature_pb2.BytesList( 2681 value=[b"west_side"])), 2682 }), 2683 feature_lists=feature_pb2.FeatureLists(feature_list={ 2684 "wire_cast": 2685 feature_pb2.FeatureList(feature=[ 2686 feature_pb2.Feature(bytes_list=feature_pb2.BytesList( 2687 value=[b"marlo", b"stringer"])), 2688 feature_pb2.Feature(bytes_list=feature_pb2.BytesList( 2689 value=[b"omar", b"stringer", b"marlo"])), 2690 feature_pb2.Feature(bytes_list=feature_pb2.BytesList( 2691 value=[b"marlo"])), 2692 ]), 2693 "measurements": 2694 feature_pb2.FeatureList(feature=[ 2695 feature_pb2.Feature(float_list=feature_pb2.FloatList( 2696 value=[0.2, 0.3])), 2697 feature_pb2.Feature(float_list=feature_pb2.FloatList( 2698 value=[0.1, 0.8])), 2699 feature_pb2.Feature(float_list=feature_pb2.FloatList( 2700 value=[0.5, 0.0])), 2701 ]) 2702 })) 2703 2704 ctx, seq = feature_column_ops.parse_feature_columns_from_sequence_examples( 2705 serialized=sequence_example.SerializeToString(), 2706 context_feature_columns=context_feature_columns, 2707 sequence_feature_columns=sequence_feature_columns) 2708 2709 self.assertIn("location", ctx) 2710 self.assertIsInstance(ctx["location"], sparse_tensor.SparseTensor) 2711 self.assertIn("wire_cast", seq) 2712 self.assertIsInstance(seq["wire_cast"], sparse_tensor.SparseTensor) 2713 self.assertIn("measurements", seq) 2714 self.assertIsInstance(seq["measurements"], ops.Tensor) 2715 2716 with self.test_session() as sess: 2717 location_val, 
wire_cast_val, measurement_val = sess.run( 2718 [ctx["location"], seq["wire_cast"], seq["measurements"]]) 2719 2720 self.assertAllEqual(location_val.indices, np.array([[0]])) 2721 self.assertAllEqual(location_val.values, np.array([b"west_side"])) 2722 self.assertAllEqual(location_val.dense_shape, np.array([1])) 2723 2724 self.assertAllEqual(wire_cast_val.indices, 2725 np.array( 2726 [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]])) 2727 self.assertAllEqual( 2728 wire_cast_val.values, 2729 np.array( 2730 [b"marlo", b"stringer", b"omar", b"stringer", b"marlo", b"marlo"])) 2731 self.assertAllEqual(wire_cast_val.dense_shape, np.array([3, 3])) 2732 2733 self.assertAllClose(measurement_val, 2734 np.array([[0.2, 0.3], [0.1, 0.8], [0.5, 0.0]])) 2735 2736 2737 class InferRealValuedColumnTest(test.TestCase): 2738 2739 def testTensorInt32(self): 2740 self.assertEqual( 2741 feature_column_ops.infer_real_valued_columns( 2742 array_ops.zeros( 2743 shape=[33, 4], dtype=dtypes.int32)), [ 2744 feature_column.real_valued_column( 2745 "", dimension=4, dtype=dtypes.int32) 2746 ]) 2747 2748 def testTensorInt64(self): 2749 self.assertEqual( 2750 feature_column_ops.infer_real_valued_columns( 2751 array_ops.zeros( 2752 shape=[33, 4], dtype=dtypes.int64)), [ 2753 feature_column.real_valued_column( 2754 "", dimension=4, dtype=dtypes.int64) 2755 ]) 2756 2757 def testTensorFloat32(self): 2758 self.assertEqual( 2759 feature_column_ops.infer_real_valued_columns( 2760 array_ops.zeros( 2761 shape=[33, 4], dtype=dtypes.float32)), [ 2762 feature_column.real_valued_column( 2763 "", dimension=4, dtype=dtypes.float32) 2764 ]) 2765 2766 def testTensorFloat64(self): 2767 self.assertEqual( 2768 feature_column_ops.infer_real_valued_columns( 2769 array_ops.zeros( 2770 shape=[33, 4], dtype=dtypes.float64)), [ 2771 feature_column.real_valued_column( 2772 "", dimension=4, dtype=dtypes.float64) 2773 ]) 2774 2775 def testDictionary(self): 2776 self.assertItemsEqual( 2777 
feature_column_ops.infer_real_valued_columns({ 2778 "a": array_ops.zeros( 2779 shape=[33, 4], dtype=dtypes.int32), 2780 "b": array_ops.zeros( 2781 shape=[3, 2], dtype=dtypes.float32) 2782 }), [ 2783 feature_column.real_valued_column( 2784 "a", dimension=4, dtype=dtypes.int32), 2785 feature_column.real_valued_column( 2786 "b", dimension=2, dtype=dtypes.float32) 2787 ]) 2788 2789 def testNotGoodDtype(self): 2790 with self.assertRaises(ValueError): 2791 feature_column_ops.infer_real_valued_columns( 2792 constant_op.constant( 2793 [["a"]], dtype=dtypes.string)) 2794 2795 def testSparseTensor(self): 2796 with self.assertRaises(ValueError): 2797 feature_column_ops.infer_real_valued_columns( 2798 sparse_tensor.SparseTensor( 2799 indices=[[0, 0]], values=["a"], dense_shape=[1, 1])) 2800 2801 2802 if __name__ == "__main__": 2803 test.main() 2804