Home | History | Annotate | Download | only in layers
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for layers.feature_column_ops."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import os
     22 
     23 import numpy as np
     24 
     25 from tensorflow.contrib import layers
     26 from tensorflow.contrib.layers.python.layers import feature_column
     27 from tensorflow.contrib.layers.python.layers import feature_column_ops
     28 from tensorflow.core.example import example_pb2
     29 from tensorflow.core.example import feature_pb2
     30 from tensorflow.python.feature_column import feature_column as fc_core
     31 from tensorflow.python.framework import constant_op
     32 from tensorflow.python.framework import dtypes
     33 from tensorflow.python.framework import ops
     34 from tensorflow.python.framework import sparse_tensor
     35 from tensorflow.python.ops import array_ops
     36 from tensorflow.python.ops import gradients_impl
     37 from tensorflow.python.ops import init_ops
     38 from tensorflow.python.ops import lookup_ops
     39 from tensorflow.python.ops import partitioned_variables
     40 from tensorflow.python.ops import random_ops
     41 from tensorflow.python.ops import variable_scope
     42 from tensorflow.python.ops import variables as variables_lib
     43 from tensorflow.python.platform import test
     44 
     45 
class TransformerTest(test.TestCase):
  """Tests for feature_column_ops._Transformer and transform_features.

  Each test builds one or more contrib feature columns, feeds matching raw
  feature tensors, and verifies the transformed output — either a dense
  Tensor, or the values/indices/dense_shape of a SparseTensor.
  """

  def testRealValuedColumnIsIdentityTransformation(self):
    """A dense real-valued column passes its input through unchanged."""
    real_valued = feature_column.real_valued_column("price")
    features = {"price": constant_op.constant([[20.], [110], [-3]])}
    output = feature_column_ops._Transformer(features).transform(real_valued)
    with self.test_session():
      self.assertAllEqual(output.eval(), [[20.], [110], [-3]])

  def testSparseRealValuedColumnIdentityTransformation(self):
    """A sparse var-len real-valued column with no normalizer is an identity."""
    sparse_real_valued = feature_column._real_valued_var_len_column(
        "rating", is_sparse=True)
    rating_tensor = sparse_tensor.SparseTensor(
        values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1])
    features = {"rating": rating_tensor}
    output = feature_column_ops._Transformer(features).transform(
        sparse_real_valued)
    with self.test_session():
      # All three SparseTensor components must be untouched.
      self.assertAllEqual(output.values.eval(), rating_tensor.values.eval())
      self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval())
      self.assertAllEqual(output.dense_shape.eval(),
                          rating_tensor.dense_shape.eval())

  def testSparseRealValuedColumnWithTransformation(self):
    """A normalizer on a sparse real-valued column is applied to its values."""

    def square_fn(x):
      return x**2

    sparse_real_valued = feature_column._real_valued_var_len_column(
        "rating", normalizer=square_fn, is_sparse=True)
    rating_tensor = sparse_tensor.SparseTensor(
        values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1])
    features = {"rating": rating_tensor}
    output_dict = feature_column_ops.transform_features(features,
                                                        [sparse_real_valued])
    self.assertTrue(sparse_real_valued in output_dict)
    output = output_dict[sparse_real_valued]
    with self.test_session():
      # Values are squared; indices and shape are untouched.
      self.assertArrayNear(output.values.eval(), [4.0, 25.0], 1e-5)
      self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval())
      self.assertAllEqual(output.dense_shape.eval(),
                          rating_tensor.dense_shape.eval())

  def testBucketizedColumn(self):
    """Bucketization maps each dense value to its boundary bucket index."""
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    # buckets 2, 3, 0
    features = {"price": constant_op.constant([[20.], [110], [-3]])}

    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[bucket])
    self.assertEqual(len(output), 1)
    self.assertIn(bucket, output)
    with self.test_session():
      self.assertAllEqual(output[bucket].eval(), [[2], [3], [0]])

  def testBucketizedColumnWithMultiDimensions(self):
    """Bucketization is applied element-wise to multi-dimensional input."""
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    # buckets 2, 3, 0
    features = {
        "price": constant_op.constant([[20., 110], [110., 20], [-3, -3]])
    }
    output = feature_column_ops._Transformer(features).transform(bucket)
    with self.test_session():
      self.assertAllEqual(output.eval(), [[2, 3], [3, 2], [0, 0]])

  def testCachedTransformation(self):
    """Transforming the same column twice must not add new graph ops."""
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    # buckets 2, 3, 0
    features = {"price": constant_op.constant([[20.], [110], [-3]])}
    transformer = feature_column_ops._Transformer(features)
    with self.test_session() as sess:
      transformer.transform(bucket)
      num_of_ops = len(sess.graph.get_operations())
      # Verify that the second call to transform the same feature
      # doesn't increase the number of ops.
      transformer.transform(bucket)
      self.assertEqual(num_of_ops, len(sess.graph.get_operations()))

  def testSparseColumnWithHashBucket(self):
    """String values are hashed into [0, hash_bucket_size) as int64."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[hashed_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(hashed_sparse, output)
    with self.test_session():
      self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64)
      self.assertTrue(
          all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
      # Hashing only rewrites values; sparsity structure is preserved.
      self.assertAllEqual(output[hashed_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[hashed_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseIntColumnWithHashBucket(self):
    """Tests a sparse column with int values."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket(
        "wire", 10, dtype=dtypes.int64)
    wire_tensor = sparse_tensor.SparseTensor(
        values=[101, 201, 301],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[hashed_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(hashed_sparse, output)
    with self.test_session():
      self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64)
      self.assertTrue(
          all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
      self.assertAllEqual(output[hashed_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[hashed_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseColumnWithHashBucketWithDenseInputTensor(self):
    """A dense string input is converted to a SparseTensor before hashing."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = constant_op.constant(
        [["omar", "stringer"], ["marlo", "rick"]])
    features = {"wire": wire_tensor}
    output = feature_column_ops._Transformer(features).transform(hashed_sparse)

    with self.test_session():
      # While the input is a dense Tensor, the output should be a SparseTensor.
      self.assertIsInstance(output, sparse_tensor.SparseTensor)
      self.assertEqual(output.values.dtype, dtypes.int64)
      self.assertTrue(all(x < 10 and x >= 0 for x in output.values.eval()))
      self.assertAllEqual(output.indices.eval(),
                          [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(output.dense_shape.eval(), [2, 2])

  def testEmbeddingColumn(self):
    """An embedding column transforms to the same ids as its sparse parent."""
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_embedding = feature_column.embedding_column(hashed_sparse, 10)

    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[hashed_sparse, wire_embedding])
    # Check that features dict haven't changed
    self.assertEqual({"wire": wire_tensor}, features)
    self.assertEqual(len(output), 2)
    self.assertIn(hashed_sparse, output)
    self.assertIn(wire_embedding, output)
    with self.test_session():
      self.assertAllEqual(output[wire_embedding].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[wire_embedding].dense_shape.eval(), [2, 2])
      # The embedding's transform is the parent column's hashed ids.
      self.assertAllEqual(output[wire_embedding].values.eval(),
                          output[hashed_sparse].values.eval())

  def testSparseColumnWithKeys(self):
    """Keys are looked up to their vocabulary indices as int64."""
    keys_sparse = feature_column.sparse_column_with_keys(
        "wire", ["marlo", "omar", "stringer"])
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[keys_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(keys_sparse, output)
    with self.test_session():
      # Key->id lookup uses a table that must be initialized first.
      lookup_ops.tables_initializer().run()
      self.assertEqual(output[keys_sparse].values.dtype, dtypes.int64)
      self.assertAllEqual(output[keys_sparse].values.eval(), [1, 2, 0])
      self.assertAllEqual(output[keys_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[keys_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseColumnWithKeysWithDenseInputTensor(self):
    """Dense string input to a keys column yields a SparseTensor of ids."""
    keys_sparse = feature_column.sparse_column_with_keys(
        "wire", ["marlo", "omar", "stringer", "rick"])
    wire_tensor = constant_op.constant(
        [["omar", "stringer"], ["marlo", "rick"]])

    features = {"wire": wire_tensor}
    output = feature_column_ops._Transformer(features).transform(keys_sparse)

    with self.test_session():
      lookup_ops.tables_initializer().run()
      # While the input is a dense Tensor, the output should be a SparseTensor.
      self.assertIsInstance(output, sparse_tensor.SparseTensor)
      self.assertEqual(output.dtype, dtypes.int64)
      self.assertAllEqual(output.values.eval(), [1, 2, 0, 3])
      self.assertAllEqual(output.indices.eval(),
                          [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(output.dense_shape.eval(), [2, 2])

  def testSparseColumnWithHashBucket_IsIntegerized(self):
    """An integerized column maps int inputs into [0, bucket_size) as int32."""
    hashed_sparse = feature_column.sparse_column_with_integerized_feature(
        "wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=[100, 1, 25],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[hashed_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(hashed_sparse, output)
    with self.test_session():
      # Unlike the string hash-bucket column, integerized output is int32.
      self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int32)
      self.assertTrue(
          all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
      self.assertAllEqual(output[hashed_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[hashed_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseColumnWithHashBucketWithDenseInputTensor_IsIntegerized(self):
    """Dense int input to an integerized column yields an int32 SparseTensor."""
    hashed_sparse = feature_column.sparse_column_with_integerized_feature(
        "wire", 10)
    # wire_tensor = tf.SparseTensor(values=[100, 1, 25],
    #                               indices=[[0, 0], [1, 0], [1, 1]],
    #                               dense_shape=[2, 2])
    wire_tensor = constant_op.constant([[100, 0], [1, 25]])
    features = {"wire": wire_tensor}
    output = feature_column_ops._Transformer(features).transform(hashed_sparse)
    with self.test_session():
      # While the input is a dense Tensor, the output should be a SparseTensor.
      self.assertIsInstance(output, sparse_tensor.SparseTensor)
      self.assertEqual(output.values.dtype, dtypes.int32)
      self.assertTrue(all(x < 10 and x >= 0 for x in output.values.eval()))
      self.assertAllEqual(output.indices.eval(),
                          [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(output.dense_shape.eval(), [2, 2])

  def testWeightedSparseColumn(self):
    """A weighted sparse column transforms to an (ids, weights) pair."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[weighted_ids])
    self.assertEqual(len(output), 1)
    self.assertIn(weighted_ids, output)

    with self.test_session():
      lookup_ops.tables_initializer().run()
      # output[weighted_ids][0] holds the looked-up ids,
      # output[weighted_ids][1] holds the untouched float32 weights.
      self.assertAllEqual(output[weighted_ids][0].dense_shape.eval(),
                          ids_tensor.dense_shape.eval())
      self.assertAllEqual(output[weighted_ids][0].indices.eval(),
                          ids_tensor.indices.eval())
      self.assertAllEqual(output[weighted_ids][0].values.eval(), [2, 2, 0])
      self.assertAllEqual(output[weighted_ids][1].dense_shape.eval(),
                          weights_tensor.dense_shape.eval())
      self.assertAllEqual(output[weighted_ids][1].indices.eval(),
                          weights_tensor.indices.eval())
      self.assertEqual(output[weighted_ids][1].values.dtype, dtypes.float32)
      self.assertAllEqual(output[weighted_ids][1].values.eval(),
                          weights_tensor.values.eval())

  def testSparseColumnWithVocabulary(self):
    """Vocabulary-file lookup maps strings to their line indices."""
    vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt")
    with open(vocabulary_file, "w") as f:
      f.write("\n".join(["marlo", "omar", "stringer"]) + "\n")
    vocab_sparse = feature_column.sparse_column_with_vocabulary_file(
        "wire", vocabulary_file, vocab_size=3)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[vocab_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(vocab_sparse, output)
    with self.test_session():
      lookup_ops.tables_initializer().run()
      self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
      self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0])
      self.assertAllEqual(output[vocab_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[vocab_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseColumnWithVocabularyWithDenseInputTensor(self):
    """Dense input to a vocabulary column yields a SparseTensor of indices."""
    vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt")
    with open(vocabulary_file, "w") as f:
      f.write("\n".join(["marlo", "omar", "stringer"]) + "\n")
    vocab_sparse = feature_column.sparse_column_with_vocabulary_file(
        "wire", vocabulary_file, vocab_size=3)
    wire_tensor = constant_op.constant(
        [["omar", "stringer"], ["marlo", "omar"]])
    features = {"wire": wire_tensor}
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[vocab_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(vocab_sparse, output)
    with self.test_session():
      lookup_ops.tables_initializer().run()
      self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
      self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1])
      self.assertAllEqual(output[vocab_sparse].indices.eval(),
                          [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), [2, 2])

  def testSparseIntColumnWithVocabulary(self):
    """Tests a sparse integer column with vocabulary."""
    vocabulary_file = os.path.join(self.get_temp_dir(), "courses.txt")
    with open(vocabulary_file, "w") as f:
      f.write("\n".join(["101", "201", "301"]) + "\n")
    vocab_sparse = feature_column.sparse_column_with_vocabulary_file(
        "wire", vocabulary_file, vocab_size=3, dtype=dtypes.int64)
    wire_tensor = sparse_tensor.SparseTensor(
        values=[201, 301, 101],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[vocab_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(vocab_sparse, output)
    with self.test_session():
      lookup_ops.tables_initializer().run()
      self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
      self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0])
      self.assertAllEqual(output[vocab_sparse].indices.eval(),
                          wire_tensor.indices.eval())
      self.assertAllEqual(output[vocab_sparse].dense_shape.eval(),
                          wire_tensor.dense_shape.eval())

  def testSparseIntColumnWithVocabularyWithDenseInputTensor(self):
    """Tests a sparse integer column with vocabulary."""
    vocabulary_file = os.path.join(self.get_temp_dir(), "courses.txt")
    with open(vocabulary_file, "w") as f:
      f.write("\n".join(["101", "201", "301"]) + "\n")
    vocab_sparse = feature_column.sparse_column_with_vocabulary_file(
        "wire", vocabulary_file, vocab_size=3, dtype=dtypes.int64)
    wire_tensor = constant_op.constant([[201, 301], [101, 201]])
    features = {"wire": wire_tensor}
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[vocab_sparse])
    self.assertEqual(len(output), 1)
    self.assertIn(vocab_sparse, output)
    with self.test_session():
      lookup_ops.tables_initializer().run()
      self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
      self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1])
      self.assertAllEqual(output[vocab_sparse].indices.eval(),
                          [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(output[vocab_sparse].dense_shape.eval(), [2, 2])

  def testCrossColumn(self):
    """Crossing two sparse columns hashes pairs into [0, hash_bucket_size)."""
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=15)
    features = {
        "language":
            sparse_tensor.SparseTensor(
                values=["english", "spanish"],
                indices=[[0, 0], [1, 0]],
                dense_shape=[2, 1]),
        "country":
            sparse_tensor.SparseTensor(
                values=["US", "SV"],
                indices=[[0, 0], [1, 0]],
                dense_shape=[2, 1])
    }
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[country_language])
    self.assertEqual(len(output), 1)
    self.assertIn(country_language, output)
    with self.test_session():
      self.assertEqual(output[country_language].values.dtype, dtypes.int64)
      self.assertTrue(
          all(x < 15 and x >= 0 for x in output[country_language].values.eval(
          )))

  def testCrossWithBucketizedColumn(self):
    """A sparse column can be crossed with a bucketized dense column."""
    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = feature_column.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    features = {
        "price":
            constant_op.constant([[20.]]),
        "country":
            sparse_tensor.SparseTensor(
                values=["US", "SV"],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2])
    }
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[country_price])
    self.assertEqual(len(output), 1)
    self.assertIn(country_price, output)
    with self.test_session():
      self.assertEqual(output[country_price].values.dtype, dtypes.int64)
      self.assertTrue(
          all(x < 15 and x >= 0 for x in output[country_price].values.eval()))

  def testCrossWithMultiDimensionBucketizedColumn(self):
    """Crossing with a multi-dimension bucketized column trains end to end."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    country_price = feature_column.crossed_column(
        [country, price_bucket], hash_bucket_size=1000)

    with ops.Graph().as_default():
      features = {
          "price":
              constant_op.constant([[20., 210.], [110., 50.], [-3., -30.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV", "US"],
                  indices=[[0, 0], [1, 0], [2, 0]],
                  dense_shape=[3, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country_price], num_outputs=1))

      # Gradient of the weighted sum w.r.t. the cross weights should have
      # 6 sparse entries — presumably 3 examples x 2 price dimensions.
      weights = column_to_variable[country_price][0]
      grad = array_ops.squeeze(
          gradients_impl.gradients(output, weights)[0].values)
      with self.test_session():
        variables_lib.global_variables_initializer().run()
        self.assertEqual(len(grad.eval()), 6)

      # Test transform features.
      output = feature_column_ops.transform_features(
          features=features, feature_columns=[country_price])
      self.assertEqual(len(output), 1)
      self.assertIn(country_price, output)

  def testCrossWithCrossedColumn(self):
    """A crossed column can itself be an input to another cross."""
    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = feature_column.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    wire = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_country_price = feature_column.crossed_column(
        [wire, country_price], hash_bucket_size=15)
    features = {
        "price":
            constant_op.constant([[20.]]),
        "country":
            sparse_tensor.SparseTensor(
                values=["US", "SV"],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [0, 1], [0, 2]],
                dense_shape=[1, 3])
    }
    # Test transform features.
    output = feature_column_ops.transform_features(
        features=features, feature_columns=[wire_country_price])
    self.assertEqual(len(output), 1)
    self.assertIn(wire_country_price, output)
    with self.test_session():
      self.assertEqual(output[wire_country_price].values.dtype, dtypes.int64)
      self.assertTrue(
          all(x < 15 and x >= 0 for x in output[wire_country_price].values.eval(
          )))

  def testIfFeatureTableContainsTransformationReturnIt(self):
    """A value already keyed by the column in `features` is returned as-is."""
    any_column = feature_column.sparse_column_with_hash_bucket("sparse", 10)
    features = {any_column: "any-thing-even-not-a-tensor"}
    output = feature_column_ops._Transformer(features).transform(any_column)
    self.assertEqual(output, "any-thing-even-not-a-tensor")
    551 
    552 
    553 class CreateInputLayersForDNNsTest(test.TestCase):
    554 
    555   def testFeatureColumnDictFails(self):
    556     real_valued = feature_column.real_valued_column("price")
    557     features = {"price": constant_op.constant([[20.], [110], [-3]])}
    558     with self.assertRaisesRegexp(
    559         ValueError,
    560         "Expected feature_columns to be iterable, found dict"):
    561       feature_column_ops.input_from_feature_columns(
    562           features, {"feature": real_valued})
    563 
    564   def testSparseTensorRealValuedColumn(self):
    565     var_len_sparse_real_valued_column = (
    566         feature_column._real_valued_var_len_column("rating", is_sparse=True))
    567     features = {
    568         "ids":
    569             sparse_tensor.SparseTensor(
    570                 values=["c", "b", "a"],
    571                 indices=[[0, 0], [1, 0], [2, 0]],
    572                 dense_shape=[3, 1]),
    573         "income":
    574             constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]),
    575         "rating":
    576             sparse_tensor.SparseTensor(
    577                 values=[3.5, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1])
    578     }
    579     with self.assertRaisesRegexp(
    580         ValueError,
    581         "dd"):
    582       feature_column_ops.input_from_feature_columns(
    583           features, [var_len_sparse_real_valued_column])
    584 
    585   def testAllDNNColumns(self):
    586     sparse_column = feature_column.sparse_column_with_keys(
    587         "ids", ["a", "b", "c", "unseen"])
    588     real_valued_column = feature_column.real_valued_column("income", 2)
    589     one_hot_column = feature_column.one_hot_column(sparse_column)
    590     embedding_column = feature_column.embedding_column(sparse_column, 10)
    591     features = {
    592         "ids":
    593             sparse_tensor.SparseTensor(
    594                 values=["c", "b", "a"],
    595                 indices=[[0, 0], [1, 0], [2, 0]],
    596                 dense_shape=[3, 1]),
    597         "income":
    598             constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]),
    599     }
    600     columns = [one_hot_column, embedding_column, real_valued_column]
    601     output = feature_column_ops.input_from_feature_columns(features, columns)
    602     output_core = fc_core.input_layer(features, columns)
    603     with self.test_session():
    604       variables_lib.global_variables_initializer().run()
    605       lookup_ops.tables_initializer().run()
    606       self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10])
    607       # Verify cross compatibility: Core builder output should equal to contrib.
    608       self.assertAllEqual(output.eval().shape, output_core.eval().shape)
    609 
    610   def testAllDNNColumnsWithColumnwiseOutputs(self):
    611     sparse_column = feature_column.sparse_column_with_keys(
    612         "ids", ["a", "b", "c", "unseen"])
    613     real_valued_column = feature_column.real_valued_column("income", 2)
    614     one_hot_column = feature_column.one_hot_column(sparse_column)
    615     embedding_column = feature_column.embedding_column(sparse_column, 10)
    616     features = {
    617         "ids":
    618             sparse_tensor.SparseTensor(
    619                 values=["c", "b", "a"],
    620                 indices=[[0, 0], [1, 0], [2, 0]],
    621                 dense_shape=[3, 1]),
    622         "income":
    623             constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]),
    624     }
    625     columns = [one_hot_column, embedding_column, real_valued_column]
    626     cols_to_outs = {}
    627     feature_column_ops.input_from_feature_columns(
    628         features, columns, cols_to_outs=cols_to_outs)
    629     with self.test_session():
    630       variables_lib.global_variables_initializer().run()
    631       lookup_ops.tables_initializer().run()
    632       for column in columns:
    633         self.assertTrue(column in cols_to_outs)
    634 
    635   def testRealValuedColumn(self):
    636     real_valued = feature_column.real_valued_column("price")
    637     features = {"price": constant_op.constant([[20.], [110], [-3]])}
    638     output = feature_column_ops.input_from_feature_columns(features,
    639                                                            [real_valued])
    640     with self.test_session():
    641       self.assertAllClose(output.eval(), features["price"].eval())
    642       # Verify cross compatibility: Core builder output should equal to contrib.
    643       self.assertAllClose(output.eval(),
    644                           fc_core.input_layer(features, [real_valued]).eval())
    645 
    646   def testRealValuedColumnWithMultiDimensions(self):
    647     real_valued = feature_column.real_valued_column("price", 2)
    648     features = {
    649         "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]])
    650     }
    651     output = feature_column_ops.input_from_feature_columns(features,
    652                                                            [real_valued])
    653     with self.test_session():
    654       self.assertAllClose(output.eval(), features["price"].eval())
    655       # Verify cross compatibility: Core builder output should equal to contrib.
    656       self.assertAllClose(output.eval(),
    657                           fc_core.input_layer(features, [real_valued]).eval())
    658 
    659   def testRealValuedColumnDense(self):
    660     var_len_real_valued = feature_column._real_valued_var_len_column(
    661         "rating", default_value=-1)
    662     rating = np.array([[0., 1., 2., -1.],
    663                        [3., 4., 5., 6.]])
    664     features = {"rating": constant_op.constant(rating)}
    665     with self.test_session() as sess:
    666       output = sess.run(feature_column_ops.input_from_feature_columns(
    667           features, [var_len_real_valued]))
    668     self.assertAllClose(rating, output)
    669 
    670   def testRealValuedColumnTypeConversion(self):
    671     var_len_real_valued = feature_column._real_valued_var_len_column(
    672         "rating", default_value=-1)
    673     rating = np.array([[0, 1, 2, -1],
    674                        [3, 4, 5, 6]])
    675     features = {"rating": constant_op.constant(rating, dtype=dtypes.int64)}
    676     with self.test_session() as sess:
    677       output = sess.run(feature_column_ops.input_from_feature_columns(
    678           features, [var_len_real_valued]))
    679     self.assertAllClose(rating.astype(np.float32), output)
    680 
    681   def testRealValuedColumnWithNormalizer(self):
    682     real_valued = feature_column.real_valued_column(
    683         "price", normalizer=lambda x: x - 2)
    684     features = {"price": constant_op.constant([[20.], [110], [-3]])}
    685     output = feature_column_ops.input_from_feature_columns(features,
    686                                                            [real_valued])
    687     with self.test_session():
    688       self.assertAllClose(output.eval(), features["price"].eval() - 2)
    689       # Verify cross compatibility: Core builder output should equal to contrib.
    690       self.assertAllClose(output.eval(),
    691                           fc_core.input_layer(features, [real_valued]).eval())
    692 
    693   def testRealValuedColumnWithMultiDimensionsAndNormalizer(self):
    694     real_valued = feature_column.real_valued_column(
    695         "price", 2, normalizer=lambda x: x - 2)
    696     features = {
    697         "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]])
    698     }
    699     output = feature_column_ops.input_from_feature_columns(features,
    700                                                            [real_valued])
    701     with self.test_session():
    702       self.assertAllClose(output.eval(), features["price"].eval() - 2)
    703       # Verify cross compatibility: Core builder output should equal to contrib.
    704       self.assertAllClose(output.eval(),
    705                           fc_core.input_layer(features, [real_valued]).eval())
    706 
  def testBucketizedColumnWithNormalizerSucceedsForDNN(self):
    # Bucketization sees the normalized values: 20-15=5, 110-15=95, -3-15=-18,
    # which land in buckets 1, 2, 0 respectively (matching `expected` below).
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column(
            "price", normalizer=lambda x: x - 15),
        boundaries=[0., 10., 100.])
    features = {"price": constant_op.constant([[20.], [110], [-3]])}
    output = feature_column_ops.input_from_feature_columns(features, [bucket])
    expected = [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]
    with self.test_session():
      self.assertAllClose(output.eval(), expected)
      self.assertAllClose(output.eval(),
                          fc_core.input_layer(features, [bucket]).eval())
    720 
    721   def testBucketizedColumnWithMultiDimensionsSucceedsForDNN(self):
    722     bucket = feature_column.bucketized_column(
    723         feature_column.real_valued_column("price", 2),
    724         boundaries=[0., 10., 100.])
    725     # buckets [2, 3], [3, 2], [0, 0]. dimension = 2
    726     features = {
    727         "price": constant_op.constant([[20., 200], [110, 50], [-3, -3]])
    728     }
    729     output = feature_column_ops.input_from_feature_columns(features, [bucket])
    730     expected = [[0, 0, 1, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 1, 0],
    731                 [1, 0, 0, 0, 1, 0, 0, 0]]
    732     with self.test_session():
    733       self.assertAllClose(output.eval(), expected)
    734       self.assertAllClose(output.eval(),
    735                           fc_core.input_layer(features, [bucket]).eval())
    736 
    737   def testOneHotColumnFromWeightedSparseColumnSucceedsForDNN(self):
    738     ids_column = feature_column.sparse_column_with_keys(
    739         "ids", ["a", "b", "c", "unseen"])
    740     ids_tensor = sparse_tensor.SparseTensor(
    741         values=["c", "b", "a", "c"],
    742         indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
    743         dense_shape=[3, 2])
    744     weighted_ids_column = feature_column.weighted_sparse_column(ids_column,
    745                                                                 "weights")
    746     weights_tensor = sparse_tensor.SparseTensor(
    747         values=[10.0, 20.0, 30.0, 40.0],
    748         indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
    749         dense_shape=[3, 2])
    750     features = {"ids": ids_tensor, "weights": weights_tensor}
    751     one_hot_column = feature_column.one_hot_column(weighted_ids_column)
    752     output = feature_column_ops.input_from_feature_columns(features,
    753                                                            [one_hot_column])
    754     output_core = fc_core.input_layer(features, [one_hot_column])
    755     with self.test_session():
    756       variables_lib.global_variables_initializer().run()
    757       lookup_ops.tables_initializer().run()
    758       self.assertAllEqual([[0, 0, 10., 0], [0, 20., 0, 0], [30., 0, 40., 0]],
    759                           output.eval())
    760       # Verify cross compatibility: Core builder output should equal to contrib.
    761       self.assertAllEqual(output.eval(), output_core.eval())
    762 
    763   def testOneHotColumnFromSparseColumnWithKeysSucceedsForDNN(self):
    764     ids_column = feature_column.sparse_column_with_keys(
    765         "ids", ["a", "b", "c", "unseen"])
    766     ids_tensor = sparse_tensor.SparseTensor(
    767         values=["c", "b", "a"],
    768         indices=[[0, 0], [1, 0], [2, 0]],
    769         dense_shape=[3, 1])
    770     one_hot_sparse = feature_column.one_hot_column(ids_column)
    771     features = {"ids": ids_tensor}
    772     output = feature_column_ops.input_from_feature_columns(features,
    773                                                            [one_hot_sparse])
    774     output_core = fc_core.input_layer(features, [one_hot_sparse])
    775 
    776     with self.test_session():
    777       variables_lib.global_variables_initializer().run()
    778       lookup_ops.tables_initializer().run()
    779       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]],
    780                           output.eval())
    781       # Verify cross compatibility: Core builder output should equal to contrib.
    782       self.assertAllEqual(output.eval(), output_core.eval())
    783 
    784   def testOneHotColumnFromMultivalentSparseColumnWithKeysSucceedsForDNN(self):
    785     ids_column = feature_column.sparse_column_with_keys(
    786         "ids", ["a", "b", "c", "unseen"])
    787     ids_tensor = sparse_tensor.SparseTensor(
    788         values=["c", "b", "a", "c"],
    789         indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
    790         dense_shape=[3, 2])
    791     one_hot_sparse = feature_column.one_hot_column(ids_column)
    792     features = {"ids": ids_tensor}
    793     output = feature_column_ops.input_from_feature_columns(features,
    794                                                            [one_hot_sparse])
    795     output_core = fc_core.input_layer(features, [one_hot_sparse])
    796 
    797     with self.test_session():
    798       variables_lib.global_variables_initializer().run()
    799       lookup_ops.tables_initializer().run()
    800       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
    801                           output.eval())
    802       # Verify cross compatibility: Core builder output should equal to contrib.
    803       self.assertAllEqual(output.eval(), output_core.eval())
    804 
    805   def testOneHotColumnFromSparseColumnWithIntegerizedFeaturePassesForDNN(self):
    806     ids_column = feature_column.sparse_column_with_integerized_feature(
    807         "ids", bucket_size=4)
    808     one_hot_sparse = feature_column.one_hot_column(ids_column)
    809     features = {
    810         "ids":
    811             sparse_tensor.SparseTensor(
    812                 values=[2, 1, 0, 2],
    813                 indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
    814                 dense_shape=[3, 2])
    815     }
    816     output = feature_column_ops.input_from_feature_columns(features,
    817                                                            [one_hot_sparse])
    818     output_core = fc_core.input_layer(features, [one_hot_sparse])
    819     with self.test_session():
    820       variables_lib.global_variables_initializer().run()
    821       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
    822                           output.eval())
    823       # Verify cross compatibility: Core builder output should equal to contrib.
    824       self.assertAllEqual(output.eval(), output_core.eval())
    825 
    826   def testOneHotColumnFromSparseColumnWithHashBucketSucceedsForDNN(self):
    827     hashed_sparse = feature_column.sparse_column_with_hash_bucket("feat", 10)
    828     wire_tensor = sparse_tensor.SparseTensor(
    829         values=["a", "b", "c1", "c2"],
    830         indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
    831         dense_shape=[3, 2])
    832     features = {"feat": wire_tensor}
    833     one_hot_sparse = feature_column.one_hot_column(hashed_sparse)
    834     output = feature_column_ops.input_from_feature_columns(features,
    835                                                            [one_hot_sparse])
    836     output_core = fc_core.input_layer(features, [one_hot_sparse])
    837     with self.test_session():
    838       variables_lib.global_variables_initializer().run()
    839       lookup_ops.tables_initializer().run()
    840       self.assertAllEqual([3, 10], output.eval().shape)
    841       # Verify cross compatibility: Core builder output should equal to contrib.
    842       self.assertAllEqual(output.eval(), output_core.eval())
    843 
    844   def testEmbeddingColumnSucceedsForDNN(self):
    845     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    846     wire_tensor = sparse_tensor.SparseTensor(
    847         values=["omar", "stringer", "marlo", "xx", "yy"],
    848         indices=[[0, 0], [1, 0], [1, 1], [2, 0], [3, 0]],
    849         dense_shape=[4, 2])
    850     features = {"wire": wire_tensor}
    851     embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    852     output = feature_column_ops.input_from_feature_columns(features,
    853                                                            [embeded_sparse])
    854     output_core = fc_core.input_layer(features, [embeded_sparse])
    855     with self.test_session():
    856       variables_lib.global_variables_initializer().run()
    857       self.assertAllEqual(output.eval().shape, [4, 10])
    858       # Verify cross compatibility: Core builder output should equal to contrib.
    859       self.assertAllEqual(output.eval().shape, output_core.eval().shape)
    860 
  def testScatteredEmbeddingColumnSucceedsForDNN(self):
    # Builds the same scattered-embedding layer through the contrib and core
    # builders (separate weight collections) and checks their gradients match.
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo", "omar"],
        indices=[[0, 0], [1, 0], [1, 1], [2, 0]],
        dense_shape=[3, 2])

    features = {"wire": wire_tensor}
    # Big enough hash space so that hopefully there is no collision
    embedded_sparse = feature_column.scattered_embedding_column(
        "wire", 1000, 3, layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
    output = feature_column_ops.input_from_feature_columns(
        features, [embedded_sparse], weight_collections=["my_collection"])
    weights = ops.get_collection("my_collection")
    grad = gradients_impl.gradients(output, weights)
    # Calculates the tensors calculated by FC core libs. Later, the values
    # will be compared with the contrib version.
    output_core = fc_core.input_layer(
        features, [embedded_sparse], weight_collections=["my_collection_core"])
    weights_core = ops.get_collection("my_collection_core")
    grad_core = gradients_impl.gradients(output_core, weights_core)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      gradient_values = []
      gradient_values_core = []
      # Collect the gradient from the different partitions (one in this test)
      for p in range(len(grad)):
        gradient_values.extend(grad[p].values.eval())
        gradient_values_core.extend(grad_core[p].values.eval())
      # Sorting makes the comparison independent of partition/slot order.
      gradient_values.sort()
      gradient_values_core.sort()
      self.assertAllEqual(gradient_values, [0.5] * 6 + [2] * 3)
      self.assertAllEqual(gradient_values, gradient_values_core)
    893 
    894   def testEmbeddingColumnWithInitializerSucceedsForDNN(self):
    895     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    896     wire_tensor = sparse_tensor.SparseTensor(
    897         values=["omar", "stringer", "marlo"],
    898         indices=[[0, 0], [1, 0], [1, 1]],
    899         dense_shape=[2, 2])
    900     features = {"wire": wire_tensor}
    901     init_value = 133.7
    902     embeded_sparse = feature_column.embedding_column(
    903         hashed_sparse,
    904         10,
    905         initializer=init_ops.constant_initializer(init_value))
    906     output = feature_column_ops.input_from_feature_columns(features,
    907                                                            [embeded_sparse])
    908     output_core = fc_core.input_layer(features, [embeded_sparse])
    909 
    910     with self.test_session():
    911       variables_lib.global_variables_initializer().run()
    912       output_eval = output.eval()
    913       self.assertAllEqual(output_eval.shape, [2, 10])
    914       self.assertAllClose(output_eval, np.tile(init_value, [2, 10]))
    915       # Verify cross compatibility: Core builder output should equal to contrib.
    916       self.assertAllEqual(output.eval(), output_core.eval())
    917 
    918   def testEmbeddingColumnWithMultipleInitializersFails(self):
    919     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    920     wire_tensor = sparse_tensor.SparseTensor(
    921         values=["omar", "stringer", "marlo"],
    922         indices=[[0, 0], [1, 0], [1, 1]],
    923         dense_shape=[2, 2])
    924     features = {"wire": wire_tensor}
    925     embedded_sparse = feature_column.embedding_column(
    926         hashed_sparse,
    927         10,
    928         initializer=init_ops.truncated_normal_initializer(
    929             mean=42, stddev=1337))
    930     embedded_sparse_alternate = feature_column.embedding_column(
    931         hashed_sparse,
    932         10,
    933         initializer=init_ops.truncated_normal_initializer(
    934             mean=1337, stddev=42))
    935 
    936     # Makes sure that trying to use different initializers with the same
    937     # embedding column explicitly fails.
    938     with self.test_session():
    939       with self.assertRaisesRegexp(
    940           ValueError,
    941           "Duplicate feature column key found for column: wire_embedding"):
    942         feature_column_ops.input_from_feature_columns(
    943             features, [embedded_sparse, embedded_sparse_alternate])
    944 
    945   def testEmbeddingColumnWithWeightedSparseColumnSucceedsForDNN(self):
    946     """Tests DNN input with embedded weighted sparse column."""
    947     ids = feature_column.sparse_column_with_keys("ids",
    948                                                  ["marlo", "omar", "stringer"])
    949     ids_tensor = sparse_tensor.SparseTensor(
    950         values=["stringer", "stringer", "marlo"],
    951         indices=[[0, 0], [1, 0], [1, 1]],
    952         dense_shape=[2, 2])
    953     weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    954     weights_tensor = sparse_tensor.SparseTensor(
    955         values=[10.0, 20.0, 30.0],
    956         indices=[[0, 0], [1, 0], [1, 1]],
    957         dense_shape=[2, 2])
    958     features = {"ids": ids_tensor, "weights": weights_tensor}
    959     embeded_sparse = feature_column.embedding_column(weighted_ids, 10)
    960     output = feature_column_ops.input_from_feature_columns(features,
    961                                                            [embeded_sparse])
    962     output_core = fc_core.input_layer(features, [embeded_sparse])
    963 
    964     with self.test_session():
    965       variables_lib.global_variables_initializer().run()
    966       lookup_ops.tables_initializer().run()
    967       self.assertAllEqual(output.eval().shape, [2, 10])
    968       # Verify cross compatibility: Core builder output should equal to contrib.
    969       self.assertAllEqual(output.eval().shape, output_core.eval().shape)
    970 
    971   def testEmbeddingColumnWithIntegerWeightedSparseColumnSucceedsForDNN(self):
    972     """Same as the previous test, but with integer weights."""
    973     ids = feature_column.sparse_column_with_keys("ids",
    974                                                  ["marlo", "omar", "stringer"])
    975     ids_tensor = sparse_tensor.SparseTensor(
    976         values=["stringer", "stringer", "marlo"],
    977         indices=[[0, 0], [1, 0], [1, 1]],
    978         dense_shape=[2, 2])
    979     weighted_ids = feature_column.weighted_sparse_column(
    980         ids, "weights", dtype=dtypes.int32)
    981     weights_tensor = sparse_tensor.SparseTensor(
    982         values=constant_op.constant([10, 20, 30], dtype=dtypes.int32),
    983         indices=[[0, 0], [1, 0], [1, 1]],
    984         dense_shape=[2, 2])
    985     features = {"ids": ids_tensor, "weights": weights_tensor}
    986     embeded_sparse = feature_column.embedding_column(weighted_ids, 10)
    987     output = feature_column_ops.input_from_feature_columns(features,
    988                                                            [embeded_sparse])
    989     with self.test_session():
    990       variables_lib.global_variables_initializer().run()
    991       lookup_ops.tables_initializer().run()
    992       self.assertAllEqual(output.eval().shape, [2, 10])
    993 
    994   def testEmbeddingColumnWithCrossedColumnSucceedsForDNN(self):
    995     a = feature_column.sparse_column_with_hash_bucket(
    996         "aaa", hash_bucket_size=100)
    997     b = feature_column.sparse_column_with_hash_bucket(
    998         "bbb", hash_bucket_size=100)
    999     crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
   1000     wire_tensor = sparse_tensor.SparseTensor(
   1001         values=["omar", "stringer", "marlo"],
   1002         indices=[[0, 0], [1, 0], [1, 1]],
   1003         dense_shape=[2, 2])
   1004     features = {"aaa": wire_tensor, "bbb": wire_tensor}
   1005     embeded_sparse = feature_column.embedding_column(crossed, 10)
   1006     output = feature_column_ops.input_from_feature_columns(features,
   1007                                                            [embeded_sparse])
   1008     with self.test_session():
   1009       variables_lib.global_variables_initializer().run()
   1010       self.assertAllEqual(output.eval().shape, [2, 10])
   1011 
   1012   def testSparseColumnFailsForDNN(self):
   1013     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1014     wire_tensor = sparse_tensor.SparseTensor(
   1015         values=["omar", "stringer", "marlo"],
   1016         indices=[[0, 0], [1, 0], [1, 1]],
   1017         dense_shape=[2, 2])
   1018     features = {"wire": wire_tensor}
   1019     with self.test_session():
   1020       with self.assertRaisesRegexp(
   1021           ValueError, "Error creating input layer for column: wire"):
   1022         variables_lib.global_variables_initializer().run()
   1023         feature_column_ops.input_from_feature_columns(features, [hashed_sparse])
   1024 
   1025   def testWeightedSparseColumnFailsForDNN(self):
   1026     ids = feature_column.sparse_column_with_keys("ids",
   1027                                                  ["marlo", "omar", "stringer"])
   1028     ids_tensor = sparse_tensor.SparseTensor(
   1029         values=["stringer", "stringer", "marlo"],
   1030         indices=[[0, 0], [1, 0], [1, 1]],
   1031         dense_shape=[2, 2])
   1032     weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
   1033     weights_tensor = sparse_tensor.SparseTensor(
   1034         values=[10.0, 20.0, 30.0],
   1035         indices=[[0, 0], [1, 0], [1, 1]],
   1036         dense_shape=[2, 2])
   1037     features = {"ids": ids_tensor, "weights": weights_tensor}
   1038     with self.test_session():
   1039       with self.assertRaisesRegexp(
   1040           ValueError,
   1041           "Error creating input layer for column: ids_weighted_by_weights"):
   1042         lookup_ops.tables_initializer().run()
   1043         feature_column_ops.input_from_feature_columns(features, [weighted_ids])
   1044 
   1045   def testCrossedColumnFailsForDNN(self):
   1046     a = feature_column.sparse_column_with_hash_bucket(
   1047         "aaa", hash_bucket_size=100)
   1048     b = feature_column.sparse_column_with_hash_bucket(
   1049         "bbb", hash_bucket_size=100)
   1050     crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
   1051     wire_tensor = sparse_tensor.SparseTensor(
   1052         values=["omar", "stringer", "marlo"],
   1053         indices=[[0, 0], [1, 0], [1, 1]],
   1054         dense_shape=[2, 2])
   1055     features = {"aaa": wire_tensor, "bbb": wire_tensor}
   1056     with self.test_session():
   1057       with self.assertRaisesRegexp(
   1058           ValueError, "Error creating input layer for column: aaa_X_bbb"):
   1059         variables_lib.global_variables_initializer().run()
   1060         feature_column_ops.input_from_feature_columns(features, [crossed])
   1061 
   1062   def testDeepColumnsSucceedForDNN(self):
   1063     real_valued = feature_column.real_valued_column("income", 3)
   1064     bucket = feature_column.bucketized_column(
   1065         feature_column.real_valued_column("price", 2),
   1066         boundaries=[0., 10., 100.])
   1067     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1068     features = {
   1069         "income":
   1070             constant_op.constant([[20., 10, -5], [110, 0, -7], [-3, 30, 50]]),
   1071         "price":
   1072             constant_op.constant([[20., 200], [110, 2], [-20, -30]]),
   1073         "wire":
   1074             sparse_tensor.SparseTensor(
   1075                 values=["omar", "stringer", "marlo"],
   1076                 indices=[[0, 0], [1, 0], [2, 0]],
   1077                 dense_shape=[3, 1])
   1078     }
   1079     embeded_sparse = feature_column.embedding_column(
   1080         hashed_sparse, 10, initializer=init_ops.constant_initializer(133.7))
   1081     output = feature_column_ops.input_from_feature_columns(
   1082         features, [real_valued, bucket, embeded_sparse])
   1083     with self.test_session():
   1084       variables_lib.global_variables_initializer().run()
   1085       # size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding) = 21
   1086       self.assertAllEqual(output.eval().shape, [3, 21])
   1087 
   1088   def testEmbeddingColumnForDNN(self):
   1089     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1090     wire_tensor = sparse_tensor.SparseTensor(
   1091         values=["omar", "stringer", "marlo"],
   1092         indices=[[0, 0], [1, 0], [1, 1]],
   1093         dense_shape=[3, 2])
   1094     features = {"wire": wire_tensor}
   1095     embeded_sparse = feature_column.embedding_column(
   1096         hashed_sparse,
   1097         1,
   1098         combiner="sum",
   1099         initializer=init_ops.ones_initializer())
   1100     output = feature_column_ops.input_from_feature_columns(features,
   1101                                                            [embeded_sparse])
   1102     with self.test_session():
   1103       variables_lib.global_variables_initializer().run()
   1104       # score: (number of values)
   1105       self.assertAllEqual(output.eval(), [[1.], [2.], [0.]])
   1106 
   1107   def testEmbeddingColumnWithMaxNormForDNN(self):
   1108     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1109     wire_tensor = sparse_tensor.SparseTensor(
   1110         values=["omar", "stringer", "marlo"],
   1111         indices=[[0, 0], [1, 0], [1, 1]],
   1112         dense_shape=[3, 2])
   1113     features = {"wire": wire_tensor}
   1114     embedded_sparse = feature_column.embedding_column(
   1115         hashed_sparse,
   1116         1,
   1117         combiner="sum",
   1118         initializer=init_ops.ones_initializer(),
   1119         max_norm=0.5)
   1120     output = feature_column_ops.input_from_feature_columns(features,
   1121                                                            [embedded_sparse])
   1122     with self.test_session():
   1123       variables_lib.global_variables_initializer().run()
   1124       # score: (number of values * 0.5)
   1125       self.assertAllClose(output.eval(), [[0.5], [1.], [0.]])
   1126 
   1127   def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
   1128     ids = feature_column.sparse_column_with_keys("ids",
   1129                                                  ["marlo", "omar", "stringer"])
   1130     ids_tensor = sparse_tensor.SparseTensor(
   1131         values=["stringer", "stringer", "marlo"],
   1132         indices=[[0, 0], [1, 0], [1, 1]],
   1133         dense_shape=[3, 2])
   1134     weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
   1135     weights_tensor = sparse_tensor.SparseTensor(
   1136         values=[10.0, 20.0, 30.0],
   1137         indices=[[0, 0], [1, 0], [1, 1]],
   1138         dense_shape=[3, 2])
   1139     features = {"ids": ids_tensor, "weights": weights_tensor}
   1140     embeded_sparse = feature_column.embedding_column(
   1141         weighted_ids,
   1142         1,
   1143         combiner="sum",
   1144         initializer=init_ops.ones_initializer())
   1145     output = feature_column_ops.input_from_feature_columns(features,
   1146                                                            [embeded_sparse])
   1147     with self.test_session():
   1148       variables_lib.global_variables_initializer().run()
   1149       lookup_ops.tables_initializer().run()
   1150       # score: (sum of weights)
   1151       self.assertAllEqual(output.eval(), [[10.], [50.], [0.]])
   1152 
  def testInputLayerWithCollectionsForDNN(self):
    # Of the three column types, only the embedding creates a variable, and
    # that variable must be added to the requested weight collection.
    real_valued = feature_column.real_valued_column("price")
    bucket = feature_column.bucketized_column(
        real_valued, boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "price":
            constant_op.constant([[20.], [110], [-3]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"])
    weights = ops.get_collection("my_collection")
    # One variable for the embedded sparse column; the real-valued and
    # bucketized columns are variable-free transforms.
    self.assertEqual(1, len(weights))
   1174 
  def testInputLayerWithTrainableArgForDNN(self):
    """`trainable` on input_from_feature_columns controls variable trainability.

    NOTE: the two assertions below depend on the cumulative state of the
    default graph's TRAINABLE_VARIABLES collection across both calls.
    """
    real_valued = feature_column.real_valued_column("price")
    bucket = feature_column.bucketized_column(
        real_valued, boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "price":
            constant_op.constant([[20.], [110], [-3]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"],
        trainable=False)
    # There should not be any trainable variables
    self.assertEqual(0, len(variables_lib.trainable_variables()))

    feature_column_ops.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse],
        weight_collections=["my_collection"],
        trainable=True)
    # There should be one trainable variable, for the embedded sparse column.
    self.assertEqual(1, len(variables_lib.trainable_variables()))
   1203 
  def testInputLayerWithNonTrainableEmbeddingForDNN(self):
    """`trainable` on embedding_column itself controls variable trainability.

    Also checks the initialized outputs: rows with at least one id take the
    constant-initializer value; rows with no ids come out as zeros.
    """
    sparse_1 = feature_column.sparse_column_with_hash_bucket("wire_1", 10)
    sparse_2 = feature_column.sparse_column_with_hash_bucket("wire_2", 10)
    features = {
        "wire_1":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
        "wire_2":
            sparse_tensor.SparseTensor(
                values=["jack", "jill"],
                indices=[[0, 0], [1, 0]],
                dense_shape=[4, 1])
    }
    dims_1 = 10
    init_1 = 3.14
    embeded_1 = feature_column.embedding_column(
        sparse_1, dims_1, initializer=init_ops.constant_initializer(init_1),
        trainable=False)
    output_1 = feature_column_ops.input_from_feature_columns(
        features, [embeded_1])
    # There should be no trainable variables for sparse_1
    self.assertEqual(0, len(variables_lib.trainable_variables()))

    dims_2 = 7
    init_2 = 6.14
    embeded_2 = feature_column.embedding_column(
        sparse_2, dims_2, initializer=init_ops.constant_initializer(init_2),
        trainable=True)
    output_2 = feature_column_ops.input_from_feature_columns(
        features, [embeded_2])
    # There should be one trainable variable for sparse_2
    self.assertEqual(1, len(variables_lib.trainable_variables()))

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      output_1_eval = output_1.eval()
      output_2_eval = output_2.eval()
      self.assertAllEqual(output_1_eval.shape, [3, dims_1])
      self.assertAllClose(output_1_eval, np.tile(init_1, [3, dims_1]))
      self.assertAllEqual(output_2_eval.shape, [4, dims_2])
      # "wire_2" populates only the first 2 of 4 rows; the rest are zeros.
      self.assertAllClose(output_2_eval, np.concatenate(
          (np.tile(init_2, [2, dims_2]), np.tile(0, [2, dims_2]))))
   1248 
   1249 
class SequenceInputFromFeatureColumnTest(test.TestCase):
  """Tests for feature_column_ops.sequence_input_from_feature_columns."""

  def testSupportedColumns(self):
    """Bucketized and crossed columns (even indirectly) raise ValueError."""
    measurement = feature_column.real_valued_column("measurements")
    country = feature_column.sparse_column_with_hash_bucket("country", 100)
    pets = feature_column.sparse_column_with_hash_bucket("pets", 100)
    ids = feature_column.sparse_column_with_integerized_feature("id", 100)

    country_x_pets = feature_column.crossed_column([country, pets], 100)
    country_x_pets_onehot = feature_column.one_hot_column(country_x_pets)
    bucketized_measurement = feature_column.bucketized_column(measurement,
                                                              [.25, .5, .75])
    embedded_id = feature_column.embedding_column(ids, 100)

    # `_BucketizedColumn` is not supported.
    self.assertRaisesRegexp(
        ValueError,
        "FeatureColumn type _BucketizedColumn is not currently supported",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [measurement, bucketized_measurement])

    # `_CrossedColumn` is not supported.
    self.assertRaisesRegexp(
        ValueError,
        "FeatureColumn type _CrossedColumn is not currently supported",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [embedded_id, country_x_pets])

    # `country_x_pets_onehot` depends on a `_CrossedColumn` which is forbidden.
    self.assertRaisesRegexp(
        ValueError, "Column country_X_pets .* _CrossedColumn",
        feature_column_ops.sequence_input_from_feature_columns, {},
        [embedded_id, country_x_pets_onehot])

  def testRealValuedColumn(self):
    """A [batch, time, dim] real-valued input passes through unchanged."""
    batch_size = 4
    sequence_length = 8
    dimension = 3

    np.random.seed(1111)
    measurement_input = np.random.rand(batch_size, sequence_length, dimension)
    measurement_column = feature_column.real_valued_column("measurements")
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)
    self.assertAllClose(measurement_input, model_inputs)

  def testRealValuedVarLenColumn(self):
    """A var-len real-valued column gains a trailing dimension of size 1."""
    var_len_real_valued = feature_column._real_valued_var_len_column(
        "rating", default_value=-1)
    rating = np.array([[0., 1., 2., -1.],
                       [3., 4., 5., 6.]])
    features = {"rating": constant_op.constant(rating)}
    with self.test_session() as sess:
      output = sess.run(
          feature_column_ops.sequence_input_from_feature_columns(
              features, [var_len_real_valued]))
    reshaped_rating = np.reshape(rating, [2, 4, 1])
    self.assertAllClose(reshaped_rating, output)

  def testRealValuedColumnWithExtraDimensions(self):
    """Extra trailing dimensions are flattened into one feature axis."""
    batch_size = 4
    sequence_length = 8
    dimensions = [3, 4, 5]

    np.random.seed(2222)
    measurement_input = np.random.rand(batch_size, sequence_length, *dimensions)
    measurement_column = feature_column.real_valued_column("measurements")
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
    reshaped_measurements = np.reshape(measurement_input, expected_shape)

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)

    self.assertAllClose(reshaped_measurements, model_inputs)

  def testRealValuedColumnWithNormalizer(self):
    """The column's normalizer function is applied to the input."""
    batch_size = 4
    sequence_length = 8
    dimension = 3
    normalizer = lambda x: x - 2

    np.random.seed(3333)
    measurement_input = np.random.rand(batch_size, sequence_length, dimension)
    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)
    self.assertAllClose(normalizer(measurement_input), model_inputs)

  def testRealValuedColumnWithMultiDimensionsAndNormalizer(self):
    """Normalization composes with flattening of extra dimensions."""
    batch_size = 4
    sequence_length = 8
    dimensions = [3, 4, 5]
    normalizer = lambda x: x / 2.0

    np.random.seed(1234)
    measurement_input = np.random.rand(batch_size, sequence_length, *dimensions)
    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    columns_to_tensors = {
        "measurements": constant_op.constant(measurement_input)
    }
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [measurement_column])

    expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
    reshaped_measurements = np.reshape(measurement_input, expected_shape)

    with self.test_session() as sess:
      model_inputs = sess.run(model_input_tensor)

    self.assertAllClose(normalizer(reshaped_measurements), model_inputs)

  def testOneHotColumnFromSparseColumnWithKeys(self):
    """3-D sparse ids become [batch, time, vocab] multi-hot activations."""
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    ids_column = feature_column.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    one_hot_column = feature_column.one_hot_column(ids_column)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [one_hot_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    expected_input_shape = np.array([4, 3, 4])
    expected_model_input = np.array(
        [[[0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 0]],
         [[1, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0]]],
        dtype=np.float32)

    self.assertAllEqual(expected_input_shape, model_input.shape)
    self.assertAllClose(expected_model_input, model_input)

  def testOneHotColumnFromSparseColumnWithHashBucket(self):
    """One-hot over a hash-bucket column yields [batch, time, buckets]."""
    hash_buckets = 10
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    one_hot_column = feature_column.one_hot_column(hashed_ids_column)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [one_hot_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    expected_input_shape = np.array([4, 3, hash_buckets])
    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumn(self):
    """Embedding a 3-D sparse column yields [batch, time, embedding_dim]."""
    hash_buckets = 10
    embedding_dimension = 5
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    expected_input_shape = np.array([4, 3, embedding_dimension])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(hashed_ids_column,
                                                      embedding_dimension)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumnWithAutoReshape(self):
    """A rank-2 sparse input is auto-reshaped to the expected rank-3 output."""
    hash_buckets = 10
    embedding_dimension = 5
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0], [0, 1],
                 [1, 0], [1, 1], [1, 2],
                 [3, 2]],
        dense_shape=[4, 3])

    expected_input_shape = np.array([4, 3, embedding_dimension])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(hashed_ids_column,
                                                      embedding_dimension)
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column])

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    self.assertAllEqual(expected_input_shape, model_input.shape)

  def testEmbeddingColumnGradient(self):
    """Gradients w.r.t. embedding weights count id occurrences (sum combiner)."""
    hash_buckets = 1000
    embedding_dimension = 3
    ids_tensor = sparse_tensor.SparseTensor(
        values=["c", "b",
                "a", "c", "b",
                "b"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    hashed_ids_column = feature_column.sparse_column_with_hash_bucket(
        "ids", hash_buckets)
    embedded_column = feature_column.embedding_column(
        hashed_ids_column, embedding_dimension, combiner="sum")
    columns_to_tensors = {"ids": ids_tensor}
    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, [embedded_column],
        weight_collections=["my_collection"])
    embedding_weights = ops.get_collection("my_collection")
    gradient_tensor = gradients_impl.gradients(model_input_tensor,
                                               embedding_weights)
    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input, gradients = sess.run([model_input_tensor, gradient_tensor])

    expected_input_shape = [4, 3, embedding_dimension]
    self.assertAllEqual(expected_input_shape, model_input.shape)

    # `ids_tensor` consists of 7 instances of <empty>, 3 occurrences of "b",
    # 2 occurrences of "c" and 1 instance of "a".
    expected_gradient_values = sorted([0., 3., 2., 1.] * embedding_dimension)
    actual_gradient_values = np.sort(gradients[0].values, axis=None)
    self.assertAllClose(expected_gradient_values, actual_gradient_values)

  def testMultipleColumns(self):
    """Mixed column types concatenate along the last (feature) axis."""
    batch_size = 4
    sequence_length = 3
    measurement_dimension = 5
    country_hash_size = 10
    max_id = 200
    id_embedding_dimension = 11
    normalizer = lambda x: x / 10.0

    measurement_tensor = random_ops.random_uniform(
        [batch_size, sequence_length, measurement_dimension])
    country_tensor = sparse_tensor.SparseTensor(
        values=["us", "ca",
                "ru", "fr", "ca",
                "mx"],
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 0, 1], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])
    id_tensor = sparse_tensor.SparseTensor(
        values=[2, 5,
                26, 123, 1,
                0],
        indices=[[0, 0, 0], [0, 0, 1],
                 [0, 1, 1], [1, 0, 0], [1, 1, 0],
                 [3, 2, 0]],
        dense_shape=[4, 3, 2])

    columns_to_tensors = {
        "measurements": measurement_tensor,
        "country": country_tensor,
        "id": id_tensor
    }

    measurement_column = feature_column.real_valued_column(
        "measurements", normalizer=normalizer)
    country_column = feature_column.sparse_column_with_hash_bucket(
        "country", country_hash_size)
    id_column = feature_column.sparse_column_with_integerized_feature("id",
                                                                      max_id)

    onehot_country_column = feature_column.one_hot_column(country_column)
    embedded_id_column = feature_column.embedding_column(id_column,
                                                         id_embedding_dimension)

    model_input_columns = [
        measurement_column, onehot_country_column, embedded_id_column
    ]

    model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
        columns_to_tensors, model_input_columns)
    self.assertEqual(dtypes.float32, model_input_tensor.dtype)

    with self.test_session() as sess:
      variables_lib.global_variables_initializer().run()
      lookup_ops.tables_initializer().run()
      model_input = sess.run(model_input_tensor)

    expected_input_shape = [
        batch_size, sequence_length,
        measurement_dimension + country_hash_size + id_embedding_dimension
    ]
    self.assertAllEqual(expected_input_shape, model_input.shape)
   1598 
   1599 
   1600 class WeightedSumTest(test.TestCase):
   1601 
   1602   def testFeatureColumnDictFails(self):
   1603     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1604     wire_tensor = sparse_tensor.SparseTensor(
   1605         values=["omar", "stringer", "marlo"],
   1606         indices=[[0, 0], [1, 0], [1, 1]],
   1607         dense_shape=[2, 2])
   1608     features = {"wire": wire_tensor}
   1609     with self.assertRaisesRegexp(
   1610         ValueError,
   1611         "Expected feature_columns to be iterable, found dict"):
   1612       feature_column_ops.weighted_sum_from_feature_columns(
   1613           features, {"feature": hashed_sparse}, num_outputs=5)
   1614 
   1615   def testSparseColumn(self):
   1616     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1617     wire_tensor = sparse_tensor.SparseTensor(
   1618         values=["omar", "stringer", "marlo"],
   1619         indices=[[0, 0], [1, 0], [1, 1]],
   1620         dense_shape=[2, 2])
   1621     features = {"wire": wire_tensor}
   1622     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1623         features, [hashed_sparse], num_outputs=5)
   1624     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
   1625     with self.test_session():
   1626       variables_lib.global_variables_initializer().run()
   1627       self.assertAllEqual(logits.eval().shape, [2, 5])
   1628       # Verify cross compatibility: Core builder output should equal to contrib.
   1629       self.assertAllEqual(logits.eval(), logits_core.eval())
   1630 
   1631   def testSparseIntColumn(self):
   1632     """Tests a sparse column with int values."""
   1633     hashed_sparse = feature_column.sparse_column_with_hash_bucket(
   1634         "wire", 10, dtype=dtypes.int64)
   1635     wire_tensor = sparse_tensor.SparseTensor(
   1636         values=[101, 201, 301],
   1637         indices=[[0, 0], [1, 0], [1, 1]],
   1638         dense_shape=[2, 2])
   1639     features = {"wire": wire_tensor}
   1640     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1641         features, [hashed_sparse], num_outputs=5)
   1642     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
   1643     with self.test_session():
   1644       variables_lib.global_variables_initializer().run()
   1645       self.assertAllEqual(logits.eval().shape, [2, 5])
   1646       # Verify cross compatibility: Core builder output should equal to contrib.
   1647       self.assertAllEqual(logits.eval(), logits_core.eval())
   1648 
   1649   def testSparseColumnWithDenseInputTensor(self):
   1650     hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
   1651     wire_tensor = constant_op.constant(
   1652         [["omar", "stringer"], ["marlo", "rick"]])
   1653     features = {"wire": wire_tensor}
   1654     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1655         features, [hashed_sparse], num_outputs=5)
   1656     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
   1657     with self.test_session():
   1658       variables_lib.global_variables_initializer().run()
   1659       self.assertAllEqual(logits.eval().shape, [2, 5])
   1660       # Verify cross compatibility: Core builder output should equal to contrib.
   1661       self.assertAllEqual(logits.eval(), logits_core.eval())
   1662 
  def testWeightedSparseColumn(self):
    """A weighted sparse column feeds a linear model: [batch, num_outputs]."""
    ids = feature_column.sparse_column_with_keys("ids",
                                                 ["marlo", "omar", "stringer"])
    ids_tensor = sparse_tensor.SparseTensor(
        values=["stringer", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    # Weight entries align one-to-one with the id entries above.
    weights_tensor = sparse_tensor.SparseTensor(
        values=[10.0, 20.0, 30.0],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [weighted_ids], num_outputs=5)
    logits_core = fc_core.linear_model(features, [weighted_ids], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      # sparse_column_with_keys requires the lookup tables to be initialized.
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())
   1685 
  def testWeightedSparseColumnWithDenseInputTensor(self):
    """A weighted sparse column also accepts dense id and weight tensors."""
    ids = feature_column.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer", "rick"])
    ids_tensor = constant_op.constant([["omar", "stringer"], ["marlo", "rick"]])
    weighted_ids = feature_column.weighted_sparse_column(ids, "weights")
    # Dense weights align positionally with the dense ids above.
    weights_tensor = constant_op.constant([[10.0, 20.0], [30.0, 40.0]])

    features = {"ids": ids_tensor, "weights": weights_tensor}
    logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [weighted_ids], num_outputs=5)
    logits_core = fc_core.linear_model(features, [weighted_ids], units=5)

    with self.test_session():
      variables_lib.global_variables_initializer().run()
      # sparse_column_with_keys requires the lookup tables to be initialized.
      lookup_ops.tables_initializer().run()
      self.assertAllEqual(logits.eval().shape, [2, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(logits.eval(), logits_core.eval())
   1704 
   1705   def testCrossedColumn(self):
   1706     a = feature_column.sparse_column_with_hash_bucket(
   1707         "aaa", hash_bucket_size=100)
   1708     b = feature_column.sparse_column_with_hash_bucket(
   1709         "bbb", hash_bucket_size=100)
   1710     crossed = feature_column.crossed_column(set([a, b]), hash_bucket_size=10000)
   1711     wire_tensor = sparse_tensor.SparseTensor(
   1712         values=["omar", "stringer", "marlo"],
   1713         indices=[[0, 0], [1, 0], [1, 1]],
   1714         dense_shape=[2, 2])
   1715     features = {"aaa": wire_tensor, "bbb": wire_tensor}
   1716     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1717         features, [crossed], num_outputs=5)
   1718     logits_core = fc_core.linear_model(features, [crossed], units=5)
   1719     with self.test_session():
   1720       variables_lib.global_variables_initializer().run()
   1721       self.assertAllEqual(logits.eval().shape, [2, 5])
   1722       # Verify cross compatibility: Core builder output should equal to contrib.
   1723       self.assertAllEqual(logits.eval(), logits_core.eval())
   1724 
  def testEmbeddingColumn(self):
    """Embedding columns are rejected by weighted_sum_from_feature_columns."""
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = sparse_tensor.SparseTensor(
        values=["omar", "stringer", "marlo"],
        indices=[[0, 0], [1, 0], [1, 1]],
        dense_shape=[2, 2])
    features = {"wire": wire_tensor}
    embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError, "Error creating weighted sum for column: wire_embedding"):
        # The ValueError is raised at graph-construction time by the
        # weighted_sum call below; the initializer run is not expected to
        # raise.
        variables_lib.global_variables_initializer().run()
        feature_column_ops.weighted_sum_from_feature_columns(
            features, [embeded_sparse], num_outputs=5)
   1739 
   1740   def testSparseFeatureColumnWithVocabularyFile(self):
   1741     vocabulary_file = os.path.join(self.get_temp_dir(), "movies.txt")
   1742     with open(vocabulary_file, "w") as f:
   1743       f.write("\n".join(["head-on", "matrix", "winter sleep"]) + "\n")
   1744     movies = feature_column.sparse_column_with_vocabulary_file(
   1745         column_name="movies", vocabulary_file=vocabulary_file, vocab_size=3)
   1746     with ops.Graph().as_default():
   1747       features = {
   1748           "movies":
   1749               sparse_tensor.SparseTensor(
   1750                   values=["matrix", "head-on", "winter sleep"],
   1751                   indices=[[0, 0], [0, 1], [1, 0]],
   1752                   dense_shape=[2, 2])
   1753       }
   1754       output, column_to_variable, _ = (
   1755           feature_column_ops.weighted_sum_from_feature_columns(
   1756               features, [movies], num_outputs=1))
   1757       logits_core = fc_core.linear_model(features, [movies])
   1758 
   1759       with self.test_session() as sess:
   1760         variables_lib.initialize_all_variables().run()
   1761         lookup_ops.tables_initializer().run()
   1762 
   1763         weights = column_to_variable[movies][0]
   1764         self.assertEqual(weights.get_shape(), (3, 1))
   1765         sess.run(weights.assign([[0.1], [0.3], [0.5]]))
   1766         # score for first example = 0.3 (matrix) + 0.1 (head-on) = 0.4
   1767         # score for second example = 0.5 (winter sleep)
   1768         self.assertAllClose(output.eval(), [[0.4], [0.5]])
   1769         # Cross compatibility: Core builder output should equal to contrib.
   1770         self.assertAllEqual(output.eval().shape, logits_core.eval().shape)
   1771 
   1772   def testRealValuedColumnWithMultiDimensions(self):
   1773     real_valued = feature_column.real_valued_column("price", 2)
   1774     features = {
   1775         "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]])
   1776     }
   1777     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1778         features, [real_valued], num_outputs=5)
   1779     with self.test_session():
   1780       variables_lib.global_variables_initializer().run()
   1781       self.assertAllEqual(logits.eval().shape, [3, 5])
   1782 
   1783   def testBucketizedColumnWithMultiDimensions(self):
   1784     bucket = feature_column.bucketized_column(
   1785         feature_column.real_valued_column("price", 2),
   1786         boundaries=[0., 10., 100.])
   1787     features = {
   1788         "price": constant_op.constant([[20., 10.], [110, 0.], [-3, 30]])
   1789     }
   1790     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
   1791         features, [bucket], num_outputs=5)
   1792     with self.test_session():
   1793       variables_lib.global_variables_initializer().run()
   1794       self.assertAllEqual(logits.eval().shape, [3, 5])
   1795 
  def testAllWideColumns(self):
    """All wide column types combine into one linear model's weighted sum."""
    real_valued = feature_column.real_valued_column("income", 2)
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
    hashed_sparse = feature_column.sparse_column_with_hash_bucket("wire", 10)
    # Crossing a bucketized column with a sparse column is allowed in the
    # wide (linear) setting.
    crossed = feature_column.crossed_column([bucket, hashed_sparse], 100)
    features = {
        "income":
            constant_op.constant([[20., 10], [110, 0], [-3, 30]]),
        "price":
            constant_op.constant([[20.], [110], [-3]]),
        "wire":
            sparse_tensor.SparseTensor(
                values=["omar", "stringer", "marlo"],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    output, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
        features, [real_valued, bucket, hashed_sparse, crossed], num_outputs=5)
    output_core = fc_core.linear_model(
        features, [real_valued, bucket, hashed_sparse, crossed], units=5)
    with self.test_session():
      variables_lib.global_variables_initializer().run()
      self.assertAllEqual(output.eval().shape, [3, 5])
      # Verify cross compatibility: Core builder output should equal to contrib.
      self.assertAllEqual(output.eval(), output_core.eval())
   1822 
   1823   def testPredictions(self):
   1824     language = feature_column.sparse_column_with_keys(
   1825         column_name="language", keys=["english", "finnish", "hindi"])
   1826     age = feature_column.real_valued_column("age")
   1827     with ops.Graph().as_default():
   1828       features = {
   1829           "age":
   1830               constant_op.constant([[1], [2]]),
   1831           "language":
   1832               sparse_tensor.SparseTensor(
   1833                   values=["hindi", "english"],
   1834                   indices=[[0, 0], [1, 0]],
   1835                   dense_shape=[2, 1]),
   1836       }
   1837       output, column_to_variable, bias = (
   1838           feature_column_ops.weighted_sum_from_feature_columns(
   1839               features, [age, language], num_outputs=1))
   1840       with self.test_session() as sess:
   1841         variables_lib.global_variables_initializer().run()
   1842         lookup_ops.tables_initializer().run()
   1843 
   1844         self.assertAllClose(output.eval(), [[0.], [0.]])
   1845 
   1846         sess.run(bias.assign([0.1]))
   1847         self.assertAllClose(output.eval(), [[0.1], [0.1]])
   1848 
   1849         # score: 0.1 + age*0.1
   1850         sess.run(column_to_variable[age][0].assign([[0.2]]))
   1851         self.assertAllClose(output.eval(), [[0.3], [0.5]])
   1852 
   1853         # score: 0.1 + age*0.1 + language_weight[language_index]
   1854         sess.run(column_to_variable[language][0].assign([[0.1], [0.3], [0.2]]))
   1855         self.assertAllClose(output.eval(), [[0.5], [0.6]])
   1856 
   1857   def testJointPredictions(self):
   1858     country = feature_column.sparse_column_with_keys(
   1859         column_name="country", keys=["us", "finland"])
   1860     language = feature_column.sparse_column_with_keys(
   1861         column_name="language", keys=["english", "finnish", "hindi"])
   1862     with ops.Graph().as_default():
   1863       features = {
   1864           "country":
   1865               sparse_tensor.SparseTensor(
   1866                   values=["finland", "us"],
   1867                   indices=[[0, 0], [1, 0]],
   1868                   dense_shape=[2, 1]),
   1869           "language":
   1870               sparse_tensor.SparseTensor(
   1871                   values=["hindi", "english"],
   1872                   indices=[[0, 0], [1, 0]],
   1873                   dense_shape=[2, 1]),
   1874       }
   1875       output, variables, bias = (
   1876           feature_column_ops.joint_weighted_sum_from_feature_columns(
   1877               features, [country, language], num_outputs=1))
   1878       # Assert that only a single weight is created.
   1879       self.assertEqual(len(variables), 1)
   1880       with self.test_session() as sess:
   1881         variables_lib.global_variables_initializer().run()
   1882         lookup_ops.tables_initializer().run()
   1883 
   1884         self.assertAllClose(output.eval(), [[0.], [0.]])
   1885 
   1886         sess.run(bias.assign([0.1]))
   1887         self.assertAllClose(output.eval(), [[0.1], [0.1]])
   1888 
   1889         # shape is [5,1] because 1 class and 2 + 3 features.
   1890         self.assertEquals(variables[0].get_shape().as_list(), [5, 1])
   1891 
   1892         # score: bias + country_weight + language_weight
   1893         sess.run(variables[0].assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
   1894         self.assertAllClose(output.eval(), [[0.8], [0.5]])
   1895 
   1896   def testJointPredictionsWeightedFails(self):
   1897     language = feature_column.weighted_sparse_column(
   1898         feature_column.sparse_column_with_keys(
   1899             column_name="language", keys=["english", "finnish", "hindi"]),
   1900         "weight")
   1901     with ops.Graph().as_default():
   1902       features = {
   1903           "weight":
   1904               constant_op.constant([[1], [2]]),
   1905           "language":
   1906               sparse_tensor.SparseTensor(
   1907                   values=["hindi", "english"],
   1908                   indices=[[0, 0], [1, 0]],
   1909                   dense_shape=[2, 1]),
   1910       }
   1911       with self.assertRaises(AssertionError):
   1912         feature_column_ops.joint_weighted_sum_from_feature_columns(
   1913             features, [language], num_outputs=1)
   1914 
   1915   def testJointPredictionsRealFails(self):
   1916     age = feature_column.real_valued_column("age")
   1917     with ops.Graph().as_default():
   1918       features = {"age": constant_op.constant([[1], [2]]),}
   1919       with self.assertRaises(NotImplementedError):
   1920         feature_column_ops.joint_weighted_sum_from_feature_columns(
   1921             features, [age], num_outputs=1)
   1922 
   1923   def testPredictionsWithWeightedSparseColumn(self):
   1924     language = feature_column.sparse_column_with_keys(
   1925         column_name="language", keys=["english", "finnish", "hindi"])
   1926     weighted_language = feature_column.weighted_sparse_column(
   1927         sparse_id_column=language, weight_column_name="age")
   1928     with ops.Graph().as_default():
   1929       features = {
   1930           "language":
   1931               sparse_tensor.SparseTensor(
   1932                   values=["hindi", "english"],
   1933                   indices=[[0, 0], [1, 0]],
   1934                   dense_shape=[2, 1]),
   1935           "age":
   1936               sparse_tensor.SparseTensor(
   1937                   values=[10.0, 20.0],
   1938                   indices=[[0, 0], [1, 0]],
   1939                   dense_shape=[2, 1])
   1940       }
   1941       output, column_to_variable, bias = (
   1942           feature_column_ops.weighted_sum_from_feature_columns(
   1943               features, [weighted_language], num_outputs=1))
   1944       with self.test_session() as sess:
   1945         variables_lib.global_variables_initializer().run()
   1946         lookup_ops.tables_initializer().run()
   1947 
   1948         self.assertAllClose(output.eval(), [[0.], [0.]])
   1949 
   1950         sess.run(bias.assign([0.1]))
   1951         self.assertAllClose(output.eval(), [[0.1], [0.1]])
   1952 
   1953         # score: bias + age*language_weight[index]
   1954         sess.run(column_to_variable[weighted_language][0].assign([[0.1], [0.2],
   1955                                                                   [0.3]]))
   1956         self.assertAllClose(output.eval(), [[3.1], [2.1]])
   1957 
   1958   def testPredictionsWithMultivalentColumnButNoCross(self):
   1959     language = feature_column.sparse_column_with_keys(
   1960         column_name="language", keys=["english", "turkish", "hindi"])
   1961     with ops.Graph().as_default():
   1962       features = {
   1963           "language":
   1964               sparse_tensor.SparseTensor(
   1965                   values=["hindi", "english"],
   1966                   indices=[[0, 0], [0, 1]],
   1967                   dense_shape=[1, 2])
   1968       }
   1969       output, column_to_variable, bias = (
   1970           feature_column_ops.weighted_sum_from_feature_columns(
   1971               features, [language], num_outputs=1))
   1972       with self.test_session() as sess:
   1973         variables_lib.global_variables_initializer().run()
   1974         lookup_ops.tables_initializer().run()
   1975 
   1976         # score: 0.1 + language_weight['hindi'] + language_weight['english']
   1977         sess.run(bias.assign([0.1]))
   1978         sess.run(column_to_variable[language][0].assign([[0.1], [0.3], [0.2]]))
   1979         self.assertAllClose(output.eval(), [[0.4]])
   1980 
   1981   def testSparseFeatureColumnWithHashedBucketSize(self):
   1982     movies = feature_column.sparse_column_with_hash_bucket(
   1983         column_name="movies", hash_bucket_size=15)
   1984     with ops.Graph().as_default():
   1985       features = {
   1986           "movies":
   1987               sparse_tensor.SparseTensor(
   1988                   values=["matrix", "head-on", "winter sleep"],
   1989                   indices=[[0, 0], [0, 1], [1, 0]],
   1990                   dense_shape=[2, 2])
   1991       }
   1992       output, column_to_variable, _ = (
   1993           feature_column_ops.weighted_sum_from_feature_columns(
   1994               features, [movies], num_outputs=1))
   1995       with self.test_session() as sess:
   1996         variables_lib.global_variables_initializer().run()
   1997         lookup_ops.tables_initializer().run()
   1998 
   1999         weights = column_to_variable[movies][0]
   2000         self.assertEqual(weights.get_shape(), (15, 1))
   2001         sess.run(weights.assign(weights + 0.4))
   2002         # score for first example = 0.4 (matrix) + 0.4 (head-on) = 0.8
   2003         # score for second example = 0.4 (winter sleep)
   2004         self.assertAllClose(output.eval(), [[0.8], [0.4]])
   2005 
   2006   def testCrossUsageInPredictions(self):
   2007     language = feature_column.sparse_column_with_hash_bucket(
   2008         "language", hash_bucket_size=3)
   2009     country = feature_column.sparse_column_with_hash_bucket(
   2010         "country", hash_bucket_size=5)
   2011     country_language = feature_column.crossed_column(
   2012         [language, country], hash_bucket_size=10)
   2013     with ops.Graph().as_default():
   2014       features = {
   2015           "language":
   2016               sparse_tensor.SparseTensor(
   2017                   values=["english", "spanish"],
   2018                   indices=[[0, 0], [1, 0]],
   2019                   dense_shape=[2, 1]),
   2020           "country":
   2021               sparse_tensor.SparseTensor(
   2022                   values=["US", "SV"],
   2023                   indices=[[0, 0], [1, 0]],
   2024                   dense_shape=[2, 1])
   2025       }
   2026       output, column_to_variable, _ = (
   2027           feature_column_ops.weighted_sum_from_feature_columns(
   2028               features, [country_language], num_outputs=1))
   2029       with self.test_session() as sess:
   2030         variables_lib.global_variables_initializer().run()
   2031         lookup_ops.tables_initializer().run()
   2032 
   2033         weights = column_to_variable[country_language][0]
   2034         sess.run(weights.assign(weights + 0.4))
   2035         self.assertAllClose(output.eval(), [[0.4], [0.4]])
   2036 
   2037   def testCrossColumnByItself(self):
   2038     language = feature_column.sparse_column_with_hash_bucket(
   2039         "language", hash_bucket_size=3)
   2040     language_language = feature_column.crossed_column(
   2041         [language, language], hash_bucket_size=10)
   2042     with ops.Graph().as_default():
   2043       features = {
   2044           "language":
   2045               sparse_tensor.SparseTensor(
   2046                   values=["english", "spanish"],
   2047                   indices=[[0, 0], [0, 1]],
   2048                   dense_shape=[1, 2]),
   2049       }
   2050       output, column_to_variable, _ = (
   2051           feature_column_ops.weighted_sum_from_feature_columns(
   2052               features, [language_language], num_outputs=1))
   2053       with self.test_session() as sess:
   2054         variables_lib.global_variables_initializer().run()
   2055         lookup_ops.tables_initializer().run()
   2056 
   2057         weights = column_to_variable[language_language][0]
   2058         sess.run(weights.assign(weights + 0.4))
   2059         # There are two features inside language. If we cross it by itself we'll
   2060         # have four crossed features.
   2061         self.assertAllClose(output.eval(), [[1.6]])
   2062 
   2063   def testMultivalentCrossUsageInPredictions(self):
   2064     language = feature_column.sparse_column_with_hash_bucket(
   2065         "language", hash_bucket_size=3)
   2066     country = feature_column.sparse_column_with_hash_bucket(
   2067         "country", hash_bucket_size=5)
   2068     country_language = feature_column.crossed_column(
   2069         [language, country], hash_bucket_size=10)
   2070     with ops.Graph().as_default():
   2071       features = {
   2072           "language":
   2073               sparse_tensor.SparseTensor(
   2074                   values=["english", "spanish"],
   2075                   indices=[[0, 0], [0, 1]],
   2076                   dense_shape=[1, 2]),
   2077           "country":
   2078               sparse_tensor.SparseTensor(
   2079                   values=["US", "SV"],
   2080                   indices=[[0, 0], [0, 1]],
   2081                   dense_shape=[1, 2])
   2082       }
   2083       output, column_to_variable, _ = (
   2084           feature_column_ops.weighted_sum_from_feature_columns(
   2085               features, [country_language], num_outputs=1))
   2086       with self.test_session() as sess:
   2087         variables_lib.global_variables_initializer().run()
   2088         lookup_ops.tables_initializer().run()
   2089 
   2090         weights = column_to_variable[country_language][0]
   2091         sess.run(weights.assign(weights + 0.4))
   2092         # There are four crosses each with 0.4 weight.
   2093         # score = 0.4 + 0.4 + 0.4 + 0.4
   2094         self.assertAllClose(output.eval(), [[1.6]])
   2095 
  def testMultivalentCrossUsageInPredictionsWithPartition(self):
    """Same prediction as the multivalent-cross test, with sharded weights.

    Bucket sizes are chosen large enough that the min/max partitioner
    splits each column's weight variable into multiple shards, and the
    final score must be unaffected by the sharding.
    """
    # bucket size has to be big enough to allow sharding.
    language = feature_column.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=64 << 19)
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=64 << 18)
    country_language = feature_column.crossed_column(
        [language, country], hash_bucket_size=64 << 18)
    with ops.Graph().as_default():
      features = {
          "language":
              sparse_tensor.SparseTensor(
                  values=["english", "spanish"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }
      # Build the weighted sum under a partitioned variable scope so each
      # column's weight is created as a sharded variable.
      with variable_scope.variable_scope(
          "weighted_sum_from_feature_columns",
          features.values(),
          partitioner=partitioned_variables.min_max_variable_partitioner(
              max_partitions=10, min_slice_size=((64 << 20) - 1))) as scope:
        output, column_to_variable, _ = (
            feature_column_ops.weighted_sum_from_feature_columns(
                features, [country, language, country_language],
                num_outputs=1,
                scope=scope))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # Expected shard counts for these bucket sizes and min_slice_size.
        self.assertEqual(2, len(column_to_variable[country]))
        self.assertEqual(3, len(column_to_variable[language]))
        self.assertEqual(2, len(column_to_variable[country_language]))

        # Shift every shard so that any cross id contributes 0.4.
        weights = column_to_variable[country_language]
        for partition_variable in weights:
          sess.run(partition_variable.assign(partition_variable + 0.4))
        # There are four crosses each with 0.4 weight.
        # score = 0.4 + 0.4 + 0.4 + 0.4
        self.assertAllClose(output.eval(), [[1.6]])
   2141 
   2142   def testRealValuedColumnHavingMultiDimensions(self):
   2143     country = feature_column.sparse_column_with_hash_bucket(
   2144         "country", hash_bucket_size=5)
   2145     age = feature_column.real_valued_column("age")
   2146     # The following RealValuedColumn has 3 dimensions.
   2147     incomes = feature_column.real_valued_column("incomes", 3)
   2148 
   2149     with ops.Graph().as_default():
   2150       features = {
   2151           "age":
   2152               constant_op.constant([[1], [1]]),
   2153           "incomes":
   2154               constant_op.constant([[100., 200., 300.], [10., 20., 30.]]),
   2155           "country":
   2156               sparse_tensor.SparseTensor(
   2157                   values=["US", "SV"],
   2158                   indices=[[0, 0], [1, 0]],
   2159                   dense_shape=[2, 2])
   2160       }
   2161       output, column_to_variable, _ = (
   2162           feature_column_ops.weighted_sum_from_feature_columns(
   2163               features, [country, age, incomes], num_outputs=1))
   2164       with self.test_session() as sess:
   2165         variables_lib.global_variables_initializer().run()
   2166         lookup_ops.tables_initializer().run()
   2167 
   2168         incomes_weights = column_to_variable[incomes][0]
   2169         sess.run(incomes_weights.assign([[0.1], [0.2], [0.3]]))
   2170         self.assertAllClose(output.eval(), [[140.], [14.]])
   2171 
  def testMulticlassWithRealValuedColumnHavingMultiDimensionsAndSparse(self):
    """5-class logits from a mix of sparse, scalar, var-len and 3-d columns."""
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    age = feature_column.real_valued_column("age")
    # The following RealValuedColumn has no predefined dimension so it
    # can be missing.
    height = feature_column._real_valued_var_len_column("height",
                                                        default_value=0,
                                                        is_sparse=False)
    # The following RealValuedColumn has 3 dimensions.
    incomes = feature_column.real_valued_column("incomes", 3)
    with ops.Graph().as_default():
      features = {
          "age":
              constant_op.constant([[1], [1]]),
          "incomes":
              constant_op.constant([[100., 200., 300.], [10., 20., 30.]]),
          "height":
              constant_op.constant([[5., 4.], [0., 6.]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[2, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [country, age, height, incomes], num_outputs=5))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # Both height dimensions share the per-class row [1, 2, 3, 5, 10],
        # so the logits scale with the per-example height sums 9 and 6.
        height_weights = column_to_variable[height][0]
        sess.run(
            height_weights.assign(
                [[1., 2., 3., 5., 10.], [1., 2., 3., 5., 10.]]))
        self.assertAllClose(output.eval(), [[9., 18., 27., 45., 90.],
                                            [6., 12., 18., 30., 60.]])

        # Income rows dotted with each class column add on top of the
        # height contribution (e.g. class 0, row 0: 100*0.01 + 200*0.02 +
        # 300*0.03 = 14).
        incomes_weights = column_to_variable[incomes][0]
        sess.run(
            incomes_weights.assign([[0.01, 0.1, 1., 10., 100.],
                                    [0.02, 0.2, 2., 20., 200.],
                                    [0.03, 0.3, 3., 30., 300.]]))
        self.assertAllClose(
            output.eval(),
            [[14. + 9., 140. + 18., 1400. + 27., 14000. + 45., 140000. + 90.],
             [1.4 + 6., 14. + 12., 140. + 18., 1400. + 30., 14000. + 60.]])
   2220 
   2221   def testBucketizedColumn(self):
   2222     bucket = feature_column.bucketized_column(
   2223         feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
   2224     with ops.Graph().as_default():
   2225       # buckets 2, 3, 0
   2226       features = {"price": constant_op.constant([[20.], [110], [-3]])}
   2227       output, column_to_variable, _ = (
   2228           feature_column_ops.weighted_sum_from_feature_columns(
   2229               features, [bucket], num_outputs=1))
   2230       output_core = fc_core.linear_model(features, [bucket])
   2231       with self.test_session() as sess:
   2232         variables_lib.global_variables_initializer().run()
   2233         lookup_ops.tables_initializer().run()
   2234         # Cross compatibility: Core builder output should equal to contrib.
   2235         self.assertAllEqual(output.eval(), output_core.eval())
   2236 
   2237         sess.run(column_to_variable[bucket][0].assign([[0.1], [0.2], [0.3],
   2238                                                        [0.4]]))
   2239         self.assertAllClose(output.eval(), [[0.3], [0.4], [0.1]])
   2240 
   2241   def testBucketizedColumnHavingMultiDimensions(self):
   2242     country = feature_column.sparse_column_with_hash_bucket(
   2243         "country", hash_bucket_size=5)
   2244     bucket = feature_column.bucketized_column(
   2245         feature_column.real_valued_column("price", 2),
   2246         boundaries=[0., 10., 100.])
   2247     with ops.Graph().as_default():
   2248       # buckets 2, 3, 0
   2249       features = {
   2250           "price":
   2251               constant_op.constant([[20., 210], [110, 50], [-3, -30]]),
   2252           "country":
   2253               sparse_tensor.SparseTensor(
   2254                   values=["US", "SV"],
   2255                   indices=[[0, 0], [1, 0]],
   2256                   dense_shape=[3, 2])
   2257       }
   2258       output, column_to_variable, _ = (
   2259           feature_column_ops.weighted_sum_from_feature_columns(
   2260               features, [bucket, country], num_outputs=1))
   2261       output_core = fc_core.linear_model(features, [bucket, country])
   2262       with self.test_session() as sess:
   2263         variables_lib.global_variables_initializer().run()
   2264         lookup_ops.tables_initializer().run()
   2265         # Cross compatibility: Core builder output should equal to contrib.
   2266         self.assertAllEqual(output.eval(), output_core.eval())
   2267 
   2268         # dimension = 2, bucket_size = 4, num_classes = 1
   2269         sess.run(column_to_variable[bucket][0].assign(
   2270             [[0.1], [0.2], [0.3], [0.4], [1], [2], [3], [4]]))
   2271         self.assertAllClose(output.eval(), [[0.3 + 4], [0.4 + 3], [0.1 + 1]])
   2272 
  def testMulticlassWithBucketizedColumnHavingMultiDimensions(self):
    """5-class logits from a bucketized 2-dimensional price column.

    The bucket weight matrix has dimension * bucket_size = 8 rows: the
    first four rows belong to the buckets of the first price dimension,
    the last four to the second.
    """
    country = feature_column.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    bucket = feature_column.bucketized_column(
        feature_column.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    with ops.Graph().as_default():
      # buckets 2, 3, 0
      features = {
          "price":
              constant_op.constant([[20., 210], [110, 50], [-3, -30]]),
          "country":
              sparse_tensor.SparseTensor(
                  values=["US", "SV"],
                  indices=[[0, 0], [1, 0]],
                  dense_shape=[3, 2])
      }
      output, column_to_variable, _ = (
          feature_column_ops.weighted_sum_from_feature_columns(
              features, [bucket, country], num_outputs=5))
      with self.test_session() as sess:
        variables_lib.global_variables_initializer().run()
        lookup_ops.tables_initializer().run()

        # dimension = 2, bucket_size = 4, num_classes = 5
        sess.run(column_to_variable[bucket][0].assign(
            [[0.1, 1, 10, 100, 1000], [0.2, 2, 20, 200, 2000],
             [0.3, 3, 30, 300, 3000], [0.4, 4, 40, 400, 4000],
             [5, 50, 500, 5000, 50000], [6, 60, 600, 6000, 60000],
             [7, 70, 700, 7000, 70000], [8, 80, 800, 8000, 80000]]))
        # Each example sums the selected bucket row from both dimensions,
        # e.g. row 0 picks dim-0 bucket 2 and dim-1 bucket 3 (0.3 + 8, ...).
        self.assertAllClose(
            output.eval(),
            [[0.3 + 8, 3 + 80, 30 + 800, 300 + 8000, 3000 + 80000],
             [0.4 + 7, 4 + 70, 40 + 700, 400 + 7000, 4000 + 70000],
             [0.1 + 5, 1 + 50, 10 + 500, 100 + 5000, 1000 + 50000]])
   2308 
   2309   def testCrossWithBucketizedColumn(self):
   2310     price_bucket = feature_column.bucketized_column(
   2311         feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
   2312     country = feature_column.sparse_column_with_hash_bucket(
   2313         "country", hash_bucket_size=5)
   2314     country_price = feature_column.crossed_column(
   2315         [country, price_bucket], hash_bucket_size=10)
   2316     with ops.Graph().as_default():
   2317       features = {
   2318           "price":
   2319               constant_op.constant([[20.]]),
   2320           "country":
   2321               sparse_tensor.SparseTensor(
   2322                   values=["US", "SV"],
   2323                   indices=[[0, 0], [0, 1]],
   2324                   dense_shape=[1, 2])
   2325       }
   2326       output, column_to_variable, _ = (
   2327           feature_column_ops.weighted_sum_from_feature_columns(
   2328               features, [country_price], num_outputs=1))
   2329       with self.test_session() as sess:
   2330         variables_lib.global_variables_initializer().run()
   2331         lookup_ops.tables_initializer().run()
   2332 
   2333         weights = column_to_variable[country_price][0]
   2334         sess.run(weights.assign(weights + 0.4))
   2335         # There are two crosses each with 0.4 weight.
   2336         # score = 0.4 + 0.4
   2337         self.assertAllClose(output.eval(), [[0.8]])
   2338 
   2339   def testCrossWithCrossedColumn(self):
   2340     price_bucket = feature_column.bucketized_column(
   2341         feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
   2342     language = feature_column.sparse_column_with_hash_bucket(
   2343         "language", hash_bucket_size=3)
   2344     country = feature_column.sparse_column_with_hash_bucket(
   2345         "country", hash_bucket_size=5)
   2346     country_language = feature_column.crossed_column(
   2347         [language, country], hash_bucket_size=10)
   2348     country_language_price = feature_column.crossed_column(
   2349         set([country_language, price_bucket]), hash_bucket_size=15)
   2350     with ops.Graph().as_default():
   2351       features = {
   2352           "price":
   2353               constant_op.constant([[20.]]),
   2354           "country":
   2355               sparse_tensor.SparseTensor(
   2356                   values=["US", "SV"],
   2357                   indices=[[0, 0], [0, 1]],
   2358                   dense_shape=[1, 2]),
   2359           "language":
   2360               sparse_tensor.SparseTensor(
   2361                   values=["english", "spanish"],
   2362                   indices=[[0, 0], [0, 1]],
   2363                   dense_shape=[1, 2])
   2364       }
   2365       output, column_to_variable, _ = (
   2366           feature_column_ops.weighted_sum_from_feature_columns(
   2367               features, [country_language_price], num_outputs=1))
   2368       with self.test_session() as sess:
   2369         variables_lib.global_variables_initializer().run()
   2370         lookup_ops.tables_initializer().run()
   2371 
   2372         weights = column_to_variable[country_language_price][0]
   2373         sess.run(weights.assign(weights + 0.4))
   2374         # There are two crosses each with 0.4 weight.
   2375         # score = 0.4 + 0.4 + 0.4 + 0.4
   2376         self.assertAllClose(output.eval(), [[1.6]])
   2377 
   2378   def testIntegerizedColumn(self):
   2379     product = feature_column.sparse_column_with_integerized_feature(
   2380         "product", bucket_size=5)
   2381     with ops.Graph().as_default():
   2382       features = {
   2383           "product":
   2384               sparse_tensor.SparseTensor(
   2385                   values=[0, 4, 2],
   2386                   indices=[[0, 0], [1, 0], [2, 0]],
   2387                   dense_shape=[3, 1])
   2388       }
   2389       output, column_to_variable, _ = (
   2390           feature_column_ops.weighted_sum_from_feature_columns(
   2391               features, [product], num_outputs=1))
   2392       with self.test_session() as sess:
   2393         variables_lib.global_variables_initializer().run()
   2394         lookup_ops.tables_initializer().run()
   2395         product_weights = column_to_variable[product][0]
   2396         sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
   2397         self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]])
   2398 
   2399   def testIntegerizedColumnWithDenseInputTensor(self):
   2400     product = feature_column.sparse_column_with_integerized_feature(
   2401         "product", bucket_size=5)
   2402     with ops.Graph().as_default():
   2403       features = {"product": constant_op.constant([[0], [4], [2]])}
   2404       output, column_to_variable, _ = (
   2405           feature_column_ops.weighted_sum_from_feature_columns(
   2406               features, [product], num_outputs=1))
   2407       with self.test_session() as sess:
   2408         variables_lib.global_variables_initializer().run()
   2409         lookup_ops.tables_initializer().run()
   2410         product_weights = column_to_variable[product][0]
   2411         sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
   2412         self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]])
   2413 
   2414   def testIntegerizedColumnWithDenseInputTensor2(self):
   2415     product = feature_column.sparse_column_with_integerized_feature(
   2416         "product", bucket_size=5)
   2417     with ops.Graph().as_default():
   2418       features = {"product": constant_op.constant([[0, 4], [2, 3]])}
   2419       output, column_to_variable, _ = (
   2420           feature_column_ops.weighted_sum_from_feature_columns(
   2421               features, [product], num_outputs=1))
   2422       with self.test_session() as sess:
   2423         variables_lib.global_variables_initializer().run()
   2424         lookup_ops.tables_initializer().run()
   2425         product_weights = column_to_variable[product][0]
   2426         sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
   2427         self.assertAllClose(output.eval(), [[0.6], [0.7]])
   2428 
   2429   def testIntegerizedColumnWithInvalidId(self):
   2430     product = feature_column.sparse_column_with_integerized_feature(
   2431         "product", bucket_size=5)
   2432     with ops.Graph().as_default():
   2433       features = {
   2434           "product":
   2435               sparse_tensor.SparseTensor(
   2436                   values=[5, 4, 7],
   2437                   indices=[[0, 0], [1, 0], [2, 0]],
   2438                   dense_shape=[3, 1])
   2439       }
   2440       output, column_to_variable, _ = (
   2441           feature_column_ops.weighted_sum_from_feature_columns(
   2442               features, [product], num_outputs=1))
   2443       with self.test_session() as sess:
   2444         variables_lib.global_variables_initializer().run()
   2445         lookup_ops.tables_initializer().run()
   2446         product_weights = column_to_variable[product][0]
   2447         sess.run(product_weights.assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
   2448         self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]])
   2449 
   2450   def testMulticlassWithOnlyBias(self):
   2451     with ops.Graph().as_default():
   2452       features = {"age": constant_op.constant([[10.], [20.], [30.], [40.]])}
   2453       output, _, bias = feature_column_ops.weighted_sum_from_feature_columns(
   2454           features, [feature_column.real_valued_column("age")], num_outputs=3)
   2455       with self.test_session() as sess:
   2456         variables_lib.global_variables_initializer().run()
   2457         lookup_ops.tables_initializer().run()
   2458         sess.run(bias.assign([0.1, 0.2, 0.3]))
   2459         self.assertAllClose(output.eval(), [[0.1, 0.2, 0.3], [0.1, 0.2, 0.3],
   2460                                             [0.1, 0.2, 0.3], [0.1, 0.2, 0.3]])
   2461 
   2462   def testMulticlassWithRealValuedColumn(self):
   2463     with ops.Graph().as_default():
   2464       column = feature_column.real_valued_column("age")
   2465       features = {"age": constant_op.constant([[10.], [20.], [30.], [40.]])}
   2466       output, column_to_variable, _ = (
   2467           feature_column_ops.weighted_sum_from_feature_columns(
   2468               features, [column], num_outputs=3))
   2469       with self.test_session() as sess:
   2470         variables_lib.global_variables_initializer().run()
   2471         lookup_ops.tables_initializer().run()
   2472         weights = column_to_variable[column][0]
   2473         self.assertEqual(weights.get_shape(), (1, 3))
   2474         sess.run(weights.assign([[0.01, 0.03, 0.05]]))
   2475         self.assertAllClose(output.eval(), [[0.1, 0.3, 0.5], [0.2, 0.6, 1.0],
   2476                                             [0.3, 0.9, 1.5], [0.4, 1.2, 2.0]])
   2477 
   2478   def testMulticlassWithSparseColumn(self):
   2479     with ops.Graph().as_default():
   2480       column = feature_column.sparse_column_with_keys(
   2481           column_name="language",
   2482           keys=["english", "arabic", "hindi", "russian", "swahili"])
   2483       features = {
   2484           "language":
   2485               sparse_tensor.SparseTensor(
   2486                   values=["hindi", "english", "arabic", "russian"],
   2487                   indices=[[0, 0], [1, 0], [2, 0], [3, 0]],
   2488                   dense_shape=[4, 1])
   2489       }
   2490       output, column_to_variable, _ = (
   2491           feature_column_ops.weighted_sum_from_feature_columns(
   2492               features, [column], num_outputs=3))
   2493       with self.test_session() as sess:
   2494         variables_lib.global_variables_initializer().run()
   2495         lookup_ops.tables_initializer().run()
   2496         weights = column_to_variable[column][0]
   2497         self.assertEqual(weights.get_shape(), (5, 3))
   2498         sess.run(
   2499             weights.assign([[0.1, 0.4, 0.7],
   2500                             [0.2, 0.5, 0.8],
   2501                             [0.3, 0.6, 0.9],
   2502                             [0.4, 0.7, 1.0],
   2503                             [0.5, 0.8, 1.1]]))
   2504         self.assertAllClose(output.eval(), [[0.3, 0.6, 0.9],
   2505                                             [0.1, 0.4, 0.7],
   2506                                             [0.2, 0.5, 0.8],
   2507                                             [0.4, 0.7, 1.0]])
   2508 
   2509   def testMulticlassWithBucketizedColumn(self):
   2510     column = feature_column.bucketized_column(
   2511         feature_column.real_valued_column("price"),
   2512         boundaries=[0., 100., 500., 1000.])
   2513     with ops.Graph().as_default():
   2514       # buckets 0, 2, 1, 2
   2515       features = {"price": constant_op.constant([[-3], [110], [20.], [210]])}
   2516       output, column_to_variable, _ = (
   2517           feature_column_ops.weighted_sum_from_feature_columns(
   2518               features, [column], num_outputs=3))
   2519       with self.test_session() as sess:
   2520         variables_lib.global_variables_initializer().run()
   2521         lookup_ops.tables_initializer().run()
   2522 
   2523         weights = column_to_variable[column][0]
   2524         self.assertEqual(weights.get_shape(), (5, 3))
   2525         sess.run(
   2526             weights.assign([[0.1, 0.4, 0.7],
   2527                             [0.2, 0.5, 0.8],
   2528                             [0.3, 0.6, 0.9],
   2529                             [0.4, 0.7, 1.0],
   2530                             [0.5, 0.8, 1.1]]))
   2531         self.assertAllClose(output.eval(), [[0.1, 0.4, 0.7],
   2532                                             [0.3, 0.6, 0.9],
   2533                                             [0.2, 0.5, 0.8],
   2534                                             [0.3, 0.6, 0.9]])
   2535 
   2536   def testMulticlassWithCrossedColumn(self):
   2537     language = feature_column.sparse_column_with_hash_bucket(
   2538         "language", hash_bucket_size=3)
   2539     country = feature_column.sparse_column_with_hash_bucket(
   2540         "country", hash_bucket_size=2)
   2541     column = feature_column.crossed_column(
   2542         {language, country}, hash_bucket_size=5)
   2543     with ops.Graph().as_default():
   2544       features = {
   2545           "language":
   2546               sparse_tensor.SparseTensor(
   2547                   values=["english", "spanish", "russian", "swahili"],
   2548                   indices=[[0, 0], [1, 0], [2, 0], [3, 0]],
   2549                   dense_shape=[4, 1]),
   2550           "country":
   2551               sparse_tensor.SparseTensor(
   2552                   values=["US", "SV", "RU", "KE"],
   2553                   indices=[[0, 0], [1, 0], [2, 0], [3, 0]],
   2554                   dense_shape=[4, 1])
   2555       }
   2556       output, column_to_variable, _ = (
   2557           feature_column_ops.weighted_sum_from_feature_columns(
   2558               features, [column], num_outputs=3))
   2559       with self.test_session() as sess:
   2560         variables_lib.global_variables_initializer().run()
   2561         lookup_ops.tables_initializer().run()
   2562 
   2563         weights = column_to_variable[column][0]
   2564         self.assertEqual(weights.get_shape(), (5, 3))
   2565         sess.run(
   2566             weights.assign([[0.1, 0.4, 0.7],
   2567                             [0.2, 0.5, 0.8],
   2568                             [0.3, 0.6, 0.9],
   2569                             [0.4, 0.7, 1.0],
   2570                             [0.5, 0.8, 1.1]]))
   2571         self.assertAllClose(array_ops.shape(output).eval(), [4, 3])
   2572 
   2573   def testMulticlassWithMultivalentColumn(self):
   2574     column = feature_column.sparse_column_with_keys(
   2575         column_name="language",
   2576         keys=["english", "turkish", "hindi", "russian", "swahili"])
   2577     with ops.Graph().as_default():
   2578       features = {
   2579           "language":
   2580               sparse_tensor.SparseTensor(
   2581                   values=["hindi", "english", "turkish", "turkish", "english"],
   2582                   indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]],
   2583                   dense_shape=[4, 2])
   2584       }
   2585       output, column_to_variable, _ = (
   2586           feature_column_ops.weighted_sum_from_feature_columns(
   2587               features, [column], num_outputs=3))
   2588       with self.test_session() as sess:
   2589         variables_lib.global_variables_initializer().run()
   2590         lookup_ops.tables_initializer().run()
   2591 
   2592         weights = column_to_variable[column][0]
   2593         self.assertEqual(weights.get_shape(), (5, 3))
   2594         sess.run(
   2595             weights.assign([[0.1, 0.4, 0.7],
   2596                             [0.2, 0.5, 0.8],
   2597                             [0.3, 0.6, 0.9],
   2598                             [0.4, 0.7, 1.0],
   2599                             [0.5, 0.8, 1.1]]))
   2600         self.assertAllClose(output.eval(), [[0.4, 1.0, 1.6],
   2601                                             [0.2, 0.5, 0.8],
   2602                                             [0.2, 0.5, 0.8],
   2603                                             [0.1, 0.4, 0.7]])
   2604 
   2605   def testVariablesAddedToCollection(self):
   2606     price_bucket = feature_column.bucketized_column(
   2607         feature_column.real_valued_column("price"), boundaries=[0., 10., 100.])
   2608     country = feature_column.sparse_column_with_hash_bucket(
   2609         "country", hash_bucket_size=5)
   2610     country_price = feature_column.crossed_column(
   2611         [country, price_bucket], hash_bucket_size=10)
   2612     with ops.Graph().as_default():
   2613       features = {
   2614           "price":
   2615               constant_op.constant([[20.]]),
   2616           "country":
   2617               sparse_tensor.SparseTensor(
   2618                   values=["US", "SV"],
   2619                   indices=[[0, 0], [0, 1]],
   2620                   dense_shape=[1, 2])
   2621       }
   2622       feature_column_ops.weighted_sum_from_feature_columns(
   2623           features, [country_price, price_bucket],
   2624           num_outputs=1,
   2625           weight_collections=["my_collection"])
   2626       weights = ops.get_collection("my_collection")
   2627       # 3 = bias + price_bucket + country_price
   2628       self.assertEqual(3, len(weights))
   2629 
   2630 
   2631 class ParseExampleTest(test.TestCase):
   2632 
   2633   def testParseExample(self):
   2634     bucket = feature_column.bucketized_column(
   2635         feature_column.real_valued_column(
   2636             "price", dimension=3),
   2637         boundaries=[0., 10., 100.])
   2638     wire_cast = feature_column.sparse_column_with_keys(
   2639         "wire_cast", ["marlo", "omar", "stringer"])
   2640     # buckets 2, 3, 0
   2641     data = example_pb2.Example(features=feature_pb2.Features(feature={
   2642         "price":
   2643             feature_pb2.Feature(float_list=feature_pb2.FloatList(
   2644                 value=[20., 110, -3])),
   2645         "wire_cast":
   2646             feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
   2647                 value=[b"stringer", b"marlo"])),
   2648     }))
   2649     output = feature_column_ops.parse_feature_columns_from_examples(
   2650         serialized=[data.SerializeToString()],
   2651         feature_columns=[bucket, wire_cast])
   2652     self.assertIn(bucket, output)
   2653     self.assertIn(wire_cast, output)
   2654     with self.test_session():
   2655       lookup_ops.tables_initializer().run()
   2656       self.assertAllEqual(output[bucket].eval(), [[2, 3, 0]])
   2657       self.assertAllEqual(output[wire_cast].indices.eval(), [[0, 0], [0, 1]])
   2658       self.assertAllEqual(output[wire_cast].values.eval(), [2, 0])
   2659 
   2660   def testParseSequenceExample(self):
   2661     location_keys = ["east_side", "west_side", "nyc"]
   2662     embedding_dimension = 10
   2663 
   2664     location = feature_column.sparse_column_with_keys(
   2665         "location", keys=location_keys)
   2666     location_onehot = feature_column.one_hot_column(location)
   2667     wire_cast = feature_column.sparse_column_with_keys(
   2668         "wire_cast", ["marlo", "omar", "stringer"])
   2669     wire_cast_embedded = feature_column.embedding_column(
   2670         wire_cast, dimension=embedding_dimension)
   2671     measurements = feature_column.real_valued_column(
   2672         "measurements", dimension=2)
   2673 
   2674     context_feature_columns = [location_onehot]
   2675     sequence_feature_columns = [wire_cast_embedded, measurements]
   2676 
   2677     sequence_example = example_pb2.SequenceExample(
   2678         context=feature_pb2.Features(feature={
   2679             "location":
   2680                 feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
   2681                     value=[b"west_side"])),
   2682         }),
   2683         feature_lists=feature_pb2.FeatureLists(feature_list={
   2684             "wire_cast":
   2685                 feature_pb2.FeatureList(feature=[
   2686                     feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
   2687                         value=[b"marlo", b"stringer"])),
   2688                     feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
   2689                         value=[b"omar", b"stringer", b"marlo"])),
   2690                     feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
   2691                         value=[b"marlo"])),
   2692                 ]),
   2693             "measurements":
   2694                 feature_pb2.FeatureList(feature=[
   2695                     feature_pb2.Feature(float_list=feature_pb2.FloatList(
   2696                         value=[0.2, 0.3])),
   2697                     feature_pb2.Feature(float_list=feature_pb2.FloatList(
   2698                         value=[0.1, 0.8])),
   2699                     feature_pb2.Feature(float_list=feature_pb2.FloatList(
   2700                         value=[0.5, 0.0])),
   2701                 ])
   2702         }))
   2703 
   2704     ctx, seq = feature_column_ops.parse_feature_columns_from_sequence_examples(
   2705         serialized=sequence_example.SerializeToString(),
   2706         context_feature_columns=context_feature_columns,
   2707         sequence_feature_columns=sequence_feature_columns)
   2708 
   2709     self.assertIn("location", ctx)
   2710     self.assertIsInstance(ctx["location"], sparse_tensor.SparseTensor)
   2711     self.assertIn("wire_cast", seq)
   2712     self.assertIsInstance(seq["wire_cast"], sparse_tensor.SparseTensor)
   2713     self.assertIn("measurements", seq)
   2714     self.assertIsInstance(seq["measurements"], ops.Tensor)
   2715 
   2716     with self.test_session() as sess:
   2717       location_val, wire_cast_val, measurement_val = sess.run(
   2718           [ctx["location"], seq["wire_cast"], seq["measurements"]])
   2719 
   2720     self.assertAllEqual(location_val.indices, np.array([[0]]))
   2721     self.assertAllEqual(location_val.values, np.array([b"west_side"]))
   2722     self.assertAllEqual(location_val.dense_shape, np.array([1]))
   2723 
   2724     self.assertAllEqual(wire_cast_val.indices,
   2725                         np.array(
   2726                             [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]]))
   2727     self.assertAllEqual(
   2728         wire_cast_val.values,
   2729         np.array(
   2730             [b"marlo", b"stringer", b"omar", b"stringer", b"marlo", b"marlo"]))
   2731     self.assertAllEqual(wire_cast_val.dense_shape, np.array([3, 3]))
   2732 
   2733     self.assertAllClose(measurement_val,
   2734                         np.array([[0.2, 0.3], [0.1, 0.8], [0.5, 0.0]]))
   2735 
   2736 
   2737 class InferRealValuedColumnTest(test.TestCase):
   2738 
   2739   def testTensorInt32(self):
   2740     self.assertEqual(
   2741         feature_column_ops.infer_real_valued_columns(
   2742             array_ops.zeros(
   2743                 shape=[33, 4], dtype=dtypes.int32)), [
   2744                     feature_column.real_valued_column(
   2745                         "", dimension=4, dtype=dtypes.int32)
   2746                 ])
   2747 
   2748   def testTensorInt64(self):
   2749     self.assertEqual(
   2750         feature_column_ops.infer_real_valued_columns(
   2751             array_ops.zeros(
   2752                 shape=[33, 4], dtype=dtypes.int64)), [
   2753                     feature_column.real_valued_column(
   2754                         "", dimension=4, dtype=dtypes.int64)
   2755                 ])
   2756 
   2757   def testTensorFloat32(self):
   2758     self.assertEqual(
   2759         feature_column_ops.infer_real_valued_columns(
   2760             array_ops.zeros(
   2761                 shape=[33, 4], dtype=dtypes.float32)), [
   2762                     feature_column.real_valued_column(
   2763                         "", dimension=4, dtype=dtypes.float32)
   2764                 ])
   2765 
   2766   def testTensorFloat64(self):
   2767     self.assertEqual(
   2768         feature_column_ops.infer_real_valued_columns(
   2769             array_ops.zeros(
   2770                 shape=[33, 4], dtype=dtypes.float64)), [
   2771                     feature_column.real_valued_column(
   2772                         "", dimension=4, dtype=dtypes.float64)
   2773                 ])
   2774 
   2775   def testDictionary(self):
   2776     self.assertItemsEqual(
   2777         feature_column_ops.infer_real_valued_columns({
   2778             "a": array_ops.zeros(
   2779                 shape=[33, 4], dtype=dtypes.int32),
   2780             "b": array_ops.zeros(
   2781                 shape=[3, 2], dtype=dtypes.float32)
   2782         }), [
   2783             feature_column.real_valued_column(
   2784                 "a", dimension=4, dtype=dtypes.int32),
   2785             feature_column.real_valued_column(
   2786                 "b", dimension=2, dtype=dtypes.float32)
   2787         ])
   2788 
   2789   def testNotGoodDtype(self):
   2790     with self.assertRaises(ValueError):
   2791       feature_column_ops.infer_real_valued_columns(
   2792           constant_op.constant(
   2793               [["a"]], dtype=dtypes.string))
   2794 
   2795   def testSparseTensor(self):
   2796     with self.assertRaises(ValueError):
   2797       feature_column_ops.infer_real_valued_columns(
   2798           sparse_tensor.SparseTensor(
   2799               indices=[[0, 0]], values=["a"], dense_shape=[1, 1]))
   2800 
   2801 
# Run the test suite when this module is executed directly.
if __name__ == "__main__":
  test.main()
   2804