# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Regression on Boston housing data using DNNBoostedTreeCombinedRegressor.

  Example Usage:

  python tensorflow/contrib/boosted_trees/examples/boston_combined.py \
  --batch_size=404 --output_dir="/tmp/boston" \
  --dnn_hidden_units="8,4" --dnn_steps_to_train=1000 \
  --tree_depth=4 --tree_learning_rate=0.1 \
  --num_trees=100 --tree_l2=0.001 --num_eval_steps=1 \
  --vmodule=training_ops=1

  When training is done, the mean squared error on the eval data is reported.
  Point TensorBoard at the run's output directory to see how training
  progresses:

  tensorboard --logdir=/tmp/boston

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tensorflow as tf

from tensorflow.contrib.boosted_trees.estimator_batch.dnn_tree_combined_estimator import DNNBoostedTreeCombinedRegressor
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow.contrib.layers.python.layers import feature_column
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils

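# The Boston housing data has 13 numeric features per example.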
_BOSTON_NUM_FEATURES = 13


def _get_estimator(output_dir, feature_cols):
  """Configures DNNBoostedTreeCombinedRegressor based on flags."""
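  # Tree growing is configured through a LearnerConfig proto: a fixed per-tree
  # learning rate (shrinkage), L1/L2 regularization and a maximum tree depth.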
  learner_config = learner_pb2.LearnerConfig()
  learner_config.learning_rate_tuner.fixed.learning_rate = (
      FLAGS.tree_learning_rate)
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.tree_l2
  learner_config.constraints.max_tree_depth = FLAGS.tree_depth

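  # Write summaries every step so TensorBoard shows fine-grained training
  # curves.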
  run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)

  # Create a DNNBoostedTreeCombinedRegressor estimator.
  estimator = DNNBoostedTreeCombinedRegressor(
      dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
      dnn_feature_columns=feature_cols,
      tree_learner_config=learner_config,
      num_trees=FLAGS.num_trees,
      # This should be the total number of training examples. For large
      # datasets it can be larger than the batch_size.
      tree_examples_per_layer=FLAGS.batch_size,
      model_dir=output_dir,
      config=run_config,
      # Also feed the DNN's input layer to the tree ensemble as features.
      dnn_input_layer_to_tree=True,
      dnn_steps_to_train=FLAGS.dnn_steps_to_train)
  return estimator


def _make_experiment_fn(output_dir):
  """Creates experiment for DNNBoostedTreeCombinedRegressor."""
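  # Load the Boston housing data bundled with Keras; it comes pre-split into
  # train and test sets.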
  (x_train, y_train), (x_test,
                       y_test) = tf.keras.datasets.boston_housing.load_data()

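  # numpy_input_fn serves the in-memory arrays: training shuffles and repeats
  # indefinitely (num_epochs=None), while eval makes a single ordered pass.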
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": x_train},
      y=y_train,
      batch_size=FLAGS.batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)

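  # All 13 features are packed into a single real-valued column named "x".
  # The parsing serving input_fn and export strategy are only used to export
  # the trained model as a SavedModel.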
  feature_columns = [
      feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES)
  ]
  feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  export_strategies = [
      saved_model_export_utils.make_export_strategy(serving_input_fn)]
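  # train_steps is left as None: the combined estimator first trains the DNN
  # for dnn_steps_to_train steps and then grows trees, and is expected to stop
  # on its own once num_trees trees have been built.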
  return tf.contrib.learn.Experiment(
      estimator=_get_estimator(output_dir, feature_columns),
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=None,
      eval_steps=FLAGS.num_eval_steps,
      eval_metrics=None,
      export_strategies=export_strategies)


def main(unused_argv):
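  # learn_runner drives the Experiment; the "train_and_evaluate" schedule
  # interleaves training with periodic evaluation on the test set.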
  learn_runner.run(
      experiment_fn=_make_experiment_fn,
      output_dir=FLAGS.output_dir,
      schedule="train_and_evaluate")


if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  parser = argparse.ArgumentParser()
  # Define the list of flags that users can change.
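  # Note: the Boston housing training split has 404 examples, so the
  # --batch_size=404 in the usage example above covers the full training set.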
  parser.add_argument(
      "--batch_size",
      type=int,
      default=1000,
      help="The batch size for reading data.")
  parser.add_argument(
      "--output_dir",
      type=str,
      required=True,
      help="Directory for model output (checkpoints, summaries, exports).")
  parser.add_argument(
      "--num_eval_steps",
      type=int,
      default=1,
      help="The number of steps to run evaluation for.")
  # Flags for configuring DNNBoostedTreeCombinedRegressor.
  parser.add_argument(
      "--dnn_hidden_units",
      type=str,
      default="8,4",
      help="Comma-separated sizes of the DNN hidden layers.")
  parser.add_argument(
      "--dnn_steps_to_train",
      type=int,
      default=1000,
      help="Number of steps to train the DNN.")
  parser.add_argument(
      "--tree_depth", type=int, default=4, help="Maximum depth of trees.")
  parser.add_argument(
      "--tree_l2", type=float, default=1.0, help="L2 regularization per batch.")
  parser.add_argument(
      "--tree_learning_rate",
      type=float,
      default=0.1,
      help=("Learning rate (shrinkage weight) with which each "
            "new tree is added."))
  parser.add_argument(
      "--num_trees",
      type=int,
      default=None,
      required=True,
      help="Number of trees to grow before stopping.")

  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)