Home | History | Annotate | Download | only in utils
      1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 // =============================================================================
     15 #include "tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h"
     16 
     17 #include <sys/types.h>
     18 #include <algorithm>
     19 #include <cstdlib>
     20 #include <functional>
     21 #include <iterator>
     22 #include <unordered_set>
     23 #include <utility>
     24 
     25 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h"  // NOLINT
     26 #include "tensorflow/core/lib/core/status_test_util.h"
     27 #include "tensorflow/core/platform/env.h"
     28 
     29 using std::unordered_set;
     30 using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig;
     31 using tensorflow::boosted_trees::trees::DecisionTreeEnsembleConfig;
     32 
     33 namespace tensorflow {
     34 namespace boosted_trees {
     35 namespace utils {
     36 namespace {
     37 
     38 const uint32 kSeed = 123;
     39 const int32 kNumTrees = 1000;
     40 
     41 class DropoutUtilsTest : public ::testing::Test {
     42  public:
     43   void SetUp() override {
     44     // Fill an weights.
     45     for (int i = 0; i < kNumTrees; ++i) {
     46       weights_.push_back(1.1 + 0.4 * i);
     47     }
     48   }
     49 
     50  protected:
     51   std::vector<float> weights_;
     52 };
     53 
     54 TEST_F(DropoutUtilsTest, DropoutProbabilityTest) {
     55   std::vector<int32> dropped_trees;
     56   std::vector<float> original_weights;
     57   std::unordered_set<int32> trees_not_to_drop;
     58 
     59   // Do not drop any trees
     60   {
     61     LearningRateDropoutDrivenConfig config;
     62     config.set_dropout_probability(0.0);
     63     config.set_learning_rate(1.0);
     64 
     65     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
     66                                             weights_, &dropped_trees,
     67                                             &original_weights));
     68 
     69     // Nothing changed
     70     EXPECT_TRUE(dropped_trees.empty());
     71     EXPECT_TRUE(original_weights.empty());
     72   }
     73   // Drop out all trees
     74   {
     75     LearningRateDropoutDrivenConfig config;
     76     config.set_dropout_probability(1.0);
     77     config.set_learning_rate(1.0);
     78 
     79     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
     80                                             weights_, &dropped_trees,
     81                                             &original_weights));
     82 
     83     // No trees left
     84     EXPECT_EQ(kNumTrees, dropped_trees.size());
     85     EXPECT_EQ(kNumTrees, original_weights.size());
     86     EXPECT_EQ(original_weights, weights_);
     87   }
     88   // 50% probability of dropping a tree
     89   {
     90     const int32 kNumRuns = 1000;
     91     LearningRateDropoutDrivenConfig config;
     92     config.set_dropout_probability(0.5);
     93     config.set_learning_rate(1.0);
     94 
     95     int32 total_num_trees = 0;
     96     for (int i = 0; i < kNumRuns; ++i) {
     97       // draw random seeds
     98       uint random_generator_seed =
     99           static_cast<uint>(Env::Default()->NowMicros());
    100       uint32 seed = rand_r(&random_generator_seed) % 100 + i;
    101       TF_EXPECT_OK(DropoutUtils::DropOutTrees(seed, config, trees_not_to_drop,
    102                                               weights_, &dropped_trees,
    103                                               &original_weights));
    104 
    105       // We would expect 400-600 trees left
    106       EXPECT_NEAR(500, kNumTrees - dropped_trees.size(), 100);
    107       total_num_trees += kNumTrees - dropped_trees.size();
    108 
    109       // Trees dropped are unique
    110       unordered_set<int32> ids;
    111       for (const auto& tree : dropped_trees) {
    112         ids.insert(tree);
    113       }
    114       EXPECT_EQ(ids.size(), dropped_trees.size());
    115     }
    116     EXPECT_NEAR(500, total_num_trees / kNumRuns, 5);
    117   }
    118 }
    119 
    120 TEST_F(DropoutUtilsTest, DropoutIgnoresNotToDropTest) {
    121   std::vector<int32> dropped_trees;
    122   std::vector<float> original_weights;
    123 
    124   // Empty do not drop set.
    125   {
    126     std::unordered_set<int32> trees_not_to_drop;
    127 
    128     LearningRateDropoutDrivenConfig config;
    129     config.set_dropout_probability(1.0);
    130     config.set_learning_rate(1.0);
    131 
    132     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    133                                             weights_, &dropped_trees,
    134                                             &original_weights));
    135 
    136     // No trees left
    137     EXPECT_EQ(kNumTrees, dropped_trees.size());
    138     EXPECT_EQ(kNumTrees, original_weights.size());
    139     EXPECT_EQ(original_weights, weights_);
    140   }
    141 
    142   // Do not drop any trees
    143   {
    144     std::unordered_set<int32> trees_not_to_drop;
    145     for (int i = 0; i < kNumTrees; ++i) {
    146       trees_not_to_drop.insert(i);
    147     }
    148 
    149     LearningRateDropoutDrivenConfig config;
    150     config.set_dropout_probability(1.0);
    151     config.set_learning_rate(1.0);
    152 
    153     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    154                                             weights_, &dropped_trees,
    155                                             &original_weights));
    156 
    157     // No trees were dropped - they all were in do not drop set.
    158     EXPECT_EQ(0, dropped_trees.size());
    159     EXPECT_EQ(0, original_weights.size());
    160   }
    161   // Do not drop some trees
    162   {
    163     std::unordered_set<int32> trees_not_to_drop;
    164     trees_not_to_drop.insert(0);
    165     trees_not_to_drop.insert(34);
    166 
    167     LearningRateDropoutDrivenConfig config;
    168     config.set_dropout_probability(1.0);
    169     config.set_learning_rate(1.0);
    170 
    171     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    172                                             weights_, &dropped_trees,
    173                                             &original_weights));
    174 
    175     // No trees were dropped - they all were in do not drop set.
    176     EXPECT_EQ(kNumTrees - 2, dropped_trees.size());
    177     EXPECT_EQ(kNumTrees - 2, original_weights.size());
    178     EXPECT_TRUE(std::find(dropped_trees.begin(), dropped_trees.end(), 0) ==
    179                 dropped_trees.end());
    180     EXPECT_TRUE(std::find(dropped_trees.begin(), dropped_trees.end(), 34) ==
    181                 dropped_trees.end());
    182   }
    183 }
    184 
    185 TEST_F(DropoutUtilsTest, DropoutSeedTest) {
    186   std::unordered_set<int32> trees_not_to_drop;
    187   // Different seeds remove different trees
    188   {
    189     LearningRateDropoutDrivenConfig config;
    190     config.set_dropout_probability(0.5);
    191     config.set_learning_rate(1.0);
    192 
    193     std::vector<int32> dropped_trees_1;
    194     std::vector<float> original_weights_1;
    195     std::vector<int32> dropped_trees_2;
    196     std::vector<float> original_weights_2;
    197 
    198     DecisionTreeEnsembleConfig new_ensemble_1;
    199     DecisionTreeEnsembleConfig new_ensemble_2;
    200 
    201     TF_EXPECT_OK(DropoutUtils::DropOutTrees(
    202         kSeed + 1, config, trees_not_to_drop, weights_, &dropped_trees_1,
    203         &original_weights_1));
    204     TF_EXPECT_OK(DropoutUtils::DropOutTrees(
    205         kSeed + 2, config, trees_not_to_drop, weights_, &dropped_trees_2,
    206         &original_weights_2));
    207 
    208     EXPECT_FALSE(dropped_trees_1 == dropped_trees_2);
    209     EXPECT_FALSE(original_weights_1 == original_weights_2);
    210   }
    211   //  The same seed produces the same result
    212   {
    213     LearningRateDropoutDrivenConfig config;
    214     config.set_dropout_probability(0.5);
    215     config.set_learning_rate(1.0);
    216 
    217     std::vector<int32> dropped_trees_1;
    218     std::vector<float> original_weights_1;
    219     std::vector<int32> dropped_trees_2;
    220     std::vector<float> original_weights_2;
    221 
    222     DecisionTreeEnsembleConfig new_ensemble_1;
    223     DecisionTreeEnsembleConfig new_ensemble_2;
    224 
    225     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    226                                             weights_, &dropped_trees_1,
    227                                             &original_weights_1));
    228     TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    229                                             weights_, &dropped_trees_2,
    230                                             &original_weights_2));
    231 
    232     EXPECT_TRUE(dropped_trees_1 == dropped_trees_2);
    233     EXPECT_TRUE(original_weights_1 == original_weights_2);
    234   }
    235 }
    236 
    237 TEST_F(DropoutUtilsTest, InvalidConfigTest) {
    238   std::vector<int32> dropped_trees;
    239   std::vector<float> original_weights;
    240   std::unordered_set<int32> trees_not_to_drop;
    241   // Negative prob
    242   {
    243     LearningRateDropoutDrivenConfig config;
    244     config.set_dropout_probability(-1.34);
    245 
    246     EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    247                                             weights_, &dropped_trees,
    248                                             &original_weights)
    249                      .ok());
    250   }
    251   // Larger than 1 prob of dropping a tree.
    252   {
    253     LearningRateDropoutDrivenConfig config;
    254     config.set_dropout_probability(1.34);
    255 
    256     EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    257                                             weights_, &dropped_trees,
    258                                             &original_weights)
    259                      .ok());
    260   }
    261   // Negative probability of skipping dropout.
    262   {
    263     LearningRateDropoutDrivenConfig config;
    264     config.set_dropout_probability(0.5);
    265     config.set_probability_of_skipping_dropout(-10);
    266 
    267     DecisionTreeEnsembleConfig new_ensemble;
    268     EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    269                                             weights_, &dropped_trees,
    270                                             &original_weights)
    271                      .ok());
    272   }
    273   // Larger than 1 probability of skipping dropout.
    274   {
    275     LearningRateDropoutDrivenConfig config;
    276     config.set_dropout_probability(0.5);
    277     config.set_probability_of_skipping_dropout(1.2);
    278 
    279     DecisionTreeEnsembleConfig new_ensemble;
    280     EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop,
    281                                             weights_, &dropped_trees,
    282                                             &original_weights)
    283                      .ok());
    284   }
    285 }
    286 namespace {
    287 
    288 void ExpectVecsEquiv(const std::vector<float>& vec1,
    289                      const std::vector<float>& vec2) {
    290   EXPECT_EQ(vec1.size(), vec2.size());
    291   for (int i = 0; i < vec1.size(); ++i) {
    292     EXPECT_NEAR(vec1[i], vec2[i], 1e-3);
    293   }
    294 }
    295 
    296 std::vector<float> GetWeightsByIndex(const std::vector<float>& weights,
    297                                      const std::vector<int>& indices) {
    298   std::vector<float> res;
    299   res.reserve(indices.size());
    300   for (const int index : indices) {
    301     res.push_back(weights[index]);
    302   }
    303   return res;
    304 }
    305 
    306 void MergeLastElements(const int32 last_n, std::vector<float>* weights) {
    307   float sum = 0.0;
    308   for (int i = 0; i < last_n; ++i) {
    309     sum += weights->back();
    310     weights->pop_back();
    311   }
    312   weights->push_back(sum);
    313 }
    314 
    315 }  // namespace
    316 
    317 TEST_F(DropoutUtilsTest, GetTreesWeightsForAddingTreesTest) {
    318   // Adding trees should give the same res in any order
    319   {
    320     std::vector<float> weights = {1.0, 1.0, 1.0, 1.0, 1.0};
    321     std::vector<int32> dropped_1 = {0, 3};
    322 
    323     std::vector<int32> dropped_2 = {0};
    324 
    325     std::vector<float> res_1;
    326     std::vector<float> res_2;
    327     // Do one order
    328     {
    329       std::vector<float> current_weights = weights;
    330       std::vector<int32> num_updates =
    331           std::vector<int32>(current_weights.size(), 1);
    332       DropoutUtils::GetTreesWeightsForAddingTrees(
    333           dropped_1, GetWeightsByIndex(current_weights, dropped_1),
    334           current_weights.size(), 1, &current_weights, &num_updates);
    335       DropoutUtils::GetTreesWeightsForAddingTrees(
    336           dropped_2, GetWeightsByIndex(current_weights, dropped_2),
    337           current_weights.size(), 1, &current_weights, &num_updates);
    338       res_1 = current_weights;
    339     }
    340     // Do another order
    341     {
    342       std::vector<float> current_weights = weights;
    343       std::vector<int32> num_updates =
    344           std::vector<int32>(current_weights.size(), 1);
    345 
    346       DropoutUtils::GetTreesWeightsForAddingTrees(
    347           dropped_2, GetWeightsByIndex(current_weights, dropped_2),
    348           current_weights.size(), 1, &current_weights, &num_updates);
    349       DropoutUtils::GetTreesWeightsForAddingTrees(
    350           dropped_1, GetWeightsByIndex(current_weights, dropped_1),
    351           current_weights.size(), 1, &current_weights, &num_updates);
    352       res_2 = current_weights;
    353     }
    354     // The vectors are the same, but the last two elements have the same sum.
    355     EXPECT_EQ(res_1.size(), 7);
    356     EXPECT_EQ(res_2.size(), 7);
    357 
    358     MergeLastElements(2, &res_1);
    359     MergeLastElements(2, &res_2);
    360 
    361     EXPECT_EQ(res_1, res_2);
    362   }
    363   // Now when the weights are not all 1s
    364   {
    365     std::vector<float> weights = {1.1, 2.1, 3.1, 4.1, 5.1};
    366     std::vector<int32> dropped_1 = {0, 3};
    367 
    368     std::vector<int32> dropped_2 = {0};
    369 
    370     std::vector<float> res_1;
    371     std::vector<float> res_2;
    372     // Do one order
    373     {
    374       std::vector<float> current_weights = weights;
    375       std::vector<int32> num_updates =
    376           std::vector<int32>(current_weights.size(), 1);
    377       DropoutUtils::GetTreesWeightsForAddingTrees(
    378           dropped_1, GetWeightsByIndex(current_weights, dropped_1),
    379           current_weights.size(), 1, &current_weights, &num_updates);
    380       DropoutUtils::GetTreesWeightsForAddingTrees(
    381           dropped_2, GetWeightsByIndex(current_weights, dropped_2),
    382           current_weights.size(), 1, &current_weights, &num_updates);
    383       res_1 = current_weights;
    384     }
    385     // Do another order
    386     {
    387       std::vector<float> current_weights = weights;
    388       std::vector<int32> num_updates =
    389           std::vector<int32>(current_weights.size(), 1);
    390       DropoutUtils::GetTreesWeightsForAddingTrees(
    391           dropped_2, GetWeightsByIndex(current_weights, dropped_2),
    392           current_weights.size(), 1, &current_weights, &num_updates);
    393       DropoutUtils::GetTreesWeightsForAddingTrees(
    394           dropped_1, GetWeightsByIndex(current_weights, dropped_1),
    395           current_weights.size(), 1, &current_weights, &num_updates);
    396       res_2 = current_weights;
    397     }
    398     EXPECT_EQ(res_1.size(), 7);
    399     EXPECT_EQ(res_2.size(), 7);
    400 
    401     // The vectors are the same, but the last two elements have the same sum.
    402     MergeLastElements(2, &res_1);
    403     MergeLastElements(2, &res_2);
    404 
    405     ExpectVecsEquiv(res_1, res_2);
    406   }
    407 }
    408 
    409 TEST_F(DropoutUtilsTest, GetTreesWeightsForAddingTreesIndexTest) {
    410   std::vector<float> weights = {1.0, 1.0, 1.0, 1.0, 1.0};
    411   std::vector<int32> dropped = {0, 3};
    412 
    413   std::vector<float> res;
    414   std::vector<float> res_2;
    415 
    416   // The tree that is added does not yet have an entry in weights vector.
    417   {
    418     std::vector<float> current_weights = weights;
    419     std::vector<int32> num_updates =
    420         std::vector<int32>(current_weights.size(), 1);
    421     DropoutUtils::GetTreesWeightsForAddingTrees(
    422         dropped, GetWeightsByIndex(current_weights, dropped),
    423         current_weights.size(), 1, &current_weights, &num_updates);
    424     EXPECT_EQ(current_weights.size(), weights.size() + 1);
    425     EXPECT_EQ(num_updates.size(), weights.size() + 1);
    426 
    427     std::vector<int32> expected_num_updates = {2, 1, 1, 2, 1, 1};
    428     std::vector<float> expected_weights = {2.0 / 3, 1, 1, 2.0 / 3, 1, 2.0 / 3};
    429     EXPECT_EQ(expected_weights, current_weights);
    430     EXPECT_EQ(expected_num_updates, num_updates);
    431   }
    432   // The tree that is added has already an entry in weights and updates (batch
    433   // mode).
    434   {
    435     std::vector<float> current_weights = weights;
    436     std::vector<int32> num_updates =
    437         std::vector<int32>(current_weights.size(), 1);
    438     DropoutUtils::GetTreesWeightsForAddingTrees(
    439         dropped, GetWeightsByIndex(current_weights, dropped),
    440         current_weights.size() - 1, 1, &current_weights, &num_updates);
    441     EXPECT_EQ(current_weights.size(), weights.size());
    442     EXPECT_EQ(num_updates.size(), weights.size());
    443 
    444     std::vector<int32> expected_num_updates = {2, 1, 1, 2, 2};
    445     std::vector<float> expected_weights = {2.0 / 3, 1, 1, 2.0 / 3, 2.0 / 3};
    446     EXPECT_EQ(expected_weights, current_weights);
    447     EXPECT_EQ(expected_num_updates, num_updates);
    448   }
    449 }
    450 
    451 }  // namespace
    452 }  // namespace utils
    453 }  // namespace boosted_trees
    454 }  // namespace tensorflow
    455