1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 #include "tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h" 16 17 #include <sys/types.h> 18 #include <algorithm> 19 #include <cstdlib> 20 #include <functional> 21 #include <iterator> 22 #include <unordered_set> 23 #include <utility> 24 25 #include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" // NOLINT 26 #include "tensorflow/core/lib/core/status_test_util.h" 27 #include "tensorflow/core/platform/env.h" 28 29 using std::unordered_set; 30 using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig; 31 using tensorflow::boosted_trees::trees::DecisionTreeEnsembleConfig; 32 33 namespace tensorflow { 34 namespace boosted_trees { 35 namespace utils { 36 namespace { 37 38 const uint32 kSeed = 123; 39 const int32 kNumTrees = 1000; 40 41 class DropoutUtilsTest : public ::testing::Test { 42 public: 43 void SetUp() override { 44 // Fill an weights. 45 for (int i = 0; i < kNumTrees; ++i) { 46 weights_.push_back(1.1 + 0.4 * i); 47 } 48 } 49 50 protected: 51 std::vector<float> weights_; 52 }; 53 54 TEST_F(DropoutUtilsTest, DropoutProbabilityTest) { 55 std::vector<int32> dropped_trees; 56 std::vector<float> original_weights; 57 std::unordered_set<int32> trees_not_to_drop; 58 59 // Do not drop any trees 60 { 61 LearningRateDropoutDrivenConfig config; 62 config.set_dropout_probability(0.0); 63 config.set_learning_rate(1.0); 64 65 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 66 weights_, &dropped_trees, 67 &original_weights)); 68 69 // Nothing changed 70 EXPECT_TRUE(dropped_trees.empty()); 71 EXPECT_TRUE(original_weights.empty()); 72 } 73 // Drop out all trees 74 { 75 LearningRateDropoutDrivenConfig config; 76 config.set_dropout_probability(1.0); 77 config.set_learning_rate(1.0); 78 79 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 80 weights_, &dropped_trees, 81 &original_weights)); 82 83 // No trees left 84 EXPECT_EQ(kNumTrees, dropped_trees.size()); 85 EXPECT_EQ(kNumTrees, original_weights.size()); 86 EXPECT_EQ(original_weights, weights_); 87 } 88 // 50% probability of dropping a tree 89 { 90 const int32 kNumRuns = 1000; 91 LearningRateDropoutDrivenConfig config; 92 config.set_dropout_probability(0.5); 93 config.set_learning_rate(1.0); 94 95 int32 total_num_trees = 0; 96 for (int i = 0; i < kNumRuns; ++i) { 97 // draw random seeds 98 uint random_generator_seed = 99 static_cast<uint>(Env::Default()->NowMicros()); 100 uint32 seed = rand_r(&random_generator_seed) % 100 + i; 101 TF_EXPECT_OK(DropoutUtils::DropOutTrees(seed, config, trees_not_to_drop, 102 weights_, &dropped_trees, 103 &original_weights)); 104 105 // We would expect 400-600 trees left 106 EXPECT_NEAR(500, kNumTrees - dropped_trees.size(), 100); 107 total_num_trees += kNumTrees - dropped_trees.size(); 108 109 // Trees dropped are unique 110 unordered_set<int32> ids; 111 for (const auto& tree : dropped_trees) { 112 ids.insert(tree); 113 } 114 EXPECT_EQ(ids.size(), dropped_trees.size()); 115 } 116 EXPECT_NEAR(500, total_num_trees / kNumRuns, 5); 117 } 118 } 119 120 TEST_F(DropoutUtilsTest, DropoutIgnoresNotToDropTest) { 121 std::vector<int32> dropped_trees; 122 std::vector<float> original_weights; 123 124 // Empty do not drop set. 125 { 126 std::unordered_set<int32> trees_not_to_drop; 127 128 LearningRateDropoutDrivenConfig config; 129 config.set_dropout_probability(1.0); 130 config.set_learning_rate(1.0); 131 132 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 133 weights_, &dropped_trees, 134 &original_weights)); 135 136 // No trees left 137 EXPECT_EQ(kNumTrees, dropped_trees.size()); 138 EXPECT_EQ(kNumTrees, original_weights.size()); 139 EXPECT_EQ(original_weights, weights_); 140 } 141 142 // Do not drop any trees 143 { 144 std::unordered_set<int32> trees_not_to_drop; 145 for (int i = 0; i < kNumTrees; ++i) { 146 trees_not_to_drop.insert(i); 147 } 148 149 LearningRateDropoutDrivenConfig config; 150 config.set_dropout_probability(1.0); 151 config.set_learning_rate(1.0); 152 153 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 154 weights_, &dropped_trees, 155 &original_weights)); 156 157 // No trees were dropped - they all were in do not drop set. 158 EXPECT_EQ(0, dropped_trees.size()); 159 EXPECT_EQ(0, original_weights.size()); 160 } 161 // Do not drop some trees 162 { 163 std::unordered_set<int32> trees_not_to_drop; 164 trees_not_to_drop.insert(0); 165 trees_not_to_drop.insert(34); 166 167 LearningRateDropoutDrivenConfig config; 168 config.set_dropout_probability(1.0); 169 config.set_learning_rate(1.0); 170 171 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 172 weights_, &dropped_trees, 173 &original_weights)); 174 175 // No trees were dropped - they all were in do not drop set. 176 EXPECT_EQ(kNumTrees - 2, dropped_trees.size()); 177 EXPECT_EQ(kNumTrees - 2, original_weights.size()); 178 EXPECT_TRUE(std::find(dropped_trees.begin(), dropped_trees.end(), 0) == 179 dropped_trees.end()); 180 EXPECT_TRUE(std::find(dropped_trees.begin(), dropped_trees.end(), 34) == 181 dropped_trees.end()); 182 } 183 } 184 185 TEST_F(DropoutUtilsTest, DropoutSeedTest) { 186 std::unordered_set<int32> trees_not_to_drop; 187 // Different seeds remove different trees 188 { 189 LearningRateDropoutDrivenConfig config; 190 config.set_dropout_probability(0.5); 191 config.set_learning_rate(1.0); 192 193 std::vector<int32> dropped_trees_1; 194 std::vector<float> original_weights_1; 195 std::vector<int32> dropped_trees_2; 196 std::vector<float> original_weights_2; 197 198 DecisionTreeEnsembleConfig new_ensemble_1; 199 DecisionTreeEnsembleConfig new_ensemble_2; 200 201 TF_EXPECT_OK(DropoutUtils::DropOutTrees( 202 kSeed + 1, config, trees_not_to_drop, weights_, &dropped_trees_1, 203 &original_weights_1)); 204 TF_EXPECT_OK(DropoutUtils::DropOutTrees( 205 kSeed + 2, config, trees_not_to_drop, weights_, &dropped_trees_2, 206 &original_weights_2)); 207 208 EXPECT_FALSE(dropped_trees_1 == dropped_trees_2); 209 EXPECT_FALSE(original_weights_1 == original_weights_2); 210 } 211 // The same seed produces the same result 212 { 213 LearningRateDropoutDrivenConfig config; 214 config.set_dropout_probability(0.5); 215 config.set_learning_rate(1.0); 216 217 std::vector<int32> dropped_trees_1; 218 std::vector<float> original_weights_1; 219 std::vector<int32> dropped_trees_2; 220 std::vector<float> original_weights_2; 221 222 DecisionTreeEnsembleConfig new_ensemble_1; 223 DecisionTreeEnsembleConfig new_ensemble_2; 224 225 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 226 weights_, &dropped_trees_1, 227 &original_weights_1)); 228 TF_EXPECT_OK(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 229 weights_, &dropped_trees_2, 230 &original_weights_2)); 231 232 EXPECT_TRUE(dropped_trees_1 == dropped_trees_2); 233 EXPECT_TRUE(original_weights_1 == original_weights_2); 234 } 235 } 236 237 TEST_F(DropoutUtilsTest, InvalidConfigTest) { 238 std::vector<int32> dropped_trees; 239 std::vector<float> original_weights; 240 std::unordered_set<int32> trees_not_to_drop; 241 // Negative prob 242 { 243 LearningRateDropoutDrivenConfig config; 244 config.set_dropout_probability(-1.34); 245 246 EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 247 weights_, &dropped_trees, 248 &original_weights) 249 .ok()); 250 } 251 // Larger than 1 prob of dropping a tree. 252 { 253 LearningRateDropoutDrivenConfig config; 254 config.set_dropout_probability(1.34); 255 256 EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 257 weights_, &dropped_trees, 258 &original_weights) 259 .ok()); 260 } 261 // Negative probability of skipping dropout. 262 { 263 LearningRateDropoutDrivenConfig config; 264 config.set_dropout_probability(0.5); 265 config.set_probability_of_skipping_dropout(-10); 266 267 DecisionTreeEnsembleConfig new_ensemble; 268 EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 269 weights_, &dropped_trees, 270 &original_weights) 271 .ok()); 272 } 273 // Larger than 1 probability of skipping dropout. 274 { 275 LearningRateDropoutDrivenConfig config; 276 config.set_dropout_probability(0.5); 277 config.set_probability_of_skipping_dropout(1.2); 278 279 DecisionTreeEnsembleConfig new_ensemble; 280 EXPECT_FALSE(DropoutUtils::DropOutTrees(kSeed, config, trees_not_to_drop, 281 weights_, &dropped_trees, 282 &original_weights) 283 .ok()); 284 } 285 } 286 namespace { 287 288 void ExpectVecsEquiv(const std::vector<float>& vec1, 289 const std::vector<float>& vec2) { 290 EXPECT_EQ(vec1.size(), vec2.size()); 291 for (int i = 0; i < vec1.size(); ++i) { 292 EXPECT_NEAR(vec1[i], vec2[i], 1e-3); 293 } 294 } 295 296 std::vector<float> GetWeightsByIndex(const std::vector<float>& weights, 297 const std::vector<int>& indices) { 298 std::vector<float> res; 299 res.reserve(indices.size()); 300 for (const int index : indices) { 301 res.push_back(weights[index]); 302 } 303 return res; 304 } 305 306 void MergeLastElements(const int32 last_n, std::vector<float>* weights) { 307 float sum = 0.0; 308 for (int i = 0; i < last_n; ++i) { 309 sum += weights->back(); 310 weights->pop_back(); 311 } 312 weights->push_back(sum); 313 } 314 315 } // namespace 316 317 TEST_F(DropoutUtilsTest, GetTreesWeightsForAddingTreesTest) { 318 // Adding trees should give the same res in any order 319 { 320 std::vector<float> weights = {1.0, 1.0, 1.0, 1.0, 1.0}; 321 std::vector<int32> dropped_1 = {0, 3}; 322 323 std::vector<int32> dropped_2 = {0}; 324 325 std::vector<float> res_1; 326 std::vector<float> res_2; 327 // Do one order 328 { 329 std::vector<float> current_weights = weights; 330 std::vector<int32> num_updates = 331 std::vector<int32>(current_weights.size(), 1); 332 DropoutUtils::GetTreesWeightsForAddingTrees( 333 dropped_1, GetWeightsByIndex(current_weights, dropped_1), 334 current_weights.size(), 1, ¤t_weights, &num_updates); 335 DropoutUtils::GetTreesWeightsForAddingTrees( 336 dropped_2, GetWeightsByIndex(current_weights, dropped_2), 337 current_weights.size(), 1, ¤t_weights, &num_updates); 338 res_1 = current_weights; 339 } 340 // Do another order 341 { 342 std::vector<float> current_weights = weights; 343 std::vector<int32> num_updates = 344 std::vector<int32>(current_weights.size(), 1); 345 346 DropoutUtils::GetTreesWeightsForAddingTrees( 347 dropped_2, GetWeightsByIndex(current_weights, dropped_2), 348 current_weights.size(), 1, ¤t_weights, &num_updates); 349 DropoutUtils::GetTreesWeightsForAddingTrees( 350 dropped_1, GetWeightsByIndex(current_weights, dropped_1), 351 current_weights.size(), 1, ¤t_weights, &num_updates); 352 res_2 = current_weights; 353 } 354 // The vectors are the same, but the last two elements have the same sum. 355 EXPECT_EQ(res_1.size(), 7); 356 EXPECT_EQ(res_2.size(), 7); 357 358 MergeLastElements(2, &res_1); 359 MergeLastElements(2, &res_2); 360 361 EXPECT_EQ(res_1, res_2); 362 } 363 // Now when the weights are not all 1s 364 { 365 std::vector<float> weights = {1.1, 2.1, 3.1, 4.1, 5.1}; 366 std::vector<int32> dropped_1 = {0, 3}; 367 368 std::vector<int32> dropped_2 = {0}; 369 370 std::vector<float> res_1; 371 std::vector<float> res_2; 372 // Do one order 373 { 374 std::vector<float> current_weights = weights; 375 std::vector<int32> num_updates = 376 std::vector<int32>(current_weights.size(), 1); 377 DropoutUtils::GetTreesWeightsForAddingTrees( 378 dropped_1, GetWeightsByIndex(current_weights, dropped_1), 379 current_weights.size(), 1, ¤t_weights, &num_updates); 380 DropoutUtils::GetTreesWeightsForAddingTrees( 381 dropped_2, GetWeightsByIndex(current_weights, dropped_2), 382 current_weights.size(), 1, ¤t_weights, &num_updates); 383 res_1 = current_weights; 384 } 385 // Do another order 386 { 387 std::vector<float> current_weights = weights; 388 std::vector<int32> num_updates = 389 std::vector<int32>(current_weights.size(), 1); 390 DropoutUtils::GetTreesWeightsForAddingTrees( 391 dropped_2, GetWeightsByIndex(current_weights, dropped_2), 392 current_weights.size(), 1, ¤t_weights, &num_updates); 393 DropoutUtils::GetTreesWeightsForAddingTrees( 394 dropped_1, GetWeightsByIndex(current_weights, dropped_1), 395 current_weights.size(), 1, ¤t_weights, &num_updates); 396 res_2 = current_weights; 397 } 398 EXPECT_EQ(res_1.size(), 7); 399 EXPECT_EQ(res_2.size(), 7); 400 401 // The vectors are the same, but the last two elements have the same sum. 402 MergeLastElements(2, &res_1); 403 MergeLastElements(2, &res_2); 404 405 ExpectVecsEquiv(res_1, res_2); 406 } 407 } 408 409 TEST_F(DropoutUtilsTest, GetTreesWeightsForAddingTreesIndexTest) { 410 std::vector<float> weights = {1.0, 1.0, 1.0, 1.0, 1.0}; 411 std::vector<int32> dropped = {0, 3}; 412 413 std::vector<float> res; 414 std::vector<float> res_2; 415 416 // The tree that is added does not yet have an entry in weights vector. 417 { 418 std::vector<float> current_weights = weights; 419 std::vector<int32> num_updates = 420 std::vector<int32>(current_weights.size(), 1); 421 DropoutUtils::GetTreesWeightsForAddingTrees( 422 dropped, GetWeightsByIndex(current_weights, dropped), 423 current_weights.size(), 1, ¤t_weights, &num_updates); 424 EXPECT_EQ(current_weights.size(), weights.size() + 1); 425 EXPECT_EQ(num_updates.size(), weights.size() + 1); 426 427 std::vector<int32> expected_num_updates = {2, 1, 1, 2, 1, 1}; 428 std::vector<float> expected_weights = {2.0 / 3, 1, 1, 2.0 / 3, 1, 2.0 / 3}; 429 EXPECT_EQ(expected_weights, current_weights); 430 EXPECT_EQ(expected_num_updates, num_updates); 431 } 432 // The tree that is added has already an entry in weights and updates (batch 433 // mode). 434 { 435 std::vector<float> current_weights = weights; 436 std::vector<int32> num_updates = 437 std::vector<int32>(current_weights.size(), 1); 438 DropoutUtils::GetTreesWeightsForAddingTrees( 439 dropped, GetWeightsByIndex(current_weights, dropped), 440 current_weights.size() - 1, 1, ¤t_weights, &num_updates); 441 EXPECT_EQ(current_weights.size(), weights.size()); 442 EXPECT_EQ(num_updates.size(), weights.size()); 443 444 std::vector<int32> expected_num_updates = {2, 1, 1, 2, 2}; 445 std::vector<float> expected_weights = {2.0 / 3, 1, 1, 2.0 / 3, 2.0 / 3}; 446 EXPECT_EQ(expected_weights, current_weights); 447 EXPECT_EQ(expected_num_updates, num_updates); 448 } 449 } 450 451 } // namespace 452 } // namespace utils 453 } // namespace boosted_trees 454 } // namespace tensorflow 455