1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog (at) gmail.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #define EIGEN_USE_THREADS 11 12 13 #include "main.h" 14 #include <iostream> 15 #include <Eigen/CXX11/Tensor> 16 17 using Eigen::Tensor; 18 19 20 void test_multithread_elementwise() 21 { 22 Tensor<float, 3> in1(2,3,7); 23 Tensor<float, 3> in2(2,3,7); 24 Tensor<float, 3> out(2,3,7); 25 26 in1.setRandom(); 27 in2.setRandom(); 28 29 Eigen::ThreadPool tp(internal::random<int>(3, 11)); 30 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11)); 31 out.device(thread_pool_device) = in1 + in2 * 3.14f; 32 33 for (int i = 0; i < 2; ++i) { 34 for (int j = 0; j < 3; ++j) { 35 for (int k = 0; k < 7; ++k) { 36 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); 37 } 38 } 39 } 40 } 41 42 43 void test_multithread_compound_assignment() 44 { 45 Tensor<float, 3> in1(2,3,7); 46 Tensor<float, 3> in2(2,3,7); 47 Tensor<float, 3> out(2,3,7); 48 49 in1.setRandom(); 50 in2.setRandom(); 51 52 Eigen::ThreadPool tp(internal::random<int>(3, 11)); 53 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11)); 54 out.device(thread_pool_device) = in1; 55 out.device(thread_pool_device) += in2 * 3.14f; 56 57 for (int i = 0; i < 2; ++i) { 58 for (int j = 0; j < 3; ++j) { 59 for (int k = 0; k < 7; ++k) { 60 VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); 61 } 62 } 63 } 64 } 65 66 template<int DataLayout> 67 void test_multithread_contraction() 68 { 69 Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31); 70 Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10); 71 Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10); 72 73 t_left.setRandom(); 74 t_right.setRandom(); 75 76 // this contraction should be equivalent to a single matrix multiplication 77 typedef Tensor<float, 1>::DimensionPair DimPair; 78 Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}}); 79 80 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; 81 MapXf m_left(t_left.data(), 1500, 1147); 82 MapXf m_right(t_right.data(), 1147, 1400); 83 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400); 84 85 Eigen::ThreadPool tp(4); 86 Eigen::ThreadPoolDevice thread_pool_device(&tp, 4); 87 88 // compute results by separate methods 89 t_result.device(thread_pool_device) = t_left.contract(t_right, dims); 90 m_result = m_left * m_right; 91 92 for (ptrdiff_t i = 0; i < t_result.size(); i++) { 93 VERIFY(&t_result.data()[i] != &m_result.data()[i]); 94 if (fabsf(t_result(i) - m_result(i)) < 1e-4f) { 95 continue; 96 } 97 if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) { 98 continue; 99 } 100 std::cout << "mismatch detected at index " << i << ": " << t_result(i) 101 << " vs " << m_result(i) << std::endl; 102 assert(false); 103 } 104 } 105 106 template<int DataLayout> 107 void test_contraction_corner_cases() 108 { 109 Tensor<float, 2, DataLayout> t_left(32, 500); 110 Tensor<float, 2, DataLayout> t_right(32, 28*28); 111 Tensor<float, 2, DataLayout> t_result(500, 28*28); 112 113 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; 114 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; 115 t_result = t_result.constant(NAN); 116 117 // this contraction should be equivalent to a single matrix multiplication 118 typedef Tensor<float, 1>::DimensionPair DimPair; 119 Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}}; 120 121 typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; 122 MapXf m_left(t_left.data(), 32, 500); 123 MapXf m_right(t_right.data(), 32, 28*28); 124 Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28); 125 126 Eigen::ThreadPool tp(12); 127 Eigen::ThreadPoolDevice thread_pool_device(&tp, 12); 128 129 // compute results by separate methods 130 t_result.device(thread_pool_device) = t_left.contract(t_right, dims); 131 m_result = m_left.transpose() * m_right; 132 133 for (ptrdiff_t i = 0; i < t_result.size(); i++) { 134 assert(!(numext::isnan)(t_result.data()[i])); 135 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { 136 std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; 137 assert(false); 138 } 139 } 140 141 t_left.resize(32, 1); 142 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; 143 t_result.resize (1, 28*28); 144 t_result = t_result.constant(NAN); 145 t_result.device(thread_pool_device) = t_left.contract(t_right, dims); 146 new(&m_left) MapXf(t_left.data(), 32, 1); 147 m_result = m_left.transpose() * m_right; 148 for (ptrdiff_t i = 0; i < t_result.size(); i++) { 149 assert(!(numext::isnan)(t_result.data()[i])); 150 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { 151 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; 152 assert(false); 153 } 154 } 155 156 t_left.resize(32, 500); 157 t_right.resize(32, 4); 158 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; 159 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; 160 t_result.resize (500, 4); 161 t_result = t_result.constant(NAN); 162 t_result.device(thread_pool_device) = t_left.contract(t_right, dims); 163 new(&m_left) MapXf(t_left.data(), 32, 500); 164 new(&m_right) MapXf(t_right.data(), 32, 4); 165 m_result = m_left.transpose() * m_right; 166 for (ptrdiff_t i = 0; i < t_result.size(); i++) { 167 assert(!(numext::isnan)(t_result.data()[i])); 168 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { 169 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; 170 assert(false); 171 } 172 } 173 174 t_left.resize(32, 1); 175 t_right.resize(32, 4); 176 t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; 177 t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; 178 t_result.resize (1, 4); 179 t_result = t_result.constant(NAN); 180 t_result.device(thread_pool_device) = t_left.contract(t_right, dims); 181 new(&m_left) MapXf(t_left.data(), 32, 1); 182 new(&m_right) MapXf(t_right.data(), 32, 4); 183 m_result = m_left.transpose() * m_right; 184 for (ptrdiff_t i = 0; i < t_result.size(); i++) { 185 assert(!(numext::isnan)(t_result.data()[i])); 186 if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { 187 std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; 188 assert(false); 189 } 190 } 191 } 192 193 template<int DataLayout> 194 void test_multithread_contraction_agrees_with_singlethread() { 195 int contract_size = internal::random<int>(1, 5000); 196 197 Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80), 198 contract_size, 199 internal::random<int>(1, 100)); 200 201 Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25), 202 internal::random<int>(1, 37), 203 contract_size, 204 internal::random<int>(1, 51)); 205 206 left.setRandom(); 207 right.setRandom(); 208 209 // add constants to shift values away from 0 for more precision 210 left += left.constant(1.5f); 211 right += right.constant(1.5f); 212 213 typedef Tensor<float, 1>::DimensionPair DimPair; 214 Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}}); 215 216 Eigen::ThreadPool tp(internal::random<int>(2, 11)); 217 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); 218 219 Tensor<float, 5, DataLayout> st_result; 220 st_result = left.contract(right, dims); 221 222 Tensor<float, 5, DataLayout> tp_result(st_result.dimensions()); 223 tp_result.device(thread_pool_device) = left.contract(right, dims); 224 225 VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); 226 for (ptrdiff_t i = 0; i < st_result.size(); i++) { 227 // if both of the values are very small, then do nothing (because the test will fail 228 // due to numerical precision issues when values are small) 229 if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) { 230 VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]); 231 } 232 } 233 } 234 235 236 template<int DataLayout> 237 void test_full_contraction() { 238 int contract_size1 = internal::random<int>(1, 500); 239 int contract_size2 = internal::random<int>(1, 500); 240 241 Tensor<float, 2, DataLayout> left(contract_size1, 242 contract_size2); 243 Tensor<float, 2, DataLayout> right(contract_size1, 244 contract_size2); 245 left.setRandom(); 246 right.setRandom(); 247 248 // add constants to shift values away from 0 for more precision 249 left += left.constant(1.5f); 250 right += right.constant(1.5f); 251 252 typedef Tensor<float, 2>::DimensionPair DimPair; 253 Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}}); 254 255 Eigen::ThreadPool tp(internal::random<int>(2, 11)); 256 Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); 257 258 Tensor<float, 0, DataLayout> st_result; 259 st_result = left.contract(right, dims); 260 261 Tensor<float, 0, DataLayout> tp_result; 262 tp_result.device(thread_pool_device) = left.contract(right, dims); 263 264 VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); 265 // if both of the values are very small, then do nothing (because the test will fail 266 // due to numerical precision issues when values are small) 267 if (numext::abs(st_result() - tp_result()) >= 1e-4f) { 268 VERIFY_IS_APPROX(st_result(), tp_result()); 269 } 270 } 271 272 template<int DataLayout> 273 void test_multithreaded_reductions() { 274 const int num_threads = internal::random<int>(3, 11); 275 ThreadPool thread_pool(num_threads); 276 Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads); 277 278 const int num_rows = internal::random<int>(13, 732); 279 const int num_cols = internal::random<int>(13, 732); 280 Tensor<float, 2, DataLayout> t1(num_rows, num_cols); 281 t1.setRandom(); 282 283 Tensor<float, 0, DataLayout> full_redux; 284 full_redux = t1.sum(); 285 286 Tensor<float, 0, DataLayout> full_redux_tp; 287 full_redux_tp.device(thread_pool_device) = t1.sum(); 288 289 // Check that the single threaded and the multi threaded reductions return 290 // the same result. 291 VERIFY_IS_APPROX(full_redux(), full_redux_tp()); 292 } 293 294 295 void test_memcpy() { 296 297 for (int i = 0; i < 5; ++i) { 298 const int num_threads = internal::random<int>(3, 11); 299 Eigen::ThreadPool tp(num_threads); 300 Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads); 301 302 const int size = internal::random<int>(13, 7632); 303 Tensor<float, 1> t1(size); 304 t1.setRandom(); 305 std::vector<float> result(size); 306 thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float)); 307 for (int j = 0; j < size; j++) { 308 VERIFY_IS_EQUAL(t1(j), result[j]); 309 } 310 } 311 } 312 313 314 void test_multithread_random() 315 { 316 Eigen::ThreadPool tp(2); 317 Eigen::ThreadPoolDevice device(&tp, 2); 318 Tensor<float, 1> t(1 << 20); 319 t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>(); 320 } 321 322 template<int DataLayout> 323 void test_multithread_shuffle() 324 { 325 Tensor<float, 4, DataLayout> tensor(17,5,7,11); 326 tensor.setRandom(); 327 328 const int num_threads = internal::random<int>(2, 11); 329 ThreadPool threads(num_threads); 330 Eigen::ThreadPoolDevice device(&threads, num_threads); 331 332 Tensor<float, 4, DataLayout> shuffle(7,5,11,17); 333 array<ptrdiff_t, 4> shuffles = {{2,1,3,0}}; 334 shuffle.device(device) = tensor.shuffle(shuffles); 335 336 for (int i = 0; i < 17; ++i) { 337 for (int j = 0; j < 5; ++j) { 338 for (int k = 0; k < 7; ++k) { 339 for (int l = 0; l < 11; ++l) { 340 VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i)); 341 } 342 } 343 } 344 } 345 } 346 347 348 void test_cxx11_tensor_thread_pool() 349 { 350 CALL_SUBTEST_1(test_multithread_elementwise()); 351 CALL_SUBTEST_1(test_multithread_compound_assignment()); 352 353 CALL_SUBTEST_2(test_multithread_contraction<ColMajor>()); 354 CALL_SUBTEST_2(test_multithread_contraction<RowMajor>()); 355 356 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>()); 357 CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>()); 358 359 // Exercise various cases that have been problematic in the past. 360 CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>()); 361 CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>()); 362 363 CALL_SUBTEST_4(test_full_contraction<ColMajor>()); 364 CALL_SUBTEST_4(test_full_contraction<RowMajor>()); 365 366 CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>()); 367 CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>()); 368 369 CALL_SUBTEST_6(test_memcpy()); 370 CALL_SUBTEST_6(test_multithread_random()); 371 CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>()); 372 CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>()); 373 } 374