Home | History | Annotate | Download | only in test
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog (at) gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #define EIGEN_USE_THREADS
     11 
     12 
     13 #include "main.h"
     14 #include <iostream>
     15 #include <Eigen/CXX11/Tensor>
     16 
     17 using Eigen::Tensor;
     18 
     19 
     20 void test_multithread_elementwise()
     21 {
     22   Tensor<float, 3> in1(2,3,7);
     23   Tensor<float, 3> in2(2,3,7);
     24   Tensor<float, 3> out(2,3,7);
     25 
     26   in1.setRandom();
     27   in2.setRandom();
     28 
     29   Eigen::ThreadPool tp(internal::random<int>(3, 11));
     30   Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
     31   out.device(thread_pool_device) = in1 + in2 * 3.14f;
     32 
     33   for (int i = 0; i < 2; ++i) {
     34     for (int j = 0; j < 3; ++j) {
     35       for (int k = 0; k < 7; ++k) {
     36         VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
     37       }
     38     }
     39   }
     40 }
     41 
     42 
     43 void test_multithread_compound_assignment()
     44 {
     45   Tensor<float, 3> in1(2,3,7);
     46   Tensor<float, 3> in2(2,3,7);
     47   Tensor<float, 3> out(2,3,7);
     48 
     49   in1.setRandom();
     50   in2.setRandom();
     51 
     52   Eigen::ThreadPool tp(internal::random<int>(3, 11));
     53   Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
     54   out.device(thread_pool_device) = in1;
     55   out.device(thread_pool_device) += in2 * 3.14f;
     56 
     57   for (int i = 0; i < 2; ++i) {
     58     for (int j = 0; j < 3; ++j) {
     59       for (int k = 0; k < 7; ++k) {
     60         VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
     61       }
     62     }
     63   }
     64 }
     65 
     66 template<int DataLayout>
     67 void test_multithread_contraction()
     68 {
     69   Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
     70   Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
     71   Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);
     72 
     73   t_left.setRandom();
     74   t_right.setRandom();
     75 
     76   // this contraction should be equivalent to a single matrix multiplication
     77   typedef Tensor<float, 1>::DimensionPair DimPair;
     78   Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
     79 
     80   typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
     81   MapXf m_left(t_left.data(), 1500, 1147);
     82   MapXf m_right(t_right.data(), 1147, 1400);
     83   Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
     84 
     85   Eigen::ThreadPool tp(4);
     86   Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
     87 
     88   // compute results by separate methods
     89   t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
     90   m_result = m_left * m_right;
     91 
     92  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
     93     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     94     if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
     95       continue;
     96     }
     97     if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
     98       continue;
     99     }
    100     std::cout << "mismatch detected at index " << i << ": " << t_result(i)
    101               << " vs " <<  m_result(i) << std::endl;
    102     assert(false);
    103   }
    104 }
    105 
    106 template<int DataLayout>
    107 void test_contraction_corner_cases()
    108 {
    109   Tensor<float, 2, DataLayout> t_left(32, 500);
    110   Tensor<float, 2, DataLayout> t_right(32, 28*28);
    111   Tensor<float, 2, DataLayout> t_result(500, 28*28);
    112 
    113   t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
    114   t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
    115   t_result = t_result.constant(NAN);
    116 
    117   // this contraction should be equivalent to a single matrix multiplication
    118   typedef Tensor<float, 1>::DimensionPair DimPair;
    119   Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
    120 
    121   typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
    122   MapXf m_left(t_left.data(), 32, 500);
    123   MapXf m_right(t_right.data(), 32, 28*28);
    124   Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);
    125 
    126   Eigen::ThreadPool tp(12);
    127   Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
    128 
    129   // compute results by separate methods
    130   t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
    131   m_result = m_left.transpose() * m_right;
    132 
    133   for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    134     assert(!(numext::isnan)(t_result.data()[i]));
    135     if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
    136       std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
    137       assert(false);
    138     }
    139   }
    140 
    141   t_left.resize(32, 1);
    142   t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
    143   t_result.resize (1, 28*28);
    144   t_result = t_result.constant(NAN);
    145   t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
    146   new(&m_left) MapXf(t_left.data(), 32, 1);
    147   m_result = m_left.transpose() * m_right;
    148   for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    149     assert(!(numext::isnan)(t_result.data()[i]));
    150     if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
    151       std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
    152       assert(false);
    153     }
    154   }
    155 
    156   t_left.resize(32, 500);
    157   t_right.resize(32, 4);
    158   t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
    159   t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
    160   t_result.resize (500, 4);
    161   t_result = t_result.constant(NAN);
    162   t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
    163   new(&m_left) MapXf(t_left.data(), 32, 500);
    164   new(&m_right) MapXf(t_right.data(), 32, 4);
    165   m_result = m_left.transpose() * m_right;
    166   for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    167     assert(!(numext::isnan)(t_result.data()[i]));
    168     if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
    169       std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
    170       assert(false);
    171     }
    172   }
    173 
    174   t_left.resize(32, 1);
    175   t_right.resize(32, 4);
    176   t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
    177   t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
    178   t_result.resize (1, 4);
    179   t_result = t_result.constant(NAN);
    180   t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
    181   new(&m_left) MapXf(t_left.data(), 32, 1);
    182   new(&m_right) MapXf(t_right.data(), 32, 4);
    183   m_result = m_left.transpose() * m_right;
    184   for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    185     assert(!(numext::isnan)(t_result.data()[i]));
    186     if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
    187       std::cout << "mismatch detected: " << t_result.data()[i] << " vs " <<  m_result.data()[i] << std::endl;
    188       assert(false);
    189     }
    190   }
    191 }
    192 
    193 template<int DataLayout>
    194 void test_multithread_contraction_agrees_with_singlethread() {
    195   int contract_size = internal::random<int>(1, 5000);
    196 
    197   Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
    198                                     contract_size,
    199                                     internal::random<int>(1, 100));
    200 
    201   Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25),
    202                                      internal::random<int>(1, 37),
    203                                      contract_size,
    204                                      internal::random<int>(1, 51));
    205 
    206   left.setRandom();
    207   right.setRandom();
    208 
    209   // add constants to shift values away from 0 for more precision
    210   left += left.constant(1.5f);
    211   right += right.constant(1.5f);
    212 
    213   typedef Tensor<float, 1>::DimensionPair DimPair;
    214   Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
    215 
    216   Eigen::ThreadPool tp(internal::random<int>(2, 11));
    217   Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
    218 
    219   Tensor<float, 5, DataLayout> st_result;
    220   st_result = left.contract(right, dims);
    221 
    222   Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
    223   tp_result.device(thread_pool_device) = left.contract(right, dims);
    224 
    225   VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
    226   for (ptrdiff_t i = 0; i < st_result.size(); i++) {
    227     // if both of the values are very small, then do nothing (because the test will fail
    228     // due to numerical precision issues when values are small)
    229     if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
    230       VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
    231     }
    232   }
    233 }
    234 
    235 
    236 template<int DataLayout>
    237 void test_full_contraction() {
    238   int contract_size1 = internal::random<int>(1, 500);
    239   int contract_size2 = internal::random<int>(1, 500);
    240 
    241   Tensor<float, 2, DataLayout> left(contract_size1,
    242                                     contract_size2);
    243   Tensor<float, 2, DataLayout> right(contract_size1,
    244                                     contract_size2);
    245   left.setRandom();
    246   right.setRandom();
    247 
    248   // add constants to shift values away from 0 for more precision
    249   left += left.constant(1.5f);
    250   right += right.constant(1.5f);
    251 
    252   typedef Tensor<float, 2>::DimensionPair DimPair;
    253   Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
    254 
    255   Eigen::ThreadPool tp(internal::random<int>(2, 11));
    256   Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
    257 
    258   Tensor<float, 0, DataLayout> st_result;
    259   st_result = left.contract(right, dims);
    260 
    261   Tensor<float, 0, DataLayout> tp_result;
    262   tp_result.device(thread_pool_device) = left.contract(right, dims);
    263 
    264   VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
    265   // if both of the values are very small, then do nothing (because the test will fail
    266   // due to numerical precision issues when values are small)
    267   if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
    268     VERIFY_IS_APPROX(st_result(), tp_result());
    269   }
    270 }
    271 
    272 template<int DataLayout>
    273 void test_multithreaded_reductions() {
    274   const int num_threads = internal::random<int>(3, 11);
    275   ThreadPool thread_pool(num_threads);
    276   Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
    277 
    278   const int num_rows = internal::random<int>(13, 732);
    279   const int num_cols = internal::random<int>(13, 732);
    280   Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
    281   t1.setRandom();
    282 
    283   Tensor<float, 0, DataLayout> full_redux;
    284   full_redux = t1.sum();
    285 
    286   Tensor<float, 0, DataLayout> full_redux_tp;
    287   full_redux_tp.device(thread_pool_device) = t1.sum();
    288 
    289   // Check that the single threaded and the multi threaded reductions return
    290   // the same result.
    291   VERIFY_IS_APPROX(full_redux(), full_redux_tp());
    292 }
    293 
    294 
    295 void test_memcpy() {
    296 
    297   for (int i = 0; i < 5; ++i) {
    298     const int num_threads = internal::random<int>(3, 11);
    299     Eigen::ThreadPool tp(num_threads);
    300     Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
    301 
    302     const int size = internal::random<int>(13, 7632);
    303     Tensor<float, 1> t1(size);
    304     t1.setRandom();
    305     std::vector<float> result(size);
    306     thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
    307     for (int j = 0; j < size; j++) {
    308       VERIFY_IS_EQUAL(t1(j), result[j]);
    309     }
    310   }
    311 }
    312 
    313 
    314 void test_multithread_random()
    315 {
    316   Eigen::ThreadPool tp(2);
    317   Eigen::ThreadPoolDevice device(&tp, 2);
    318   Tensor<float, 1> t(1 << 20);
    319   t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
    320 }
    321 
    322 template<int DataLayout>
    323 void test_multithread_shuffle()
    324 {
    325   Tensor<float, 4, DataLayout> tensor(17,5,7,11);
    326   tensor.setRandom();
    327 
    328   const int num_threads = internal::random<int>(2, 11);
    329   ThreadPool threads(num_threads);
    330   Eigen::ThreadPoolDevice device(&threads, num_threads);
    331 
    332   Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
    333   array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
    334   shuffle.device(device) = tensor.shuffle(shuffles);
    335 
    336   for (int i = 0; i < 17; ++i) {
    337     for (int j = 0; j < 5; ++j) {
    338       for (int k = 0; k < 7; ++k) {
    339         for (int l = 0; l < 11; ++l) {
    340           VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
    341         }
    342       }
    343     }
    344   }
    345 }
    346 
    347 
// Test driver: registers each thread-pool tensor test with the Eigen
// test harness, grouped into subtests so they can be run in parallel.
void test_cxx11_tensor_thread_pool()
{
  // Elementwise expressions and compound assignment.
  CALL_SUBTEST_1(test_multithread_elementwise());
  CALL_SUBTEST_1(test_multithread_compound_assignment());

  // Large contractions in both storage orders.
  CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
  CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());

  // Multithreaded vs single threaded contraction agreement.
  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
  CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());

  // Exercise various cases that have been problematic in the past.
  CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
  CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());

  CALL_SUBTEST_4(test_full_contraction<ColMajor>());
  CALL_SUBTEST_4(test_full_contraction<RowMajor>());

  // Full reductions.
  CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
  CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());

  // Device memcpy, random generation, and shuffles.
  CALL_SUBTEST_6(test_memcpy());
  CALL_SUBTEST_6(test_multithread_random());
  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
}
    374