Home | History | Annotate | Download | only in test
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2016
      5 // Mehdi Goli    Codeplay Software Ltd.
      6 // Ralph Potter  Codeplay Software Ltd.
      7 // Luke Iwanski  Codeplay Software Ltd.
      8 // Contact: <eigen (at) codeplay.com>
      9 // Benoit Steiner <benoit.steiner.goog (at) gmail.com>
     10 //
     11 // This Source Code Form is subject to the terms of the Mozilla
     12 // Public License v. 2.0. If a copy of the MPL was not distributed
     13 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     14 
     15 
     16 #define EIGEN_TEST_NO_LONGDOUBLE
     17 #define EIGEN_TEST_NO_COMPLEX
     18 #define EIGEN_TEST_FUNC cxx11_tensor_sycl
     19 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
     20 #define EIGEN_USE_SYCL
     21 
     22 #include "main.h"
     23 #include <unsupported/Eigen/CXX11/Tensor>
     24 
     25 using Eigen::array;
     26 using Eigen::SyclDevice;
     27 using Eigen::Tensor;
     28 using Eigen::TensorMap;
     29 
     30 void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) {
     31 
     32   int sizeDim1 = 100;
     33   int sizeDim2 = 100;
     34   int sizeDim3 = 100;
     35   array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
     36   Tensor<float, 3> in1(tensorRange);
     37   Tensor<float, 3> in2(tensorRange);
     38   Tensor<float, 3> in3(tensorRange);
     39   Tensor<float, 3> out(tensorRange);
     40 
     41   in2 = in2.random();
     42   in3 = in3.random();
     43 
     44   float * gpu_in1_data  = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
     45   float * gpu_in2_data  = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
     46   float * gpu_in3_data  = static_cast<float*>(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float)));
     47   float * gpu_out_data =  static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
     48 
     49   TensorMap<Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
     50   TensorMap<Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
     51   TensorMap<Tensor<float, 3>> gpu_in3(gpu_in3_data, tensorRange);
     52   TensorMap<Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
     53 
     54   /// a=1.2f
     55   gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
     56   sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float));
     57   for (int i = 0; i < sizeDim1; ++i) {
     58     for (int j = 0; j < sizeDim2; ++j) {
     59       for (int k = 0; k < sizeDim3; ++k) {
     60         VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
     61       }
     62     }
     63   }
     64   printf("a=1.2f Test passed\n");
     65 
     66   /// a=b*1.2f
     67   gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
     68   sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float));
     69   for (int i = 0; i < sizeDim1; ++i) {
     70     for (int j = 0; j < sizeDim2; ++j) {
     71       for (int k = 0; k < sizeDim3; ++k) {
     72         VERIFY_IS_APPROX(out(i,j,k),
     73                          in1(i,j,k) * 1.2f);
     74       }
     75     }
     76   }
     77   printf("a=b*1.2f Test Passed\n");
     78 
     79   /// c=a*b
     80   sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
     81   gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
     82   sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
     83   for (int i = 0; i < sizeDim1; ++i) {
     84     for (int j = 0; j < sizeDim2; ++j) {
     85       for (int k = 0; k < sizeDim3; ++k) {
     86         VERIFY_IS_APPROX(out(i,j,k),
     87                          in1(i,j,k) *
     88                              in2(i,j,k));
     89       }
     90     }
     91   }
     92   printf("c=a*b Test Passed\n");
     93 
     94   /// c=a+b
     95   gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
     96   sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
     97   for (int i = 0; i < sizeDim1; ++i) {
     98     for (int j = 0; j < sizeDim2; ++j) {
     99       for (int k = 0; k < sizeDim3; ++k) {
    100         VERIFY_IS_APPROX(out(i,j,k),
    101                          in1(i,j,k) +
    102                              in2(i,j,k));
    103       }
    104     }
    105   }
    106   printf("c=a+b Test Passed\n");
    107 
    108   /// c=a*a
    109   gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
    110   sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
    111   for (int i = 0; i < sizeDim1; ++i) {
    112     for (int j = 0; j < sizeDim2; ++j) {
    113       for (int k = 0; k < sizeDim3; ++k) {
    114         VERIFY_IS_APPROX(out(i,j,k),
    115                          in1(i,j,k) *
    116                              in1(i,j,k));
    117       }
    118     }
    119   }
    120   printf("c= a*a Test Passed\n");
    121 
    122   //a*3.14f + b*2.7f
    123   gpu_out.device(sycl_device) =  gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
    124   sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
    125   for (int i = 0; i < sizeDim1; ++i) {
    126     for (int j = 0; j < sizeDim2; ++j) {
    127       for (int k = 0; k < sizeDim3; ++k) {
    128         VERIFY_IS_APPROX(out(i,j,k),
    129                          in1(i,j,k) * 3.14f
    130                        + in2(i,j,k) * 2.7f);
    131       }
    132     }
    133   }
    134   printf("a*3.14f + b*2.7f Test Passed\n");
    135 
    136   ///d= (a>0.5? b:c)
    137   sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float));
    138   gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
    139   sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
    140   for (int i = 0; i < sizeDim1; ++i) {
    141     for (int j = 0; j < sizeDim2; ++j) {
    142       for (int k = 0; k < sizeDim3; ++k) {
    143         VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
    144                                                 ? in2(i, j, k)
    145                                                 : in3(i, j, k));
    146       }
    147     }
    148   }
    149   printf("d= (a>0.5? b:c) Test Passed\n");
    150   sycl_device.deallocate(gpu_in1_data);
    151   sycl_device.deallocate(gpu_in2_data);
    152   sycl_device.deallocate(gpu_in3_data);
    153   sycl_device.deallocate(gpu_out_data);
    154 }
    155 void test_cxx11_tensor_sycl() {
    156   cl::sycl::gpu_selector s;
    157   Eigen::SyclDevice sycl_device(s);
    158   CALL_SUBTEST(test_sycl_cpu(sycl_device));
    159 }
    160