Home | History | Annotate | Download | only in test
      1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include <unistd.h>
     16 #ifdef __APPLE__
     17 #include <sys/time.h>
     18 #endif
     19 
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <vector>
     26 
     27 #include "../meta/legacy_multi_thread_gemm.h"
     28 #include "../public/gemmlowp.h"
     29 #include "test.h"
// Let's include these so we make sure they always compile.
     31 #include "../meta/multi_thread_gemm.h"
     32 #include "../meta/multi_thread_transform.h"
     33 #include "../meta/legacy_multi_thread_common.h"
     34 
     35 #if defined(__arm__) && !defined(GEMMLOWP_NEON)
     36 #warning "Building without NEON support on ARM, check your compiler setup!"
     37 #endif
     38 
     39 double time() {
     40 #ifdef __APPLE__
     41   timeval t;
     42   gettimeofday(&t, nullptr);
     43   return t.tv_sec + 1e-6 * t.tv_usec;
     44 #else
     45   timespec t;
     46   clock_gettime(CLOCK_REALTIME, &t);
     47   return t.tv_sec + 1e-9 * t.tv_nsec;
     48 #endif
     49 }
     50 
     51 void prepare_test_data(std::uint8_t* data, std::int32_t rows, std::int32_t cols,
     52                        std::int32_t seed, std::int32_t seed_2) {
     53   std::int32_t value = seed;
     54   for (int i = 0; i < rows; ++i) {
     55     for (int j = 0; j < cols; ++j) {
     56       data[i * cols + j] = static_cast<std::uint8_t>(value);
     57       value = ((value * seed_2) + seed) % 256;
     58     }
     59   }
     60 }
     61 
     62 void check_result(std::uint8_t* left, std::uint8_t* right, std::uint8_t* result,
     63                   std::int32_t rows, std::int32_t cols, std::int32_t depth,
     64                   std::int32_t lhs_offset, std::int32_t rhs_offset,
     65                   std::int32_t sum_offset, std::int32_t mul_offset,
     66                   std::int32_t shift) {
     67   std::int32_t rounding = (1 << (shift - 1));
     68   std::int32_t wrong = 0;
     69   for (int i = 0; i < rows; ++i) {
     70     for (int j = 0; j < cols; ++j) {
     71       std::int32_t expected = 0;
     72       for (int k = 0; k < depth; ++k) {
     73         expected +=
     74             (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) *
     75             (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset);
     76       }
     77       expected += sum_offset;
     78       expected *= mul_offset;
     79       expected += rounding;
     80       expected = (expected >> shift);
     81       if (expected < 0) {
     82         expected = 0;
     83       } else if (expected > 255) {
     84         expected = 255;
     85       }
     86       expected = static_cast<std::int32_t>(static_cast<std::uint8_t>(expected));
     87       std::int32_t actual = static_cast<std::int32_t>(result[i * cols + j]);
     88       if (actual != expected) {
     89         std::cout << "(" << i << ", " << j << "): " << expected << "!="
     90                   << actual << std::endl;
     91         wrong++;
     92       }
     93     }
     94   }
     95   if (wrong > 0) {
     96     std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
     97               << wrong << "/" << (rows * cols) << std::endl
     98               << std::flush;
     99     std::exit(1);
    100   } else {
    101     std::cout << "." << std::flush;
    102   }
    103 }
    104 
    105 void check_result_f(std::uint8_t* left, std::uint8_t* right, float* result,
    106                     std::int32_t rows, std::int32_t cols, std::int32_t depth,
    107                     std::int32_t lhs_offset, std::int32_t rhs_offset,
    108                     float result_offset) {
    109   std::int32_t wrong = 0;
    110   for (int i = 0; i < rows; ++i) {
    111     for (int j = 0; j < cols; ++j) {
    112       std::int32_t expected = 0;
    113       for (int k = 0; k < depth; ++k) {
    114         expected +=
    115             (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) *
    116             (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset);
    117       }
    118       float expected_float = static_cast<float>(expected) * result_offset;
    119       float actual_float = result[i * cols + j];
    120       if (actual_float != expected_float) {
    121         std::cout << "(" << i << ", " << j << "): " << expected_float << "!="
    122                   << actual_float << std::endl;
    123         wrong++;
    124       }
    125     }
    126   }
    127   if (wrong > 0) {
    128     std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
    129               << wrong << "/" << (rows * cols) << std::endl
    130               << std::flush;
    131     std::exit(1);
    132   } else {
    133     std::cout << "." << std::flush;
    134   }
    135 }
    136 
    137 
    138 void check_result_i32(std::uint8_t* left, std::uint8_t* right,
    139                       std::int32_t* result, std::int32_t rows,
    140                       std::int32_t cols, std::int32_t depth,
    141                       std::int32_t lhs_offset, std::int32_t rhs_offset) {
    142   std::int32_t wrong = 0;
    143   for (int i = 0; i < rows; ++i) {
    144     for (int j = 0; j < cols; ++j) {
    145       std::int32_t expected = 0;
    146       for (int k = 0; k < depth; ++k) {
    147         expected +=
    148             (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) *
    149             (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset);
    150       }
    151       std::int32_t actual = result[i * cols + j];
    152       if (actual != expected) {
    153         std::cout << "(" << i << ", " << j << "): " << expected << "!="
    154                   << actual << std::endl;
    155         wrong++;
    156       }
    157     }
    158   }
    159   if (wrong > 0) {
    160     std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
    161               << wrong << "/" << (rows * cols) << std::endl
    162               << std::flush;
    163     std::exit(1);
    164   } else {
    165     std::cout << "." << std::flush;
    166   }
    167 }
    168 
    169 template <typename T>
    170 void clear(T* result, std::int32_t rows, std::int32_t cols) {
    171   for (int i = 0; i < rows * cols; ++i) {
    172     result[i] = static_cast<T>(0);
    173   }
    174 }
    175 
    176 void test(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
    177           std::int32_t m, std::int32_t n, std::int32_t k, std::uint8_t* result,
    178           gemmlowp::WorkersPool* pool, std::int32_t pool_size) {
    179   prepare_test_data(lhs, m, k, 11, 13);
    180   prepare_test_data(rhs, n, k, 177, 19);
    181 
    182   clear(result, m, n);
    183   gemmlowp::meta::multi_thread_gemm_q8(pool, pool_size, scratch, lhs, rhs, m, n,
    184                                        k, -127, -127, 127 * k, 1, 7, result);
    185   check_result(lhs, rhs, result, m, n, k, -127, -127, 127 * k, 1, 7);
    186 }
    187 
    188 void test_f(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
    189             std::int32_t m, std::int32_t n, std::int32_t k, float* result,
    190             gemmlowp::WorkersPool* pool, std::int32_t pool_size) {
    191   prepare_test_data(lhs, m, k, 11, 13);
    192   prepare_test_data(rhs, n, k, 177, 19);
    193 
    194   clear(result, m, n);
    195   float scale = 1.0f / 1234567.8f;
    196   gemmlowp::meta::multi_thread_gemm_f(pool, pool_size, scratch, lhs, rhs, m, n,
    197                                       k, -127, -127, scale, result);
    198   check_result_f(lhs, rhs, result, m, n, k, -127, -127, scale);
    199 }
    200 
    201 void test_i32(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
    202               std::int32_t m, std::int32_t n, std::int32_t k,
    203               std::int32_t* result, gemmlowp::WorkersPool* pool,
    204               std::int32_t pool_size) {
    205   prepare_test_data(lhs, m, k, 11, 13);
    206   prepare_test_data(rhs, n, k, 177, 19);
    207 
    208   clear(result, m, n);
    209   gemmlowp::meta::multi_thread_gemm_i32(pool, pool_size, scratch, lhs, rhs, m,
    210                                         n, k, -127, -127, result);
    211   check_result_i32(lhs, rhs, result, m, n, k, -127, -127);
    212 }
    213 
    214 void q_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
    215              int kd, std::uint8_t* scratch, std::uint8_t* left,
    216              std::uint8_t* right, std::uint8_t* result,
    217              gemmlowp::WorkersPool* pool, int t) {
    218   for (int m = mi; m < mx; m += md) {
    219     for (int n = ni; n < nx; n += nd) {
    220       for (int k = ki; k < kx; k += kd) {
    221         test(scratch, left, right, m, n, k, result, pool, t);
    222       }
    223     }
    224   }
    225   std::cout << std::endl;
    226 }
    227 
    228 void f_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
    229              int kd, std::uint8_t* scratch, std::uint8_t* left,
    230              std::uint8_t* right, float* result, gemmlowp::WorkersPool* pool,
    231              int t) {
    232   for (int m = mi; m < mx; m += md) {
    233     for (int n = ni; n < nx; n += nd) {
    234       for (int k = ki; k < kx; k += kd) {
    235         test_f(scratch, left, right, m, n, k, result, pool, t);
    236       }
    237     }
    238   }
    239   std::cout << std::endl;
    240 }
    241 
    242 void i32_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
    243                int kd, std::uint8_t* scratch, std::uint8_t* left,
    244                std::uint8_t* right, std::int32_t* result,
    245                gemmlowp::WorkersPool* pool, int t) {
    246   for (int m = mi; m < mx; m += md) {
    247     for (int n = ni; n < nx; n += nd) {
    248       for (int k = ki; k < kx; k += kd) {
    249         test_i32(scratch, left, right, m, n, k, result, pool, t);
    250       }
    251     }
    252   }
    253   std::cout << std::endl;
    254 }
    255 
    256 int main(int argc, char* argv[]) {
    257   bool run_long_test = false;
    258 
    259   if (argc > 1 && strcmp(argv[1], "long")) {
    260     run_long_test = true;
    261   }
    262 
    263   const std::int32_t min_n = 1;
    264   const std::int32_t min_m = 1;
    265   const std::int32_t min_k = 8;
    266 
    267   const std::int32_t max_n = 1024;
    268   const std::int32_t max_m = 1024;
    269   const std::int32_t max_k = 2048;
    270 
    271   std::uint8_t* left = new std::uint8_t[max_m * max_k];
    272   std::uint8_t* right = new std::uint8_t[max_n * max_k];
    273   std::uint8_t* result = new std::uint8_t[max_m * max_n];
    274   float* result_float = new float[max_m * max_n];
    275   std::int32_t* result_i32 = new std::int32_t[max_m * max_n];
    276   std::uint8_t* scratch = new std::uint8_t[1024 * 1024 * 64];
    277 
    278   gemmlowp::WorkersPool pool;
    279 
    280   int max_repetitions = run_long_test ? 10 : 1;
    281 
    282   for (int repetitions = 0; repetitions < max_repetitions; ++repetitions) {
    283     int t = std::min(repetitions + 1, 4);
    284     std::cout << "Threads: " << t << std::endl << std::flush;
    285 
    286     std::cout << "Quantized 8 bit." << std::endl << std::flush;
    287 
    288     std::cout << "Small." << std::endl << std::flush;
    289     q_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result, &pool,
    290             t);
    291 
    292     if (run_long_test) {
    293       std::cout << "Big." << std::endl << std::flush;
    294       q_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
    295               result, &pool, t);
    296     }
    297 
    298     std::cout << "Gemv." << std::endl << std::flush;
    299     q_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right, result,
    300             &pool, t);
    301     q_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right, result,
    302             &pool, t);
    303 
    304     std::cout << std::endl << "Floats." << std::endl << std::flush;
    305 
    306     std::cout << "Small." << std::endl << std::flush;
    307     f_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_float,
    308             &pool, t);
    309 
    310     if (run_long_test) {
    311       std::cout << "Big." << std::endl << std::flush;
    312       f_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
    313               result_float, &pool, t);
    314     }
    315 
    316     std::cout << "Gemv." << std::endl << std::flush;
    317     f_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right,
    318             result_float, &pool, t);
    319     f_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right,
    320             result_float, &pool, t);
    321 
    322     std::cout << std::endl << "Int32." << std::endl << std::flush;
    323 
    324     std::cout << "Small." << std::endl << std::flush;
    325     i32_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_i32,
    326               &pool, t);
    327 
    328     if (run_long_test) {
    329       std::cout << "Big." << std::endl << std::flush;
    330       i32_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
    331                 result_i32, &pool, t);
    332     }
    333 
    334     std::cout << "Gemv." << std::endl << std::flush;
    335     i32_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right,
    336               result_i32, &pool, t);
    337     i32_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right,
    338               result_i32, &pool, t);
    339 
    340     std::cout << std::endl << std::flush;
    341   }
    342 
    343   std::cout << "Done." << std::endl << std::flush;
    344 }
    345