1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <unistd.h> 16 #ifdef __APPLE__ 17 #include <sys/time.h> 18 #endif 19 20 #include <cstdint> 21 #include <cstdlib> 22 #include <ctime> 23 #include <iostream> 24 #include <map> 25 #include <vector> 26 27 #include "../meta/legacy_multi_thread_gemm.h" 28 #include "../public/gemmlowp.h" 29 #include "test.h" 30 // lets include these so we make sure they always compile 31 #include "../meta/multi_thread_gemm.h" 32 #include "../meta/multi_thread_transform.h" 33 #include "../meta/legacy_multi_thread_common.h" 34 35 #if defined(__arm__) && !defined(GEMMLOWP_NEON) 36 #warning "Building without NEON support on ARM, check your compiler setup!" 37 #endif 38 39 double time() { 40 #ifdef __APPLE__ 41 timeval t; 42 gettimeofday(&t, nullptr); 43 return t.tv_sec + 1e-6 * t.tv_usec; 44 #else 45 timespec t; 46 clock_gettime(CLOCK_REALTIME, &t); 47 return t.tv_sec + 1e-9 * t.tv_nsec; 48 #endif 49 } 50 51 void prepare_test_data(std::uint8_t* data, std::int32_t rows, std::int32_t cols, 52 std::int32_t seed, std::int32_t seed_2) { 53 std::int32_t value = seed; 54 for (int i = 0; i < rows; ++i) { 55 for (int j = 0; j < cols; ++j) { 56 data[i * cols + j] = static_cast<std::uint8_t>(value); 57 value = ((value * seed_2) + seed) % 256; 58 } 59 } 60 } 61 62 void check_result(std::uint8_t* left, std::uint8_t* right, std::uint8_t* result, 63 std::int32_t rows, std::int32_t cols, std::int32_t depth, 64 std::int32_t lhs_offset, std::int32_t rhs_offset, 65 std::int32_t sum_offset, std::int32_t mul_offset, 66 std::int32_t shift) { 67 std::int32_t rounding = (1 << (shift - 1)); 68 std::int32_t wrong = 0; 69 for (int i = 0; i < rows; ++i) { 70 for (int j = 0; j < cols; ++j) { 71 std::int32_t expected = 0; 72 for (int k = 0; k < depth; ++k) { 73 expected += 74 (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) * 75 (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset); 76 } 77 expected += sum_offset; 78 expected *= mul_offset; 79 expected += rounding; 80 expected = (expected >> shift); 81 if (expected < 0) { 82 expected = 0; 83 } else if (expected > 255) { 84 expected = 255; 85 } 86 expected = static_cast<std::int32_t>(static_cast<std::uint8_t>(expected)); 87 std::int32_t actual = static_cast<std::int32_t>(result[i * cols + j]); 88 if (actual != expected) { 89 std::cout << "(" << i << ", " << j << "): " << expected << "!=" 90 << actual << std::endl; 91 wrong++; 92 } 93 } 94 } 95 if (wrong > 0) { 96 std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : " 97 << wrong << "/" << (rows * cols) << std::endl 98 << std::flush; 99 std::exit(1); 100 } else { 101 std::cout << "." << std::flush; 102 } 103 } 104 105 void check_result_f(std::uint8_t* left, std::uint8_t* right, float* result, 106 std::int32_t rows, std::int32_t cols, std::int32_t depth, 107 std::int32_t lhs_offset, std::int32_t rhs_offset, 108 float result_offset) { 109 std::int32_t wrong = 0; 110 for (int i = 0; i < rows; ++i) { 111 for (int j = 0; j < cols; ++j) { 112 std::int32_t expected = 0; 113 for (int k = 0; k < depth; ++k) { 114 expected += 115 (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) * 116 (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset); 117 } 118 float expected_float = static_cast<float>(expected) * result_offset; 119 float actual_float = result[i * cols + j]; 120 if (actual_float != expected_float) { 121 std::cout << "(" << i << ", " << j << "): " << expected_float << "!=" 122 << actual_float << std::endl; 123 wrong++; 124 } 125 } 126 } 127 if (wrong > 0) { 128 std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : " 129 << wrong << "/" << (rows * cols) << std::endl 130 << std::flush; 131 std::exit(1); 132 } else { 133 std::cout << "." << std::flush; 134 } 135 } 136 137 138 void check_result_i32(std::uint8_t* left, std::uint8_t* right, 139 std::int32_t* result, std::int32_t rows, 140 std::int32_t cols, std::int32_t depth, 141 std::int32_t lhs_offset, std::int32_t rhs_offset) { 142 std::int32_t wrong = 0; 143 for (int i = 0; i < rows; ++i) { 144 for (int j = 0; j < cols; ++j) { 145 std::int32_t expected = 0; 146 for (int k = 0; k < depth; ++k) { 147 expected += 148 (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) * 149 (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset); 150 } 151 std::int32_t actual = result[i * cols + j]; 152 if (actual != expected) { 153 std::cout << "(" << i << ", " << j << "): " << expected << "!=" 154 << actual << std::endl; 155 wrong++; 156 } 157 } 158 } 159 if (wrong > 0) { 160 std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : " 161 << wrong << "/" << (rows * cols) << std::endl 162 << std::flush; 163 std::exit(1); 164 } else { 165 std::cout << "." << std::flush; 166 } 167 } 168 169 template <typename T> 170 void clear(T* result, std::int32_t rows, std::int32_t cols) { 171 for (int i = 0; i < rows * cols; ++i) { 172 result[i] = static_cast<T>(0); 173 } 174 } 175 176 void test(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs, 177 std::int32_t m, std::int32_t n, std::int32_t k, std::uint8_t* result, 178 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { 179 prepare_test_data(lhs, m, k, 11, 13); 180 prepare_test_data(rhs, n, k, 177, 19); 181 182 clear(result, m, n); 183 gemmlowp::meta::multi_thread_gemm_q8(pool, pool_size, scratch, lhs, rhs, m, n, 184 k, -127, -127, 127 * k, 1, 7, result); 185 check_result(lhs, rhs, result, m, n, k, -127, -127, 127 * k, 1, 7); 186 } 187 188 void test_f(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs, 189 std::int32_t m, std::int32_t n, std::int32_t k, float* result, 190 gemmlowp::WorkersPool* pool, std::int32_t pool_size) { 191 prepare_test_data(lhs, m, k, 11, 13); 192 prepare_test_data(rhs, n, k, 177, 19); 193 194 clear(result, m, n); 195 float scale = 1.0f / 1234567.8f; 196 gemmlowp::meta::multi_thread_gemm_f(pool, pool_size, scratch, lhs, rhs, m, n, 197 k, -127, -127, scale, result); 198 check_result_f(lhs, rhs, result, m, n, k, -127, -127, scale); 199 } 200 201 void test_i32(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs, 202 std::int32_t m, std::int32_t n, std::int32_t k, 203 std::int32_t* result, gemmlowp::WorkersPool* pool, 204 std::int32_t pool_size) { 205 prepare_test_data(lhs, m, k, 11, 13); 206 prepare_test_data(rhs, n, k, 177, 19); 207 208 clear(result, m, n); 209 gemmlowp::meta::multi_thread_gemm_i32(pool, pool_size, scratch, lhs, rhs, m, 210 n, k, -127, -127, result); 211 check_result_i32(lhs, rhs, result, m, n, k, -127, -127); 212 } 213 214 void q_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd, 215 int kd, std::uint8_t* scratch, std::uint8_t* left, 216 std::uint8_t* right, std::uint8_t* result, 217 gemmlowp::WorkersPool* pool, int t) { 218 for (int m = mi; m < mx; m += md) { 219 for (int n = ni; n < nx; n += nd) { 220 for (int k = ki; k < kx; k += kd) { 221 test(scratch, left, right, m, n, k, result, pool, t); 222 } 223 } 224 } 225 std::cout << std::endl; 226 } 227 228 void f_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd, 229 int kd, std::uint8_t* scratch, std::uint8_t* left, 230 std::uint8_t* right, float* result, gemmlowp::WorkersPool* pool, 231 int t) { 232 for (int m = mi; m < mx; m += md) { 233 for (int n = ni; n < nx; n += nd) { 234 for (int k = ki; k < kx; k += kd) { 235 test_f(scratch, left, right, m, n, k, result, pool, t); 236 } 237 } 238 } 239 std::cout << std::endl; 240 } 241 242 void i32_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd, 243 int kd, std::uint8_t* scratch, std::uint8_t* left, 244 std::uint8_t* right, std::int32_t* result, 245 gemmlowp::WorkersPool* pool, int t) { 246 for (int m = mi; m < mx; m += md) { 247 for (int n = ni; n < nx; n += nd) { 248 for (int k = ki; k < kx; k += kd) { 249 test_i32(scratch, left, right, m, n, k, result, pool, t); 250 } 251 } 252 } 253 std::cout << std::endl; 254 } 255 256 int main(int argc, char* argv[]) { 257 bool run_long_test = false; 258 259 if (argc > 1 && strcmp(argv[1], "long")) { 260 run_long_test = true; 261 } 262 263 const std::int32_t min_n = 1; 264 const std::int32_t min_m = 1; 265 const std::int32_t min_k = 8; 266 267 const std::int32_t max_n = 1024; 268 const std::int32_t max_m = 1024; 269 const std::int32_t max_k = 2048; 270 271 std::uint8_t* left = new std::uint8_t[max_m * max_k]; 272 std::uint8_t* right = new std::uint8_t[max_n * max_k]; 273 std::uint8_t* result = new std::uint8_t[max_m * max_n]; 274 float* result_float = new float[max_m * max_n]; 275 std::int32_t* result_i32 = new std::int32_t[max_m * max_n]; 276 std::uint8_t* scratch = new std::uint8_t[1024 * 1024 * 64]; 277 278 gemmlowp::WorkersPool pool; 279 280 int max_repetitions = run_long_test ? 10 : 1; 281 282 for (int repetitions = 0; repetitions < max_repetitions; ++repetitions) { 283 int t = std::min(repetitions + 1, 4); 284 std::cout << "Threads: " << t << std::endl << std::flush; 285 286 std::cout << "Quantized 8 bit." << std::endl << std::flush; 287 288 std::cout << "Small." << std::endl << std::flush; 289 q_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result, &pool, 290 t); 291 292 if (run_long_test) { 293 std::cout << "Big." << std::endl << std::flush; 294 q_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right, 295 result, &pool, t); 296 } 297 298 std::cout << "Gemv." << std::endl << std::flush; 299 q_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right, result, 300 &pool, t); 301 q_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right, result, 302 &pool, t); 303 304 std::cout << std::endl << "Floats." << std::endl << std::flush; 305 306 std::cout << "Small." << std::endl << std::flush; 307 f_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_float, 308 &pool, t); 309 310 if (run_long_test) { 311 std::cout << "Big." << std::endl << std::flush; 312 f_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right, 313 result_float, &pool, t); 314 } 315 316 std::cout << "Gemv." << std::endl << std::flush; 317 f_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right, 318 result_float, &pool, t); 319 f_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right, 320 result_float, &pool, t); 321 322 std::cout << std::endl << "Int32." << std::endl << std::flush; 323 324 std::cout << "Small." << std::endl << std::flush; 325 i32_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_i32, 326 &pool, t); 327 328 if (run_long_test) { 329 std::cout << "Big." << std::endl << std::flush; 330 i32_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right, 331 result_i32, &pool, t); 332 } 333 334 std::cout << "Gemv." << std::endl << std::flush; 335 i32_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right, 336 result_i32, &pool, t); 337 i32_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right, 338 result_i32, &pool, t); 339 340 std::cout << std::endl << std::flush; 341 } 342 343 std::cout << "Done." << std::endl << std::flush; 344 } 345