/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <sched.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/mman.h>

#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif


typedef long long nsecs_t;

/*
 * Benchmark sizing. Both kernels are meant to execute exactly TARGET_OPS
 * floating-point operations so that their reported Mops are comparable.
 *
 * Scalar kernel: 10 multiplies + 10 adds per inner iteration.
 * NEON kernel:   1 vmul (4 mul) + 9 vmla (4 mul + 4 add each) + 1 vadd (4 add)
 *                = 80 operations per inner iteration.
 *
 * The outer counts are derived by dividing the total, not by dividing 1000
 * first: 1000 / 80 truncates to 12 and would silently drop 4% of the work.
 */
enum {
    TARGET_OPS       = 1000 * 1000 * 1000,
    INNER_ITERS      = 1000,
    MAD_OPS_PER_ITER = 20,
    FMA_OPS_PER_ITER = 80,
    MAD_OUTER_ITERS  = TARGET_OPS / (INNER_ITERS * MAD_OPS_PER_ITER),
    FMA_OUTER_ITERS  = TARGET_OPS / (INNER_ITERS * FMA_OPS_PER_ITER)
};

/* Timestamp recorded by startTime() and consumed by endTime(). */
static nsecs_t gTime;

/* Working buffer shared by the benchmark kernels. */
float data_f[1024 * 128];

/*
 * Returns the current CLOCK_MONOTONIC reading in nanoseconds.
 * If clock_gettime() fails, a diagnostic is written to stderr and 0 is
 * returned.
 */
static nsecs_t system_time()
{
    struct timespec t = {0, 0};

    if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) {
        perror("clock_gettime");
        return 0;
    }
    return (nsecs_t)t.tv_sec * 1000000000LL + t.tv_nsec;
}

/* Records the start of a timed region in gTime. */
static void startTime()
{
    gTime = system_time();
}

/*
 * Prints the throughput of the region started by the last startTime() call.
 *
 * str: label printed for the test.
 * ops: number of operations performed inside the timed region.
 */
static void endTime(const char *str, double ops)
{
    nsecs_t elapsed_ns = system_time() - gTime;
    double seconds = ((double)elapsed_ns) / 1e9;

    printf("Test: %s, %f Mops\n", str, ops / seconds / 1e6);
}


/*
 * Scalar benchmark: repeatedly applies a 10-tap weighted sum (plus 1.0) in
 * place over the first INNER_ITERS elements of data_f and reports the
 * throughput as "scalar mad".
 */
static void test_mad()
{
    /* The kernel reads up to data_f[INNER_ITERS - 1 + 9]; seed past that. */
    for (int i = 0; i < 1020; i++) {
        data_f[i] = (float)i;
    }

    startTime();

    /* MAD_OUTER_ITERS * INNER_ITERS * MAD_OPS_PER_ITER == TARGET_OPS */
    for (int ct = 0; ct < MAD_OUTER_ITERS; ct++) {
        for (int i = 0; i < INNER_ITERS; i++) {
            data_f[i] = (data_f[i]     * 0.02f +
                         data_f[i + 1] * 0.04f +
                         data_f[i + 2] * 0.05f +
                         data_f[i + 3] * 0.1f +
                         data_f[i + 4] * 0.2f +
                         data_f[i + 5] * 0.2f +
                         data_f[i + 6] * 0.1f +
                         data_f[i + 7] * 0.05f +
                         data_f[i + 8] * 0.04f +
                         data_f[i + 9] * 0.02f + 1.f);
        }
    }

    endTime("scalar mad", (double)TARGET_OPS);
}


#ifdef __ARM_NEON__

/*
 * NEON benchmark: same 10-tap weighted sum as test_mad(), but each tap is a
 * 4-lane vector loaded at a stride of 4 floats and accumulated with
 * vmlaq_f32. Reports the throughput as "neon fma".
 */
static void test_fma()
{
    /* The kernel reads up to data_f[INNER_ITERS - 1 + 39]; seed past that. */
    for (int i = 0; i < 1020 * 4; i++) {
        data_f[i] = (float)i;
    }

    const float32x4_t c0_02 = vdupq_n_f32(0.02f);
    const float32x4_t c0_04 = vdupq_n_f32(0.04f);
    const float32x4_t c0_05 = vdupq_n_f32(0.05f);
    const float32x4_t c0_10 = vdupq_n_f32(0.1f);
    const float32x4_t c0_20 = vdupq_n_f32(0.2f);
    const float32x4_t c1_00 = vdupq_n_f32(1.0f);

    startTime();

    /* FMA_OUTER_ITERS * INNER_ITERS * FMA_OPS_PER_ITER == TARGET_OPS */
    for (int ct = 0; ct < FMA_OUTER_ITERS; ct++) {
        for (int i = 0; i < INNER_ITERS; i++) {
            float32x4_t t;

            t = vmulq_f32(vld1q_f32(&data_f[i]), c0_02);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 4]), c0_04);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 8]), c0_05);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 12]), c0_10);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 16]), c0_20);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 20]), c0_20);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 24]), c0_10);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 28]), c0_05);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 32]), c0_04);
            t = vmlaq_f32(t, vld1q_f32(&data_f[i + 36]), c0_02);
            t = vaddq_f32(t, c1_00);
            vst1q_f32(&data_f[i], t);
        }
    }

    endTime("neon fma", (double)TARGET_OPS);
}
#endif

/*
 * Entry point of the floating-point benchmark. Runs the scalar kernel and,
 * when built with NEON support, the NEON kernel. Both arguments are ignored.
 * Always returns 0.
 */
int fp_test(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    test_mad();

#ifdef __ARM_NEON__
    test_fma();
#endif

    return 0;
}