1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 */ 17 #include <jni.h> 18 #include <time.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <cpu-features.h> 22 #include "helloneon-intrinsics.h" 23 24 #define DEBUG 0 25 26 #if DEBUG 27 #include <android/log.h> 28 # define D(x...) __android_log_print(ANDROID_LOG_INFO,"helloneon",x) 29 #else 30 # define D(...) do {} while (0) 31 #endif 32 33 /* return current time in milliseconds */ 34 static double 35 now_ms(void) 36 { 37 struct timespec res; 38 clock_gettime(CLOCK_REALTIME, &res); 39 return 1000.0*res.tv_sec + (double)res.tv_nsec/1e6; 40 } 41 42 43 /* this is a FIR filter implemented in C */ 44 static void 45 fir_filter_c(short *output, const short* input, const short* kernel, int width, int kernelSize) 46 { 47 int offset = -kernelSize/2; 48 int nn; 49 for (nn = 0; nn < width; nn++) { 50 int sum = 0; 51 int mm; 52 for (mm = 0; mm < kernelSize; mm++) { 53 sum += kernel[mm]*input[nn+offset+mm]; 54 } 55 output[nn] = (short)((sum + 0x8000) >> 16); 56 } 57 } 58 59 #define FIR_KERNEL_SIZE 32 60 #define FIR_OUTPUT_SIZE 2560 61 #define FIR_INPUT_SIZE (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE) 62 #define FIR_ITERATIONS 600 63 64 static const short fir_kernel[FIR_KERNEL_SIZE] = { 65 0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 070, 0x40, 0x20, 0x10, 66 0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 070, 0x40, 0x20, 0x10 }; 67 68 static short fir_output[FIR_OUTPUT_SIZE]; 69 static short fir_input_0[FIR_INPUT_SIZE]; 70 static const short* fir_input = fir_input_0 + (FIR_KERNEL_SIZE/2); 71 static short fir_output_expected[FIR_OUTPUT_SIZE]; 72 73 /* This is a trivial JNI example where we use a native method 74 * to return a new VM String. See the corresponding Java source 75 * file located at: 76 * 77 * apps/samples/hello-neon/project/src/com/example/neon/HelloNeon.java 78 */ 79 jstring 80 Java_com_example_neon_HelloNeon_stringFromJNI( JNIEnv* env, 81 jobject thiz ) 82 { 83 char* str; 84 AndroidCpuFamily family; 85 uint64_t features; 86 char buffer[512]; 87 char tryNeon = 0; 88 double t0, t1, time_c, time_neon; 89 90 /* setup FIR input - whatever */ 91 { 92 int nn; 93 for (nn = 0; nn < FIR_INPUT_SIZE; nn++) { 94 fir_input_0[nn] = (5*nn) & 255; 95 } 96 fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE); 97 } 98 99 /* Benchmark small FIR filter loop - C version */ 100 t0 = now_ms(); 101 { 102 int count = FIR_ITERATIONS; 103 for (; count > 0; count--) { 104 fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE); 105 } 106 } 107 t1 = now_ms(); 108 time_c = t1 - t0; 109 110 asprintf(&str, "FIR Filter benchmark:\nC version : %g ms\n", time_c); 111 strlcpy(buffer, str, sizeof buffer); 112 free(str); 113 114 strlcat(buffer, "Neon version : ", sizeof buffer); 115 116 family = android_getCpuFamily(); 117 if ((family != ANDROID_CPU_FAMILY_ARM) && 118 (family != ANDROID_CPU_FAMILY_X86)) 119 { 120 strlcat(buffer, "Not an ARM and not an X86 CPU !\n", sizeof buffer); 121 goto EXIT; 122 } 123 124 features = android_getCpuFeatures(); 125 if (((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) && 126 ((features & ANDROID_CPU_X86_FEATURE_SSSE3) == 0)) 127 { 128 strlcat(buffer, "Not an ARMv7 and not an X86 SSSE3 CPU !\n", sizeof buffer); 129 goto EXIT; 130 } 131 132 /* HAVE_NEON is defined in Android.mk ! */ 133 #ifdef HAVE_NEON 134 if (((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) && 135 ((features & ANDROID_CPU_X86_FEATURE_SSSE3) == 0)) 136 { 137 strlcat(buffer, "CPU doesn't support NEON !\n", sizeof buffer); 138 goto EXIT; 139 } 140 141 /* Benchmark small FIR filter loop - Neon version */ 142 t0 = now_ms(); 143 { 144 int count = FIR_ITERATIONS; 145 for (; count > 0; count--) { 146 fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE); 147 } 148 } 149 t1 = now_ms(); 150 time_neon = t1 - t0; 151 asprintf(&str, "%g ms (x%g faster)\n", time_neon, time_c / (time_neon < 1e-6 ? 1. : time_neon)); 152 strlcat(buffer, str, sizeof buffer); 153 free(str); 154 155 /* check the result, just in case */ 156 { 157 int nn, fails = 0; 158 for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) { 159 if (fir_output[nn] != fir_output_expected[nn]) { 160 if (++fails < 16) 161 D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]); 162 } 163 } 164 D("%d fails\n", fails); 165 } 166 #else /* !HAVE_NEON */ 167 strlcat(buffer, "Program not compiled with ARMv7 support !\n", sizeof buffer); 168 #endif /* !HAVE_NEON */ 169 EXIT: 170 return (*env)->NewStringUTF(env, buffer); 171 } 172