Home | History | Annotate | Download | only in jni
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
#include <jni.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cpu-features.h>
#include "helloneon-intrinsics.h"
     23 
     24 #define DEBUG 0
     25 
     26 #if DEBUG
     27 #include <android/log.h>
     28 #  define  D(x...)  __android_log_print(ANDROID_LOG_INFO,"helloneon",x)
     29 #else
     30 #  define  D(...)  do {} while (0)
     31 #endif
     32 
     33 /* return current time in milliseconds */
     34 static double
     35 now_ms(void)
     36 {
     37     struct timespec res;
     38     clock_gettime(CLOCK_REALTIME, &res);
     39     return 1000.0*res.tv_sec + (double)res.tv_nsec/1e6;
     40 }
     41 
     42 
     43 /* this is a FIR filter implemented in C */
     44 static void
     45 fir_filter_c(short *output, const short* input, const short* kernel, int width, int kernelSize)
     46 {
     47     int  offset = -kernelSize/2;
     48     int  nn;
     49     for (nn = 0; nn < width; nn++) {
     50         int sum = 0;
     51         int mm;
     52         for (mm = 0; mm < kernelSize; mm++) {
     53             sum += kernel[mm]*input[nn+offset+mm];
     54         }
     55         output[nn] = (short)((sum + 0x8000) >> 16);
     56     }
     57 }
     58 
     59 #define  FIR_KERNEL_SIZE   32
     60 #define  FIR_OUTPUT_SIZE   2560
     61 #define  FIR_INPUT_SIZE    (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
     62 #define  FIR_ITERATIONS    600
     63 
     64 static const short  fir_kernel[FIR_KERNEL_SIZE] = {
     65     0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 070, 0x40, 0x20, 0x10,
     66     0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 070, 0x40, 0x20, 0x10 };
     67 
     68 static short        fir_output[FIR_OUTPUT_SIZE];
     69 static short        fir_input_0[FIR_INPUT_SIZE];
     70 static const short* fir_input = fir_input_0 + (FIR_KERNEL_SIZE/2);
     71 static short        fir_output_expected[FIR_OUTPUT_SIZE];
     72 
     73 /* This is a trivial JNI example where we use a native method
     74  * to return a new VM String. See the corresponding Java source
     75  * file located at:
     76  *
     77  *   apps/samples/hello-neon/project/src/com/example/neon/HelloNeon.java
     78  */
     79 jstring
     80 Java_com_example_neon_HelloNeon_stringFromJNI( JNIEnv* env,
     81                                                jobject thiz )
     82 {
     83     char*  str;
     84     AndroidCpuFamily family;
     85     uint64_t features;
     86     char buffer[512];
     87     char tryNeon = 0;
     88     double  t0, t1, time_c, time_neon;
     89 
     90     /* setup FIR input - whatever */
     91     {
     92         int  nn;
     93         for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
     94             fir_input_0[nn] = (5*nn) & 255;
     95         }
     96         fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
     97     }
     98 
     99     /* Benchmark small FIR filter loop - C version */
    100     t0 = now_ms();
    101     {
    102         int  count = FIR_ITERATIONS;
    103         for (; count > 0; count--) {
    104             fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    105         }
    106     }
    107     t1 = now_ms();
    108     time_c = t1 - t0;
    109 
    110     asprintf(&str, "FIR Filter benchmark:\nC version          : %g ms\n", time_c);
    111     strlcpy(buffer, str, sizeof buffer);
    112     free(str);
    113 
    114     strlcat(buffer, "Neon version   : ", sizeof buffer);
    115 
    116     family = android_getCpuFamily();
    117     if ((family != ANDROID_CPU_FAMILY_ARM) &&
    118         (family != ANDROID_CPU_FAMILY_X86))
    119     {
    120         strlcat(buffer, "Not an ARM and not an X86 CPU !\n", sizeof buffer);
    121         goto EXIT;
    122     }
    123 
    124     features = android_getCpuFeatures();
    125     if (((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) &&
    126         ((features & ANDROID_CPU_X86_FEATURE_SSSE3) == 0))
    127     {
    128         strlcat(buffer, "Not an ARMv7 and not an X86 SSSE3 CPU !\n", sizeof buffer);
    129         goto EXIT;
    130     }
    131 
    132     /* HAVE_NEON is defined in Android.mk ! */
    133 #ifdef HAVE_NEON
    134     if (((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) &&
    135         ((features & ANDROID_CPU_X86_FEATURE_SSSE3) == 0))
    136     {
    137         strlcat(buffer, "CPU doesn't support NEON !\n", sizeof buffer);
    138         goto EXIT;
    139     }
    140 
    141     /* Benchmark small FIR filter loop - Neon version */
    142     t0 = now_ms();
    143     {
    144         int  count = FIR_ITERATIONS;
    145         for (; count > 0; count--) {
    146             fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    147         }
    148     }
    149     t1 = now_ms();
    150     time_neon = t1 - t0;
    151     asprintf(&str, "%g ms (x%g faster)\n", time_neon, time_c / (time_neon < 1e-6 ? 1. : time_neon));
    152     strlcat(buffer, str, sizeof buffer);
    153     free(str);
    154 
    155     /* check the result, just in case */
    156     {
    157         int  nn, fails = 0;
    158         for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
    159             if (fir_output[nn] != fir_output_expected[nn]) {
    160                 if (++fails < 16)
    161                     D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
    162             }
    163         }
    164         D("%d fails\n", fails);
    165     }
    166 #else /* !HAVE_NEON */
    167     strlcat(buffer, "Program not compiled with ARMv7 support !\n", sizeof buffer);
    168 #endif /* !HAVE_NEON */
    169 EXIT:
    170     return (*env)->NewStringUTF(env, buffer);
    171 }
    172