Home | History | Annotate | Download | only in jni
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include <jni.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include <new>
     21 
/* The code from here through qsort_local() is copied from the bionic source.
 * It is duplicated here to remove the dependency on optimized bionic.
 */
     25 static __inline char    *med3(char *, char *, char *, int (*)(const void *, const void *));
     26 static __inline void     swapfunc(char *, char *, int, int);
     27 
     28 #define min(a, b)   (a) < (b) ? a : b
     29 
     30 /*
     31  * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
     32  */
     33 #define swapcode(TYPE, parmi, parmj, n) {       \
     34     long i = (n) / sizeof (TYPE);           \
     35     TYPE *pi = (TYPE *) (parmi);            \
     36     TYPE *pj = (TYPE *) (parmj);            \
     37     do {                        \
     38         TYPE    t = *pi;            \
     39         *pi++ = *pj;                \
     40         *pj++ = t;              \
     41         } while (--i > 0);              \
     42 }
     43 
     44 #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
     45     es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
     46 
     47 static __inline void
     48 swapfunc(char *a, char *b, int n, int swaptype)
     49 {
     50     if (swaptype <= 1)
     51         swapcode(long, a, b, n)
     52     else
     53         swapcode(char, a, b, n)
     54 }
     55 
     56 #define swap(a, b)                  \
     57     if (swaptype == 0) {                \
     58         long t = *(long *)(a);          \
     59         *(long *)(a) = *(long *)(b);        \
     60         *(long *)(b) = t;           \
     61     } else                      \
     62         swapfunc(a, b, es, swaptype)
     63 
     64 #define vecswap(a, b, n)    if ((n) > 0) swapfunc(a, b, n, swaptype)
     65 
     66 static __inline char *
     67 med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
     68 {
     69     return cmp(a, b) < 0 ?
     70            (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
     71               :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
     72 }
     73 
     74 void
     75 qsort_local(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
     76 {
     77     char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
     78     int d, r, swaptype, swap_cnt;
     79     char *a = (char*)aa;
     80 
     81 loop:   SWAPINIT(a, es);
     82     swap_cnt = 0;
     83     if (n < 7) {
     84         for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
     85             for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
     86                  pl -= es)
     87                 swap(pl, pl - es);
     88         return;
     89     }
     90     pm = (char *)a + (n / 2) * es;
     91     if (n > 7) {
     92         pl = (char *)a;
     93         pn = (char *)a + (n - 1) * es;
     94         if (n > 40) {
     95             d = (n / 8) * es;
     96             pl = med3(pl, pl + d, pl + 2 * d, cmp);
     97             pm = med3(pm - d, pm, pm + d, cmp);
     98             pn = med3(pn - 2 * d, pn - d, pn, cmp);
     99         }
    100         pm = med3(pl, pm, pn, cmp);
    101     }
    102     swap(a, pm);
    103     pa = pb = (char *)a + es;
    104 
    105     pc = pd = (char *)a + (n - 1) * es;
    106     for (;;) {
    107         while (pb <= pc && (r = cmp(pb, a)) <= 0) {
    108             if (r == 0) {
    109                 swap_cnt = 1;
    110                 swap(pa, pb);
    111                 pa += es;
    112             }
    113             pb += es;
    114         }
    115         while (pb <= pc && (r = cmp(pc, a)) >= 0) {
    116             if (r == 0) {
    117                 swap_cnt = 1;
    118                 swap(pc, pd);
    119                 pd -= es;
    120             }
    121             pc -= es;
    122         }
    123         if (pb > pc)
    124             break;
    125         swap(pb, pc);
    126         swap_cnt = 1;
    127         pb += es;
    128         pc -= es;
    129     }
    130     if (swap_cnt == 0) {  /* Switch to insertion sort */
    131         for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
    132             for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
    133                  pl -= es)
    134                 swap(pl, pl - es);
    135         return;
    136     }
    137 
    138     pn = (char *)a + n * es;
    139     r = min(pa - (char *)a, pb - pa);
    140     vecswap(a, pb - r, r);
    141     r = min(pd - pc, pn - pd - (int)es);
    142     vecswap(pb, pn - r, r);
    143     if ((r = pb - pa) > (int)es)
    144         qsort_local(a, r / es, es, cmp);
    145     if ((r = pd - pc) > (int)es) {
    146         /* Iterate rather than recurse to save stack space */
    147         a = pn - r;
    148         n = r / es;
    149         goto loop;
    150     }
    151     /* qsort(pn - r, r / es, es, cmp); */
    152 }
    153 
    154 /* code duplication ends here */
    155 
    156 /**
    157  * Util for getting time stamp
    158  */
    159 double currentTimeMillis()
    160 {
    161     struct timeval tv;
    162     gettimeofday(&tv, (struct timezone *) NULL);
    163     return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0;
    164 }
    165 
    166 /**
    167  * Initialize given array randomly for the given seed
    168  */
    169 template <typename T> void randomInitArray(T* array, int len, unsigned int seed)
    170 {
    171     srand(seed);
    172     for (int i = 0; i < len; i++) {
    173         array[i] = (T) rand();
    174     }
    175 }
    176 
    177 /**
    178  * comparison function for int, for qsort
    179  */
    180 int cmpint(const void* p1, const void* p2)
    181 {
    182     return *(int*)p1 - *(int*)p2;
    183 }
    184 
    185 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runSort(JNIEnv* env,
    186         jclass clazz, jint numberElements, jint repetition)
    187 {
    188     int* data = new int[numberElements];
    189     if (data == NULL) {
    190         env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
    191         return -1;
    192     }
    193     double totalTime = 0;
    194     for (int i = 0; i < repetition; i++) {
    195         randomInitArray<int>(data, numberElements, 0);
    196         double start = currentTimeMillis();
    197         qsort_local(data, numberElements, sizeof(int), cmpint);
    198         double end = currentTimeMillis();
    199         totalTime += (end - start);
    200     }
    201     delete[] data;
    202     return totalTime;
    203 }
    204 
    205 
    206 /**
    207  * Do matrix multiplication, C = A x B with all matrices having dimension of n x n
    208  * The implementation is not in the most efficient, but it is good enough for benchmarking purpose.
    209  * @param n should be multiple of 8
    210  */
    211 void doMatrixMultiplication(float* A, float* B, float* C, int n)
    212 {
    213     // batch size
    214     const int M = 8;
    215     for (int i = 0; i < n; i++) {
    216         for (int j = 0; j < n; j += M) {
    217             float sum[M];
    218             for (int k = 0; k < M; k++) {
    219                 sum[k] = 0;
    220             }
    221             // re-use the whole cache line for accessing B.
    222             // otherwise, the whole line will be read and only one value will be used.
    223 
    224             for (int k = 0; k < n; k++) {
    225                 float a = A[i * n + k];
    226                 sum[0] += a * B[k * n + j];
    227                 sum[1] += a * B[k * n + j + 1];
    228                 sum[2] += a * B[k * n + j + 2];
    229                 sum[3] += a * B[k * n + j + 3];
    230                 sum[4] += a * B[k * n + j + 4];
    231                 sum[5] += a * B[k * n + j + 5];
    232                 sum[6] += a * B[k * n + j + 6];
    233                 sum[7] += a * B[k * n + j + 7];
    234             }
    235             for (int k = 0; k < M; k++) {
    236                 C[i * n + j + k] = sum[k];
    237             }
    238         }
    239     }
    240 }
    241 
    242 extern "C" JNIEXPORT jdouble JNICALL Java_android_simplecpu_cts_CpuNative_runMatrixMultiplication(
    243         JNIEnv* env, jclass clazz, jint n, jint repetition)
    244 {
    245     // C = A x B
    246     float* A = new float[n * n];
    247     float* B = new float[n * n];
    248     float* C = new float[n * n];
    249     if ((A == NULL) || (B == NULL) || (C == NULL)) {
    250         delete[] A;
    251         delete[] B;
    252         delete[] C;
    253         env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
    254         return -1;
    255     }
    256     double totalTime = 0;
    257     for (int i = 0; i < repetition; i++) {
    258         randomInitArray<float>(A, n * n, 0);
    259         randomInitArray<float>(B, n * n, 1);
    260         double start = currentTimeMillis();
    261         doMatrixMultiplication(A, B, C, n);
    262         double end = currentTimeMillis();
    263         totalTime += (end - start);
    264     }
    265     delete[] A;
    266     delete[] B;
    267     delete[] C;
    268     return totalTime;
    269 }
    270 
    271