Home | History | Annotate | Download | only in memtest
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <stdio.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include <sys/time.h>
     21 #include <time.h>
     22 #include <unistd.h>
     23 #include <sched.h>
     24 #include <sys/resource.h>
     25 #include <sys/syscall.h>
     26 #include <sys/types.h>
     27 #include <sys/mman.h>
     28 
     29 #ifdef __ARM_NEON__
     30 #include <arm_neon.h>
     31 #endif
     32 
     33 
     34 typedef long long nsecs_t;
     35 static nsecs_t gTime;
     36 float data_f[1024 * 128];
     37 
     38 static nsecs_t system_time()
     39 {
     40     struct timespec t;
     41     t.tv_sec = t.tv_nsec = 0;
     42     clock_gettime(CLOCK_MONOTONIC, &t);
     43     return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
     44 }
     45 
     46 static void startTime()
     47 {
     48     gTime = system_time();
     49 }
     50 
     51 static void endTime(const char *str, double ops)
     52 {
     53     nsecs_t t = system_time() - gTime;
     54     double ds = ((double)t) / 1e9;
     55     printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
     56 }
     57 
     58 
     59 static void test_mad() {
     60     for(int i=0; i<1020; i++) {
     61         data_f[i] = i;
     62     }
     63 
     64     startTime();
     65 
     66     float total = 0;
     67     // Do ~1 billion ops
     68     for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
     69         for (int i=0; i < 1000; i++) {
     70             data_f[i] = (data_f[i] * 0.02f +
     71                          data_f[i+1] * 0.04f +
     72                          data_f[i+2] * 0.05f +
     73                          data_f[i+3] * 0.1f +
     74                          data_f[i+4] * 0.2f +
     75                          data_f[i+5] * 0.2f +
     76                          data_f[i+6] * 0.1f +
     77                          data_f[i+7] * 0.05f +
     78                          data_f[i+8] * 0.04f +
     79                          data_f[i+9] * 0.02f + 1.f);
     80         }
     81     }
     82 
     83     endTime("scalar mad", 1e9);
     84 }
     85 
     86 
     87 #ifdef __ARM_NEON__
     88 
     89 static void test_fma() {
     90     for(int i=0; i<1020 * 4; i++) {
     91         data_f[i] = i;
     92     }
     93     float32x4_t c0_02 = vdupq_n_f32(0.02f);
     94     float32x4_t c0_04 = vdupq_n_f32(0.04f);
     95     float32x4_t c0_05 = vdupq_n_f32(0.05f);
     96     float32x4_t c0_10 = vdupq_n_f32(0.1f);
     97     float32x4_t c0_20 = vdupq_n_f32(0.2f);
     98     float32x4_t c1_00 = vdupq_n_f32(1.0f);
     99 
    100     startTime();
    101 
    102     float total = 0;
    103     // Do ~1 billion ops
    104     for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
    105         for (int i=0; i < 1000; i++) {
    106             float32x4_t t;
    107             t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
    108             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
    109             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
    110             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
    111             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
    112             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
    113             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
    114             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
    115             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
    116             t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
    117             t = vaddq_f32(t, c1_00);
    118             vst1q_f32((float32_t *)&data_f[i], t);
    119         }
    120     }
    121 
    122     endTime("neon fma", 1e9);
    123 }
    124 #endif
    125 
    126 int fp_test(int argc, char** argv) {
    127     test_mad();
    128 
    129 #ifdef __ARM_NEON__
    130     test_fma();
    131 #endif
    132 
    133     return 0;
    134 }
    135 
    136 
    137 
    138 
    139