Home | History | Annotate | Download | only in memtest
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef __BANDWIDTH_H__
     18 #define __BANDWIDTH_H__
     19 
     20 #include <stdlib.h>
     21 #include <string.h>
     22 
     23 #include "utils/Compat.h"
     24 #include "memtest.h"
     25 
     26 // Bandwidth Class definitions.
     27 class BandwidthBenchmark {
     28 public:
     29     BandwidthBenchmark()
     30         : _size(0),
     31           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
     32           _num_loops(DEFAULT_NUM_LOOPS) {}
     33     virtual ~BandwidthBenchmark() {}
     34 
     35     bool run() {
     36         if (_size == 0) {
     37             return false;
     38         }
     39         if (!canRun()) {
     40             return false;
     41         }
     42 
     43         bench(_num_warm_loops);
     44 
     45         nsecs_t t = system_time();
     46         bench(_num_loops);
     47         t = system_time() - t;
     48 
     49         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
     50 
     51         return true;
     52     }
     53 
     54     bool canRun() { return !usesNeon() || isNeonSupported(); }
     55 
     56     virtual bool setSize(size_t size) = 0;
     57 
     58     virtual const char *getName() = 0;
     59 
     60     virtual bool verify() = 0;
     61 
     62     virtual bool usesNeon() { return false; }
     63 
     64     bool isNeonSupported() {
     65 #if defined(__ARM_NEON__)
     66         return true;
     67 #else
     68         return false;
     69 #endif
     70     }
     71 
     72     // Accessors/mutators.
     73     double mb_per_sec() { return _mb_per_sec; }
     74     size_t num_warm_loops() { return _num_warm_loops; }
     75     size_t num_loops() { return _num_loops; }
     76     size_t size() { return _size; }
     77 
     78     void set_num_warm_loops(size_t num_warm_loops) {
     79         _num_warm_loops = num_warm_loops;
     80     }
     81     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
     82 
     83     // Static constants
     84     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
     85     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
     86 
     87 protected:
     88     virtual void bench(size_t num_loops) = 0;
     89 
     90     double _mb_per_sec;
     91     size_t _size;
     92     size_t _num_warm_loops;
     93     size_t _num_loops;
     94 
     95 private:
     96     // Static constants
     97     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
     98     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
     99 };
    100 
    101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
    102 public:
    103     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
    104 
    105     bool setSize(size_t size) {
    106         if (_src) {
    107            free(_src);
    108            _src = NULL;
    109         }
    110         if (_dst) {
    111             free(_dst);
    112             _dst = NULL;
    113         }
    114 
    115         if (size == 0) {
    116             _size = DEFAULT_COPY_SIZE;
    117         } else {
    118             _size = size;
    119         }
    120 
    121         _src = reinterpret_cast<char*>(memalign(64, _size));
    122         if (!_src) {
    123             perror("Failed to allocate memory for test.");
    124             return false;
    125         }
    126         _dst = reinterpret_cast<char*>(memalign(64, _size));
    127         if (!_dst) {
    128             perror("Failed to allocate memory for test.");
    129             return false;
    130         }
    131 
    132         return true;
    133     }
    134     virtual ~CopyBandwidthBenchmark() {
    135         if (_src) {
    136             free(_src);
    137             _src = NULL;
    138         }
    139         if (_dst) {
    140             free(_dst);
    141             _dst = NULL;
    142         }
    143     }
    144 
    145     bool verify() {
    146         memset(_src, 0x23, _size);
    147         memset(_dst, 0, _size);
    148         bench(1);
    149         if (memcmp(_src, _dst, _size) != 0) {
    150             printf("Buffers failed to compare after one loop.\n");
    151             return false;
    152         }
    153 
    154         memset(_src, 0x23, _size);
    155         memset(_dst, 0, _size);
    156         _num_loops = 2;
    157         bench(2);
    158         if (memcmp(_src, _dst, _size) != 0) {
    159             printf("Buffers failed to compare after two loops.\n");
    160             return false;
    161         }
    162 
    163         return true;
    164     }
    165 
    166 protected:
    167     char *_src;
    168     char *_dst;
    169 
    170     static const unsigned int DEFAULT_COPY_SIZE = 8000;
    171 };
    172 
    173 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
    174 public:
    175     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
    176     virtual ~CopyLdrdStrdBenchmark() {}
    177 
    178     const char *getName() { return "ldrd/strd"; }
    179 
    180 protected:
    181     // Copy using ldrd/strd instructions.
    182     void bench(size_t num_loops) {
    183         asm volatile(
    184             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
    185 
    186             "mov r0, %0\n"
    187             "mov r1, %1\n"
    188             "mov r2, %2\n"
    189             "mov r3, %3\n"
    190 
    191             "0:\n"
    192             "mov r4, r2, lsr #6\n"
    193 
    194             "1:\n"
    195             "ldrd r6, r7, [r0]\n"
    196             "strd r6, r7, [r1]\n"
    197             "ldrd r6, r7, [r0, #8]\n"
    198             "strd r6, r7, [r1, #8]\n"
    199             "ldrd r6, r7, [r0, #16]\n"
    200             "strd r6, r7, [r1, #16]\n"
    201             "ldrd r6, r7, [r0, #24]\n"
    202             "strd r6, r7, [r1, #24]\n"
    203             "ldrd r6, r7, [r0, #32]\n"
    204             "strd r6, r7, [r1, #32]\n"
    205             "ldrd r6, r7, [r0, #40]\n"
    206             "strd r6, r7, [r1, #40]\n"
    207             "ldrd r6, r7, [r0, #48]\n"
    208             "strd r6, r7, [r1, #48]\n"
    209             "ldrd r6, r7, [r0, #56]\n"
    210             "strd r6, r7, [r1, #56]\n"
    211 
    212             "add  r0, r0, #64\n"
    213             "add  r1, r1, #64\n"
    214             "subs r4, r4, #1\n"
    215             "bgt 1b\n"
    216 
    217             "sub r0, r0, r2\n"
    218             "sub r1, r1, r2\n"
    219             "subs r3, r3, #1\n"
    220             "bgt 0b\n"
    221 
    222             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
    223         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    224     }
    225 };
    226 
    227 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
    228 public:
    229     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
    230     virtual ~CopyLdmiaStmiaBenchmark() {}
    231 
    232     const char *getName() { return "ldmia/stmia"; }
    233 
    234 protected:
    235     // Copy using ldmia/stmia instructions.
    236     void bench(size_t num_loops) {
    237         asm volatile(
    238             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
    239 
    240             "mov r0, %0\n"
    241             "mov r1, %1\n"
    242             "mov r2, %2\n"
    243             "mov r3, %3\n"
    244 
    245             "0:\n"
    246             "mov r4, r2, lsr #6\n"
    247 
    248             "1:\n"
    249             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    250             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    251             "subs r4, r4, #1\n"
    252             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    253             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    254             "bgt 1b\n"
    255 
    256             "sub r0, r0, r2\n"
    257             "sub r1, r1, r2\n"
    258             "subs r3, r3, #1\n"
    259             "bgt 0b\n"
    260 
    261             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
    262         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    263     }
    264 };
    265 
    266 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
    267 public:
    268     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
    269     virtual ~CopyVld1Vst1Benchmark() {}
    270 
    271     const char *getName() { return "vld1/vst1"; }
    272 
    273     bool usesNeon() { return true; }
    274 
    275 protected:
    276     // Copy using vld1/vst1 instructions.
    277 #if defined(__ARM_NEON__)
    278     void bench(size_t num_loops) {
    279         asm volatile(
    280             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    281 
    282             "mov r0, %0\n"
    283             "mov r1, %1\n"
    284             "mov r2, %2\n"
    285             "mov r3, %3\n"
    286 
    287             "0:\n"
    288             "mov r4, r2, lsr #6\n"
    289 
    290             "1:\n"
    291             "vld1.8 {d0-d3}, [r0]!\n"
    292             "vld1.8 {d4-d7}, [r0]!\n"
    293             "subs r4, r4, #1\n"
    294             "vst1.8 {d0-d3}, [r1:128]!\n"
    295             "vst1.8 {d4-d7}, [r1:128]!\n"
    296             "bgt 1b\n"
    297 
    298             "sub r0, r0, r2\n"
    299             "sub r1, r1, r2\n"
    300             "subs r3, r3, #1\n"
    301             "bgt 0b\n"
    302 
    303             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    304         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    305 #else
    306     void bench(size_t) {
    307 #endif
    308     }
    309 };
    310 
    311 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
    312 public:
    313     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
    314     virtual ~CopyVldrVstrBenchmark() {}
    315 
    316     const char *getName() { return "vldr/vstr"; }
    317 
    318     bool usesNeon() { return true; }
    319 
    320 protected:
    321     // Copy using vldr/vstr instructions.
    322 #if defined(__ARM_NEON__)
    323     void bench(size_t num_loops) {
    324         asm volatile(
    325             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    326 
    327             "mov r0, %0\n"
    328             "mov r1, %1\n"
    329             "mov r2, %2\n"
    330             "mov r3, %3\n"
    331 
    332             "0:\n"
    333             "mov r4, r2, lsr #6\n"
    334 
    335             "1:\n"
    336             "vldr d0, [r0, #0]\n"
    337             "subs r4, r4, #1\n"
    338             "vldr d1, [r0, #8]\n"
    339             "vstr d0, [r1, #0]\n"
    340             "vldr d0, [r0, #16]\n"
    341             "vstr d1, [r1, #8]\n"
    342             "vldr d1, [r0, #24]\n"
    343             "vstr d0, [r1, #16]\n"
    344             "vldr d0, [r0, #32]\n"
    345             "vstr d1, [r1, #24]\n"
    346             "vldr d1, [r0, #40]\n"
    347             "vstr d0, [r1, #32]\n"
    348             "vldr d0, [r0, #48]\n"
    349             "vstr d1, [r1, #40]\n"
    350             "vldr d1, [r0, #56]\n"
    351             "vstr d0, [r1, #48]\n"
    352             "add r0, r0, #64\n"
    353             "vstr d1, [r1, #56]\n"
    354             "add r1, r1, #64\n"
    355             "bgt 1b\n"
    356 
    357             "sub r0, r0, r2\n"
    358             "sub r1, r1, r2\n"
    359             "subs r3, r3, #1\n"
    360             "bgt 0b\n"
    361 
    362             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    363         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    364 #else
    365     void bench(size_t) {
    366 #endif
    367     }
    368 };
    369 
    370 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
    371 public:
    372     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
    373     virtual ~CopyVldmiaVstmiaBenchmark() {}
    374 
    375     const char *getName() { return "vldmia/vstmia"; }
    376 
    377     bool usesNeon() { return true; }
    378 
    379 protected:
    380     // Copy using vldmia/vstmia instructions.
    381 #if defined(__ARM_NEON__)
    382     void bench(size_t num_loops) {
    383         asm volatile(
    384             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    385 
    386             "mov r0, %0\n"
    387             "mov r1, %1\n"
    388             "mov r2, %2\n"
    389             "mov r3, %3\n"
    390 
    391             "0:\n"
    392             "mov r4, r2, lsr #6\n"
    393 
    394             "1:\n"
    395             "vldmia r0!, {d0-d7}\n"
    396             "subs r4, r4, #1\n"
    397             "vstmia r1!, {d0-d7}\n"
    398             "bgt 1b\n"
    399 
    400             "sub r0, r0, r2\n"
    401             "sub r1, r1, r2\n"
    402             "subs r3, r3, #1\n"
    403             "bgt 0b\n"
    404 
    405             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    406         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    407 #else
    408     void bench(size_t) {
    409 #endif
    410     }
    411 };
    412 
    413 class MemcpyBenchmark : public CopyBandwidthBenchmark {
    414 public:
    415     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
    416     virtual ~MemcpyBenchmark() {}
    417 
    418     const char *getName() { return "memcpy"; }
    419 
    420 protected:
    421     void bench(size_t num_loops) {
    422         for (size_t i = 0; i < num_loops; i++) {
    423             memcpy(_dst, _src, _size);
    424         }
    425     }
    426 };
    427 
    428 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
    429 public:
    430     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
    431     virtual ~SingleBufferBandwidthBenchmark() {
    432         if (_buffer) {
    433             free(_buffer);
    434             _buffer = NULL;
    435         }
    436     }
    437 
    438     bool setSize(size_t size) {
    439         if (_buffer) {
    440             free(_buffer);
    441             _buffer = NULL;
    442         }
    443 
    444         if (size == 0) {
    445             _size = DEFAULT_SINGLE_BUFFER_SIZE;
    446         } else {
    447             _size = size;
    448         }
    449 
    450         _buffer = reinterpret_cast<char*>(memalign(64, _size));
    451         if (!_buffer) {
    452             perror("Failed to allocate memory for test.");
    453             return false;
    454         }
    455         memset(_buffer, 0, _size);
    456 
    457         return true;
    458     }
    459 
    460     bool verify() { return true; }
    461 
    462 protected:
    463     char *_buffer;
    464 
    465     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
    466 };
    467 
    468 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
    469 public:
    470     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
    471     virtual ~WriteBandwidthBenchmark() { }
    472 
    473     bool verify() {
    474         memset(_buffer, 0, _size);
    475         bench(1);
    476         for (size_t i = 0; i < _size; i++) {
    477             if (_buffer[i] != 1) {
    478                 printf("Buffer failed to compare after one loop.\n");
    479                 return false;
    480             }
    481         }
    482 
    483         memset(_buffer, 0, _size);
    484         bench(2);
    485         for (size_t i = 0; i < _size; i++) {
    486             if (_buffer[i] != 2) {
    487                 printf("Buffer failed to compare after two loops.\n");
    488                 return false;
    489             }
    490         }
    491 
    492         return true;
    493     }
    494 };
    495 
    496 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
    497 public:
    498     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
    499     virtual ~WriteStrdBenchmark() {}
    500 
    501     const char *getName() { return "strd"; }
    502 
    503 protected:
    504     // Write a given value using strd.
    505     void bench(size_t num_loops) {
    506         asm volatile(
    507             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    508 
    509             "mov r0, %0\n"
    510             "mov r1, %1\n"
    511             "mov r2, %2\n"
    512 
    513             "mov r4, #0\n"
    514             "mov r5, #0\n"
    515 
    516             "0:\n"
    517             "mov r3, r1, lsr #5\n"
    518 
    519             "add r4, r4, #0x01010101\n"
    520             "mov r5, r4\n"
    521 
    522             "1:\n"
    523             "subs r3, r3, #1\n"
    524             "strd r4, r5, [r0]\n"
    525             "strd r4, r5, [r0, #8]\n"
    526             "strd r4, r5, [r0, #16]\n"
    527             "strd r4, r5, [r0, #24]\n"
    528             "add  r0, r0, #32\n"
    529             "bgt 1b\n"
    530 
    531             "sub r0, r0, r1\n"
    532             "subs r2, r2, #1\n"
    533             "bgt 0b\n"
    534 
    535             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    536           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    537     }
    538 };
    539 
    540 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
    541 public:
    542     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
    543     virtual ~WriteStmiaBenchmark() {}
    544 
    545     const char *getName() { return "stmia"; }
    546 
    547 protected:
    548       // Write a given value using stmia.
    549       void bench(size_t num_loops) {
    550           asm volatile(
    551               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    552 
    553               "mov r0, %0\n"
    554               "mov r1, %1\n"
    555               "mov r2, %2\n"
    556 
    557               "mov r4, #0\n"
    558 
    559               "0:\n"
    560               "mov r3, r1, lsr #5\n"
    561 
    562               "add r4, r4, #0x01010101\n"
    563               "mov r5, r4\n"
    564               "mov r6, r4\n"
    565               "mov r7, r4\n"
    566               "mov r8, r4\n"
    567               "mov r9, r4\n"
    568               "mov r10, r4\n"
    569               "mov r11, r4\n"
    570 
    571               "1:\n"
    572               "subs r3, r3, #1\n"
    573               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
    574               "bgt 1b\n"
    575 
    576               "sub r0, r0, r1\n"
    577               "subs r2, r2, #1\n"
    578               "bgt 0b\n"
    579 
    580               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    581         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    582     }
    583 };
    584 
    585 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
    586 public:
    587     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
    588     virtual ~WriteVst1Benchmark() {}
    589 
    590     const char *getName() { return "vst1"; }
    591 
    592     bool usesNeon() { return true; }
    593 
    594 protected:
    595     // Write a given value using vst.
    596 #if defined(__ARM_NEON__)
    597     void bench(size_t num_loops) {
    598         asm volatile(
    599             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    600 
    601             "mov r0, %0\n"
    602             "mov r1, %1\n"
    603             "mov r2, %2\n"
    604             "mov r4, #0\n"
    605 
    606             "0:\n"
    607             "mov r3, r1, lsr #5\n"
    608 
    609             "add r4, r4, #1\n"
    610             "vdup.8 d0, r4\n"
    611             "vmov d1, d0\n"
    612             "vmov d2, d0\n"
    613             "vmov d3, d0\n"
    614 
    615             "1:\n"
    616             "subs r3, r3, #1\n"
    617             "vst1.8 {d0-d3}, [r0:128]!\n"
    618             "bgt 1b\n"
    619 
    620             "sub r0, r0, r1\n"
    621             "subs r2, r2, #1\n"
    622             "bgt 0b\n"
    623 
    624             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    625         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    626 #else
    627     void bench(size_t) {
    628 #endif
    629     }
    630 };
    631 
    632 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
    633 public:
    634     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
    635     virtual ~WriteVstrBenchmark() {}
    636 
    637     const char *getName() { return "vstr"; }
    638 
    639     bool usesNeon() { return true; }
    640 
    641 protected:
    642     // Write a given value using vst.
    643 #if defined(__ARM_NEON__)
    644     void bench(size_t num_loops) {
    645         asm volatile(
    646             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    647 
    648             "mov r0, %0\n"
    649             "mov r1, %1\n"
    650             "mov r2, %2\n"
    651             "mov r4, #0\n"
    652 
    653             "0:\n"
    654             "mov r3, r1, lsr #5\n"
    655 
    656             "add r4, r4, #1\n"
    657             "vdup.8 d0, r4\n"
    658             "vmov d1, d0\n"
    659             "vmov d2, d0\n"
    660             "vmov d3, d0\n"
    661 
    662             "1:\n"
    663             "vstr d0, [r0, #0]\n"
    664             "subs r3, r3, #1\n"
    665             "vstr d1, [r0, #8]\n"
    666             "vstr d0, [r0, #16]\n"
    667             "vstr d1, [r0, #24]\n"
    668             "add r0, r0, #32\n"
    669             "bgt 1b\n"
    670 
    671             "sub r0, r0, r1\n"
    672             "subs r2, r2, #1\n"
    673             "bgt 0b\n"
    674 
    675             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    676         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    677 #else
    678     void bench(size_t) {
    679 #endif
    680     }
    681 };
    682 
    683 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
    684 public:
    685     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
    686     virtual ~WriteVstmiaBenchmark() {}
    687 
    688     const char *getName() { return "vstmia"; }
    689 
    690     bool usesNeon() { return true; }
    691 
    692 protected:
    693     // Write a given value using vstmia.
    694 #if defined(__ARM_NEON__)
    695     void bench(size_t num_loops) {
    696         asm volatile(
    697             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    698 
    699             "mov r0, %0\n"
    700             "mov r1, %1\n"
    701             "mov r2, %2\n"
    702             "mov r4, #0\n"
    703 
    704             "0:\n"
    705             "mov r3, r1, lsr #5\n"
    706 
    707             "add r4, r4, #1\n"
    708             "vdup.8 d0, r4\n"
    709             "vmov d1, d0\n"
    710             "vmov d2, d0\n"
    711             "vmov d3, d0\n"
    712 
    713             "1:\n"
    714             "subs r3, r3, #1\n"
    715             "vstmia r0!, {d0-d3}\n"
    716             "bgt 1b\n"
    717 
    718             "sub r0, r0, r1\n"
    719             "subs r2, r2, #1\n"
    720             "bgt 0b\n"
    721 
    722             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    723         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    724 #else
    725     void bench(size_t) {
    726 #endif
    727     }
    728 };
    729 
    730 class MemsetBenchmark : public WriteBandwidthBenchmark {
    731 public:
    732     MemsetBenchmark() : WriteBandwidthBenchmark() { }
    733     virtual ~MemsetBenchmark() {}
    734 
    735     const char *getName() { return "memset"; }
    736 
    737 protected:
    738     void bench(size_t num_loops) {
    739         for (size_t i = 0; i < num_loops; i++) {
    740             memset(_buffer, (i % 255) + 1, _size);
    741         }
    742     }
    743 };
    744 
    745 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
    746 public:
    747     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
    748     virtual ~ReadLdrdBenchmark() {}
    749 
    750     const char *getName() { return "ldrd"; }
    751 
    752 protected:
    753     // Write a given value using strd.
    754     void bench(size_t num_loops) {
    755         asm volatile(
    756             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    757 
    758             "mov r0, %0\n"
    759             "mov r1, %1\n"
    760             "mov r2, %2\n"
    761 
    762             "0:\n"
    763             "mov r3, r1, lsr #5\n"
    764 
    765             "1:\n"
    766             "subs r3, r3, #1\n"
    767             "ldrd r4, r5, [r0]\n"
    768             "ldrd r4, r5, [r0, #8]\n"
    769             "ldrd r4, r5, [r0, #16]\n"
    770             "ldrd r4, r5, [r0, #24]\n"
    771             "add  r0, r0, #32\n"
    772             "bgt 1b\n"
    773 
    774             "sub r0, r0, r1\n"
    775             "subs r2, r2, #1\n"
    776             "bgt 0b\n"
    777 
    778             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    779           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    780     }
    781 };
    782 
    783 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
    784 public:
    785     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
    786     virtual ~ReadLdmiaBenchmark() {}
    787 
    788     const char *getName() { return "ldmia"; }
    789 
    790 protected:
    791       // Write a given value using stmia.
    792       void bench(size_t num_loops) {
    793           asm volatile(
    794               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    795 
    796               "mov r0, %0\n"
    797               "mov r1, %1\n"
    798               "mov r2, %2\n"
    799 
    800               "0:\n"
    801               "mov r3, r1, lsr #5\n"
    802 
    803               "1:\n"
    804               "subs r3, r3, #1\n"
    805               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
    806               "bgt 1b\n"
    807 
    808               "sub r0, r0, r1\n"
    809               "subs r2, r2, #1\n"
    810               "bgt 0b\n"
    811 
    812               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    813         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    814     }
    815 };
    816 
    817 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
    818 public:
    819     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
    820     virtual ~ReadVld1Benchmark() {}
    821 
    822     const char *getName() { return "vld1"; }
    823 
    824     bool usesNeon() { return true; }
    825 
    826 protected:
    827     // Write a given value using vst.
    828 #if defined(__ARM_NEON__)
    829     void bench(size_t num_loops) {
    830         asm volatile(
    831             "stmfd sp!, {r0,r1,r2,r3}\n"
    832 
    833             "mov r0, %0\n"
    834             "mov r1, %1\n"
    835             "mov r2, %2\n"
    836 
    837             "0:\n"
    838             "mov r3, r1, lsr #5\n"
    839 
    840             "1:\n"
    841             "subs r3, r3, #1\n"
    842             "vld1.8 {d0-d3}, [r0:128]!\n"
    843             "bgt 1b\n"
    844 
    845             "sub r0, r0, r1\n"
    846             "subs r2, r2, #1\n"
    847             "bgt 0b\n"
    848 
    849             "ldmfd sp!, {r0,r1,r2,r3}\n"
    850         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    851 #else
    852     void bench(size_t) {
    853 #endif
    854     }
    855 };
    856 
    857 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
    858 public:
    859     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
    860     virtual ~ReadVldrBenchmark() {}
    861 
    862     const char *getName() { return "vldr"; }
    863 
    864     bool usesNeon() { return true; }
    865 
    866 protected:
    867     // Write a given value using vst.
    868 #if defined(__ARM_NEON__)
    869     void bench(size_t num_loops) {
    870         asm volatile(
    871             "stmfd sp!, {r0,r1,r2,r3}\n"
    872 
    873             "mov r0, %0\n"
    874             "mov r1, %1\n"
    875             "mov r2, %2\n"
    876 
    877             "0:\n"
    878             "mov r3, r1, lsr #5\n"
    879 
    880             "1:\n"
    881             "vldr d0, [r0, #0]\n"
    882             "subs r3, r3, #1\n"
    883             "vldr d1, [r0, #8]\n"
    884             "vldr d0, [r0, #16]\n"
    885             "vldr d1, [r0, #24]\n"
    886             "add r0, r0, #32\n"
    887             "bgt 1b\n"
    888 
    889             "sub r0, r0, r1\n"
    890             "subs r2, r2, #1\n"
    891             "bgt 0b\n"
    892 
    893             "ldmfd sp!, {r0,r1,r2,r3}\n"
    894         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    895 #else
    896     void bench(size_t) {
    897 #endif
    898     }
    899 };
    900 
    901 
    902 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
    903 public:
    904     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
    905     virtual ~ReadVldmiaBenchmark() {}
    906 
    907     const char *getName() { return "vldmia"; }
    908 
    909     bool usesNeon() { return true; }
    910 
    911 protected:
    912     // Write a given value using vstmia.
    913 #if defined(__ARM_NEON__)
    914     void bench(size_t num_loops) {
    915         asm volatile(
    916             "stmfd sp!, {r0,r1,r2,r3}\n"
    917 
    918             "mov r0, %0\n"
    919             "mov r1, %1\n"
    920             "mov r2, %2\n"
    921 
    922             "0:\n"
    923             "mov r3, r1, lsr #5\n"
    924 
    925             "1:\n"
    926             "subs r3, r3, #1\n"
    927             "vldmia r0!, {d0-d3}\n"
    928             "bgt 1b\n"
    929 
    930             "sub r0, r0, r1\n"
    931             "subs r2, r2, #1\n"
    932             "bgt 0b\n"
    933 
    934             "ldmfd sp!, {r0,r1,r2,r3}\n"
    935         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    936 #else
    937     void bench(size_t) {
    938 #endif
    939     }
    940 };
    941 
    942 #endif  // __BANDWIDTH_H__
    943