Home | History | Annotate | Download | only in memtest
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef __BANDWIDTH_H__
     18 #define __BANDWIDTH_H__
     19 
     20 #include "memtest.h"
     21 
     22 // Bandwidth Class definitions.
     23 class BandwidthBenchmark {
     24 public:
     25     BandwidthBenchmark()
     26         : _size(0),
     27           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
     28           _num_loops(DEFAULT_NUM_LOOPS) {}
     29     virtual ~BandwidthBenchmark() {}
     30 
     31     bool run() {
     32         if (_size == 0) {
     33             return false;
     34         }
     35         if (!canRun()) {
     36             return false;
     37         }
     38 
     39         bench(_num_warm_loops);
     40 
     41         nsecs_t t = system_time();
     42         bench(_num_loops);
     43         t = system_time() - t;
     44 
     45         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
     46 
     47         return true;
     48     }
     49 
     50     bool canRun() { return !usesNeon() || isNeonSupported(); }
     51 
     52     virtual bool setSize(size_t size) = 0;
     53 
     54     virtual const char *getName() = 0;
     55 
     56     virtual bool verify() = 0;
     57 
     58     virtual bool usesNeon() { return false; }
     59 
     60     bool isNeonSupported() {
     61 #if defined(__ARM_NEON__)
     62         return true;
     63 #else
     64         return false;
     65 #endif
     66     }
     67 
     68     // Accessors/mutators.
     69     double mb_per_sec() { return _mb_per_sec; }
     70     size_t num_warm_loops() { return _num_warm_loops; }
     71     size_t num_loops() { return _num_loops; }
     72     size_t size() { return _size; }
     73 
     74     void set_num_warm_loops(size_t num_warm_loops) {
     75         _num_warm_loops = num_warm_loops;
     76     }
     77     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
     78 
     79     // Static constants
     80     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
     81     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
     82 
     83 protected:
     84     virtual void bench(size_t num_loops) = 0;
     85 
     86     double _mb_per_sec;
     87     size_t _size;
     88     size_t _num_warm_loops;
     89     size_t _num_loops;
     90 
     91 private:
     92     // Static constants
     93     static const double _NUM_NS_PER_SEC = 1000000000.0;
     94     static const double _BYTES_PER_MB = 1024.0* 1024.0;
     95 };
     96 
     97 class CopyBandwidthBenchmark : public BandwidthBenchmark {
     98 public:
     99     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
    100 
    101     bool setSize(size_t size) {
    102         if (_src) {
    103            free(_src);
    104         }
    105         if (_dst) {
    106             free(_dst);
    107         }
    108 
    109         if (size == 0) {
    110             _size = DEFAULT_COPY_SIZE;
    111         } else {
    112             _size = size;
    113         }
    114 
    115         _src = reinterpret_cast<char*>(memalign(64, _size));
    116         if (!_src) {
    117             perror("Failed to allocate memory for test.");
    118             return false;
    119         }
    120         _dst = reinterpret_cast<char*>(memalign(64, _size));
    121         if (!_dst) {
    122             perror("Failed to allocate memory for test.");
    123             return false;
    124         }
    125 
    126         return true;
    127     }
    128     virtual ~CopyBandwidthBenchmark() {
    129         if (_src) {
    130             free(_src);
    131             _src = NULL;
    132         }
    133         if (_dst) {
    134             free(_dst);
    135             _dst = NULL;
    136         }
    137     }
    138 
    139     bool verify() {
    140         memset(_src, 0x23, _size);
    141         memset(_dst, 0, _size);
    142         bench(1);
    143         if (memcmp(_src, _dst, _size) != 0) {
    144             printf("Buffers failed to compare after one loop.\n");
    145             return false;
    146         }
    147 
    148         memset(_src, 0x23, _size);
    149         memset(_dst, 0, _size);
    150         _num_loops = 2;
    151         bench(2);
    152         if (memcmp(_src, _dst, _size) != 0) {
    153             printf("Buffers failed to compare after two loops.\n");
    154             return false;
    155         }
    156 
    157         return true;
    158     }
    159 
    160 protected:
    161     char *_src;
    162     char *_dst;
    163 
    164     static const unsigned int DEFAULT_COPY_SIZE = 8000;
    165 };
    166 
    167 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
    168 public:
    169     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
    170     virtual ~CopyLdrdStrdBenchmark() {}
    171 
    172     const char *getName() { return "ldrd/strd"; }
    173 
    174 protected:
    175     // Copy using ldrd/strd instructions.
    176     void bench(size_t num_loops) {
    177         asm volatile(
    178             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
    179 
    180             "mov r0, %0\n"
    181             "mov r1, %1\n"
    182             "mov r2, %2\n"
    183             "mov r3, %3\n"
    184 
    185             "0:\n"
    186             "mov r4, r2, lsr #6\n"
    187 
    188             "1:\n"
    189             "ldrd r6, r7, [r0]\n"
    190             "strd r6, r7, [r1]\n"
    191             "ldrd r6, r7, [r0, #8]\n"
    192             "strd r6, r7, [r1, #8]\n"
    193             "ldrd r6, r7, [r0, #16]\n"
    194             "strd r6, r7, [r1, #16]\n"
    195             "ldrd r6, r7, [r0, #24]\n"
    196             "strd r6, r7, [r1, #24]\n"
    197             "ldrd r6, r7, [r0, #32]\n"
    198             "strd r6, r7, [r1, #32]\n"
    199             "ldrd r6, r7, [r0, #40]\n"
    200             "strd r6, r7, [r1, #40]\n"
    201             "ldrd r6, r7, [r0, #48]\n"
    202             "strd r6, r7, [r1, #48]\n"
    203             "ldrd r6, r7, [r0, #56]\n"
    204             "strd r6, r7, [r1, #56]\n"
    205 
    206             "add  r0, r0, #64\n"
    207             "add  r1, r1, #64\n"
    208             "subs r4, r4, #1\n"
    209             "bgt 1b\n"
    210 
    211             "sub r0, r0, r2\n"
    212             "sub r1, r1, r2\n"
    213             "subs r3, r3, #1\n"
    214             "bgt 0b\n"
    215 
    216             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
    217         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    218     }
    219 };
    220 
    221 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
    222 public:
    223     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
    224     virtual ~CopyLdmiaStmiaBenchmark() {}
    225 
    226     const char *getName() { return "ldmia/stmia"; }
    227 
    228 protected:
    229     // Copy using ldmia/stmia instructions.
    230     void bench(size_t num_loops) {
    231         asm volatile(
    232             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
    233 
    234             "mov r0, %0\n"
    235             "mov r1, %1\n"
    236             "mov r2, %2\n"
    237             "mov r3, %3\n"
    238 
    239             "0:\n"
    240             "mov r4, r2, lsr #6\n"
    241 
    242             "1:\n"
    243             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    244             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    245             "subs r4, r4, #1\n"
    246             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    247             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
    248             "bgt 1b\n"
    249 
    250             "sub r0, r0, r2\n"
    251             "sub r1, r1, r2\n"
    252             "subs r3, r3, #1\n"
    253             "bgt 0b\n"
    254 
    255             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
    256         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    257     }
    258 };
    259 
    260 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
    261 public:
    262     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
    263     virtual ~CopyVld1Vst1Benchmark() {}
    264 
    265     const char *getName() { return "vld1/vst1"; }
    266 
    267     bool usesNeon() { return true; }
    268 
    269 protected:
    270     // Copy using vld1/vst1 instructions.
    271     void bench(size_t num_loops) {
    272 #if defined(__ARM_NEON__)
    273         asm volatile(
    274             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    275 
    276             "mov r0, %0\n"
    277             "mov r1, %1\n"
    278             "mov r2, %2\n"
    279             "mov r3, %3\n"
    280 
    281             "0:\n"
    282             "mov r4, r2, lsr #6\n"
    283 
    284             "1:\n"
    285             "vld1.8 {d0-d3}, [r0]!\n"
    286             "vld1.8 {d4-d7}, [r0]!\n"
    287             "subs r4, r4, #1\n"
    288             "vst1.8 {d0-d3}, [r1:128]!\n"
    289             "vst1.8 {d4-d7}, [r1:128]!\n"
    290             "bgt 1b\n"
    291 
    292             "sub r0, r0, r2\n"
    293             "sub r1, r1, r2\n"
    294             "subs r3, r3, #1\n"
    295             "bgt 0b\n"
    296 
    297             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    298         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    299 #endif
    300     }
    301 };
    302 
    303 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
    304 public:
    305     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
    306     virtual ~CopyVldrVstrBenchmark() {}
    307 
    308     const char *getName() { return "vldr/vstr"; }
    309 
    310     bool usesNeon() { return true; }
    311 
    312 protected:
    313     // Copy using vldr/vstr instructions.
    314     void bench(size_t num_loops) {
    315 #if defined(__ARM_NEON__)
    316         asm volatile(
    317             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    318 
    319             "mov r0, %0\n"
    320             "mov r1, %1\n"
    321             "mov r2, %2\n"
    322             "mov r3, %3\n"
    323 
    324             "0:\n"
    325             "mov r4, r2, lsr #6\n"
    326 
    327             "1:\n"
    328             "vldr d0, [r0, #0]\n"
    329             "subs r4, r4, #1\n"
    330             "vldr d1, [r0, #8]\n"
    331             "vstr d0, [r1, #0]\n"
    332             "vldr d0, [r0, #16]\n"
    333             "vstr d1, [r1, #8]\n"
    334             "vldr d1, [r0, #24]\n"
    335             "vstr d0, [r1, #16]\n"
    336             "vldr d0, [r0, #32]\n"
    337             "vstr d1, [r1, #24]\n"
    338             "vldr d1, [r0, #40]\n"
    339             "vstr d0, [r1, #32]\n"
    340             "vldr d0, [r0, #48]\n"
    341             "vstr d1, [r1, #40]\n"
    342             "vldr d1, [r0, #56]\n"
    343             "vstr d0, [r1, #48]\n"
    344             "add r0, r0, #64\n"
    345             "vstr d1, [r1, #56]\n"
    346             "add r1, r1, #64\n"
    347             "bgt 1b\n"
    348 
    349             "sub r0, r0, r2\n"
    350             "sub r1, r1, r2\n"
    351             "subs r3, r3, #1\n"
    352             "bgt 0b\n"
    353 
    354             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    355         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    356 #endif
    357     }
    358 };
    359 
    360 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
    361 public:
    362     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
    363     virtual ~CopyVldmiaVstmiaBenchmark() {}
    364 
    365     const char *getName() { return "vldmia/vstmia"; }
    366 
    367     bool usesNeon() { return true; }
    368 
    369 protected:
    370     // Copy using vldmia/vstmia instructions.
    371     void bench(size_t num_loops) {
    372 #if defined(__ARM_NEON__)
    373         asm volatile(
    374             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    375 
    376             "mov r0, %0\n"
    377             "mov r1, %1\n"
    378             "mov r2, %2\n"
    379             "mov r3, %3\n"
    380 
    381             "0:\n"
    382             "mov r4, r2, lsr #6\n"
    383 
    384             "1:\n"
    385             "vldmia r0!, {d0-d7}\n"
    386             "subs r4, r4, #1\n"
    387             "vstmia r1!, {d0-d7}\n"
    388             "bgt 1b\n"
    389 
    390             "sub r0, r0, r2\n"
    391             "sub r1, r1, r2\n"
    392             "subs r3, r3, #1\n"
    393             "bgt 0b\n"
    394 
    395             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    396         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
    397 #endif
    398     }
    399 };
    400 
    401 class MemcpyBenchmark : public CopyBandwidthBenchmark {
    402 public:
    403     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
    404     virtual ~MemcpyBenchmark() {}
    405 
    406     const char *getName() { return "memcpy"; }
    407 
    408 protected:
    409     void bench(size_t num_loops) {
    410         for (size_t i = 0; i < num_loops; i++) {
    411             memcpy(_dst, _src, _size);
    412         }
    413     }
    414 };
    415 
    416 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
    417 public:
    418     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
    419     virtual ~SingleBufferBandwidthBenchmark() {
    420         if (_buffer) {
    421             free(_buffer);
    422             _buffer = NULL;
    423         }
    424     }
    425 
    426     bool setSize(size_t size) {
    427         if (_buffer) {
    428             free(_buffer);
    429             _buffer = NULL;
    430         }
    431 
    432         if (_size == 0) {
    433             _size = DEFAULT_SINGLE_BUFFER_SIZE;
    434         } else {
    435             _size = size;
    436         }
    437 
    438         _buffer = reinterpret_cast<char*>(memalign(64, _size));
    439         if (!_buffer) {
    440             perror("Failed to allocate memory for test.");
    441             return false;
    442         }
    443         memset(_buffer, 0, _size);
    444 
    445         return true;
    446     }
    447 
    448     bool verify() { return true; }
    449 
    450 protected:
    451     char *_buffer;
    452 
    453     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
    454 };
    455 
    456 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
    457 public:
    458     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
    459     virtual ~WriteBandwidthBenchmark() { }
    460 
    461     bool verify() {
    462         memset(_buffer, 0, _size);
    463         bench(1);
    464         for (size_t i = 0; i < _size; i++) {
    465             if (_buffer[i] != 1) {
    466                 printf("Buffer failed to compare after one loop.\n");
    467                 return false;
    468             }
    469         }
    470 
    471         memset(_buffer, 0, _size);
    472         bench(2);
    473         for (size_t i = 0; i < _size; i++) {
    474             if (_buffer[i] != 2) {
    475                 printf("Buffer failed to compare after two loops.\n");
    476                 return false;
    477             }
    478         }
    479 
    480         return true;
    481     }
    482 };
    483 
    484 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
    485 public:
    486     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
    487     virtual ~WriteStrdBenchmark() {}
    488 
    489     const char *getName() { return "strd"; }
    490 
    491 protected:
    492     // Write a given value using strd.
    493     void bench(size_t num_loops) {
    494         asm volatile(
    495             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    496 
    497             "mov r0, %0\n"
    498             "mov r1, %1\n"
    499             "mov r2, %2\n"
    500 
    501             "mov r4, #0\n"
    502             "mov r5, #0\n"
    503 
    504             "0:\n"
    505             "mov r3, r1, lsr #5\n"
    506 
    507             "add r4, r4, #0x01010101\n"
    508             "mov r5, r4\n"
    509 
    510             "1:\n"
    511             "subs r3, r3, #1\n"
    512             "strd r4, r5, [r0]\n"
    513             "strd r4, r5, [r0, #8]\n"
    514             "strd r4, r5, [r0, #16]\n"
    515             "strd r4, r5, [r0, #24]\n"
    516             "add  r0, r0, #32\n"
    517             "bgt 1b\n"
    518 
    519             "sub r0, r0, r1\n"
    520             "subs r2, r2, #1\n"
    521             "bgt 0b\n"
    522 
    523             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    524           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    525     }
    526 };
    527 
    528 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
    529 public:
    530     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
    531     virtual ~WriteStmiaBenchmark() {}
    532 
    533     const char *getName() { return "stmia"; }
    534 
    535 protected:
    536       // Write a given value using stmia.
    537       void bench(size_t num_loops) {
    538           asm volatile(
    539               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    540 
    541               "mov r0, %0\n"
    542               "mov r1, %1\n"
    543               "mov r2, %2\n"
    544 
    545               "mov r4, #0\n"
    546 
    547               "0:\n"
    548               "mov r3, r1, lsr #5\n"
    549 
    550               "add r4, r4, #0x01010101\n"
    551               "mov r5, r4\n"
    552               "mov r6, r4\n"
    553               "mov r7, r4\n"
    554               "mov r8, r4\n"
    555               "mov r9, r4\n"
    556               "mov r10, r4\n"
    557               "mov r11, r4\n"
    558 
    559               "1:\n"
    560               "subs r3, r3, #1\n"
    561               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
    562               "bgt 1b\n"
    563 
    564               "sub r0, r0, r1\n"
    565               "subs r2, r2, #1\n"
    566               "bgt 0b\n"
    567 
    568               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    569         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    570     }
    571 };
    572 
    573 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
    574 public:
    575     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
    576     virtual ~WriteVst1Benchmark() {}
    577 
    578     const char *getName() { return "vst1"; }
    579 
    580     bool usesNeon() { return true; }
    581 
    582 protected:
    583     // Write a given value using vst.
    584     void bench(size_t num_loops) {
    585 #if defined(__ARM_NEON__)
    586         asm volatile(
    587             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    588 
    589             "mov r0, %0\n"
    590             "mov r1, %1\n"
    591             "mov r2, %2\n"
    592             "mov r4, #0\n"
    593 
    594             "0:\n"
    595             "mov r3, r1, lsr #5\n"
    596 
    597             "add r4, r4, #1\n"
    598             "vdup.8 d0, r4\n"
    599             "vmov d1, d0\n"
    600             "vmov d2, d0\n"
    601             "vmov d3, d0\n"
    602 
    603             "1:\n"
    604             "subs r3, r3, #1\n"
    605             "vst1.8 {d0-d3}, [r0:128]!\n"
    606             "bgt 1b\n"
    607 
    608             "sub r0, r0, r1\n"
    609             "subs r2, r2, #1\n"
    610             "bgt 0b\n"
    611 
    612             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    613         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    614 #endif
    615     }
    616 };
    617 
    618 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
    619 public:
    620     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
    621     virtual ~WriteVstrBenchmark() {}
    622 
    623     const char *getName() { return "vstr"; }
    624 
    625     bool usesNeon() { return true; }
    626 
    627 protected:
    628     // Write a given value using vst.
    629     void bench(size_t num_loops) {
    630 #if defined(__ARM_NEON__)
    631         asm volatile(
    632             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    633 
    634             "mov r0, %0\n"
    635             "mov r1, %1\n"
    636             "mov r2, %2\n"
    637             "mov r4, #0\n"
    638 
    639             "0:\n"
    640             "mov r3, r1, lsr #5\n"
    641 
    642             "add r4, r4, #1\n"
    643             "vdup.8 d0, r4\n"
    644             "vmov d1, d0\n"
    645             "vmov d2, d0\n"
    646             "vmov d3, d0\n"
    647 
    648             "1:\n"
    649             "vstr d0, [r0, #0]\n"
    650             "subs r3, r3, #1\n"
    651             "vstr d1, [r0, #8]\n"
    652             "vstr d0, [r0, #16]\n"
    653             "vstr d1, [r0, #24]\n"
    654             "add r0, r0, #32\n"
    655             "bgt 1b\n"
    656 
    657             "sub r0, r0, r1\n"
    658             "subs r2, r2, #1\n"
    659             "bgt 0b\n"
    660 
    661             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    662         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    663 #endif
    664     }
    665 };
    666 
    667 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
    668 public:
    669     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
    670     virtual ~WriteVstmiaBenchmark() {}
    671 
    672     const char *getName() { return "vstmia"; }
    673 
    674     bool usesNeon() { return true; }
    675 
    676 protected:
    677     // Write a given value using vstmia.
    678     void bench(size_t num_loops) {
    679 #if defined(__ARM_NEON__)
    680         asm volatile(
    681             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
    682 
    683             "mov r0, %0\n"
    684             "mov r1, %1\n"
    685             "mov r2, %2\n"
    686             "mov r4, #0\n"
    687 
    688             "0:\n"
    689             "mov r3, r1, lsr #5\n"
    690 
    691             "add r4, r4, #1\n"
    692             "vdup.8 d0, r4\n"
    693             "vmov d1, d0\n"
    694             "vmov d2, d0\n"
    695             "vmov d3, d0\n"
    696 
    697             "1:\n"
    698             "subs r3, r3, #1\n"
    699             "vstmia r0!, {d0-d3}\n"
    700             "bgt 1b\n"
    701 
    702             "sub r0, r0, r1\n"
    703             "subs r2, r2, #1\n"
    704             "bgt 0b\n"
    705 
    706             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
    707         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    708 #endif
    709     }
    710 };
    711 
    712 class MemsetBenchmark : public WriteBandwidthBenchmark {
    713 public:
    714     MemsetBenchmark() : WriteBandwidthBenchmark() { }
    715     virtual ~MemsetBenchmark() {}
    716 
    717     const char *getName() { return "memset"; }
    718 
    719 protected:
    720     void bench(size_t num_loops) {
    721         for (size_t i = 0; i < num_loops; i++) {
    722             memset(_buffer, (i % 255) + 1, _size);
    723         }
    724     }
    725 };
    726 
    727 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
    728 public:
    729     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
    730     virtual ~ReadLdrdBenchmark() {}
    731 
    732     const char *getName() { return "ldrd"; }
    733 
    734 protected:
    735     // Write a given value using strd.
    736     void bench(size_t num_loops) {
    737         asm volatile(
    738             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    739 
    740             "mov r0, %0\n"
    741             "mov r1, %1\n"
    742             "mov r2, %2\n"
    743 
    744             "0:\n"
    745             "mov r3, r1, lsr #5\n"
    746 
    747             "1:\n"
    748             "subs r3, r3, #1\n"
    749             "ldrd r4, r5, [r0]\n"
    750             "ldrd r4, r5, [r0, #8]\n"
    751             "ldrd r4, r5, [r0, #16]\n"
    752             "ldrd r4, r5, [r0, #24]\n"
    753             "add  r0, r0, #32\n"
    754             "bgt 1b\n"
    755 
    756             "sub r0, r0, r1\n"
    757             "subs r2, r2, #1\n"
    758             "bgt 0b\n"
    759 
    760             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
    761           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    762     }
    763 };
    764 
    765 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
    766 public:
    767     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
    768     virtual ~ReadLdmiaBenchmark() {}
    769 
    770     const char *getName() { return "ldmia"; }
    771 
    772 protected:
    773       // Write a given value using stmia.
    774       void bench(size_t num_loops) {
    775           asm volatile(
    776               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    777 
    778               "mov r0, %0\n"
    779               "mov r1, %1\n"
    780               "mov r2, %2\n"
    781 
    782               "0:\n"
    783               "mov r3, r1, lsr #5\n"
    784 
    785               "1:\n"
    786               "subs r3, r3, #1\n"
    787               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
    788               "bgt 1b\n"
    789 
    790               "sub r0, r0, r1\n"
    791               "subs r2, r2, #1\n"
    792               "bgt 0b\n"
    793 
    794               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
    795         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    796     }
    797 };
    798 
    799 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
    800 public:
    801     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
    802     virtual ~ReadVld1Benchmark() {}
    803 
    804     const char *getName() { return "vld1"; }
    805 
    806     bool usesNeon() { return true; }
    807 
    808 protected:
    809     // Write a given value using vst.
    810     void bench(size_t num_loops) {
    811 #if defined(__ARM_NEON__)
    812         asm volatile(
    813             "stmfd sp!, {r0,r1,r2,r3}\n"
    814 
    815             "mov r0, %0\n"
    816             "mov r1, %1\n"
    817             "mov r2, %2\n"
    818 
    819             "0:\n"
    820             "mov r3, r1, lsr #5\n"
    821 
    822             "1:\n"
    823             "subs r3, r3, #1\n"
    824             "vld1.8 {d0-d3}, [r0:128]!\n"
    825             "bgt 1b\n"
    826 
    827             "sub r0, r0, r1\n"
    828             "subs r2, r2, #1\n"
    829             "bgt 0b\n"
    830 
    831             "ldmfd sp!, {r0,r1,r2,r3}\n"
    832         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    833 #endif
    834     }
    835 };
    836 
    837 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
    838 public:
    839     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
    840     virtual ~ReadVldrBenchmark() {}
    841 
    842     const char *getName() { return "vldr"; }
    843 
    844     bool usesNeon() { return true; }
    845 
    846 protected:
    847     // Write a given value using vst.
    848     void bench(size_t num_loops) {
    849 #if defined(__ARM_NEON__)
    850         asm volatile(
    851             "stmfd sp!, {r0,r1,r2,r3}\n"
    852 
    853             "mov r0, %0\n"
    854             "mov r1, %1\n"
    855             "mov r2, %2\n"
    856 
    857             "0:\n"
    858             "mov r3, r1, lsr #5\n"
    859 
    860             "1:\n"
    861             "vldr d0, [r0, #0]\n"
    862             "subs r3, r3, #1\n"
    863             "vldr d1, [r0, #8]\n"
    864             "vldr d0, [r0, #16]\n"
    865             "vldr d1, [r0, #24]\n"
    866             "add r0, r0, #32\n"
    867             "bgt 1b\n"
    868 
    869             "sub r0, r0, r1\n"
    870             "subs r2, r2, #1\n"
    871             "bgt 0b\n"
    872 
    873             "ldmfd sp!, {r0,r1,r2,r3}\n"
    874         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    875 #endif
    876     }
    877 };
    878 
    879 
    880 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
    881 public:
    882     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
    883     virtual ~ReadVldmiaBenchmark() {}
    884 
    885     const char *getName() { return "vldmia"; }
    886 
    887     bool usesNeon() { return true; }
    888 
    889 protected:
    890     // Write a given value using vstmia.
    891     void bench(size_t num_loops) {
    892 #if defined(__ARM_NEON__)
    893         asm volatile(
    894             "stmfd sp!, {r0,r1,r2,r3}\n"
    895 
    896             "mov r0, %0\n"
    897             "mov r1, %1\n"
    898             "mov r2, %2\n"
    899 
    900             "0:\n"
    901             "mov r3, r1, lsr #5\n"
    902 
    903             "1:\n"
    904             "subs r3, r3, #1\n"
    905             "vldmia r0!, {d0-d3}\n"
    906             "bgt 1b\n"
    907 
    908             "sub r0, r0, r1\n"
    909             "subs r2, r2, #1\n"
    910             "bgt 0b\n"
    911 
    912             "ldmfd sp!, {r0,r1,r2,r3}\n"
    913         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
    914 #endif
    915     }
    916 };
    917 
    918 #endif  // __BANDWIDTH_H__
    919