1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef __BANDWIDTH_H__ 18 #define __BANDWIDTH_H__ 19 20 #include "memtest.h" 21 22 // Bandwidth Class definitions. 23 class BandwidthBenchmark { 24 public: 25 BandwidthBenchmark() 26 : _size(0), 27 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS), 28 _num_loops(DEFAULT_NUM_LOOPS) {} 29 virtual ~BandwidthBenchmark() {} 30 31 bool run() { 32 if (_size == 0) { 33 return false; 34 } 35 if (!canRun()) { 36 return false; 37 } 38 39 bench(_num_warm_loops); 40 41 nsecs_t t = system_time(); 42 bench(_num_loops); 43 t = system_time() - t; 44 45 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC); 46 47 return true; 48 } 49 50 bool canRun() { return !usesNeon() || isNeonSupported(); } 51 52 virtual bool setSize(size_t size) = 0; 53 54 virtual const char *getName() = 0; 55 56 virtual bool verify() = 0; 57 58 virtual bool usesNeon() { return false; } 59 60 bool isNeonSupported() { 61 #if defined(__ARM_NEON__) 62 return true; 63 #else 64 return false; 65 #endif 66 } 67 68 // Accessors/mutators. 69 double mb_per_sec() { return _mb_per_sec; } 70 size_t num_warm_loops() { return _num_warm_loops; } 71 size_t num_loops() { return _num_loops; } 72 size_t size() { return _size; } 73 74 void set_num_warm_loops(size_t num_warm_loops) { 75 _num_warm_loops = num_warm_loops; 76 } 77 void set_num_loops(size_t num_loops) { _num_loops = num_loops; } 78 79 // Static constants 80 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000; 81 static const unsigned int DEFAULT_NUM_LOOPS = 20000000; 82 83 protected: 84 virtual void bench(size_t num_loops) = 0; 85 86 double _mb_per_sec; 87 size_t _size; 88 size_t _num_warm_loops; 89 size_t _num_loops; 90 91 private: 92 // Static constants 93 static const double _NUM_NS_PER_SEC = 1000000000.0; 94 static const double _BYTES_PER_MB = 1024.0* 1024.0; 95 }; 96 97 class CopyBandwidthBenchmark : public BandwidthBenchmark { 98 public: 99 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { } 100 101 bool setSize(size_t size) { 102 if (_src) { 103 free(_src); 104 } 105 if (_dst) { 106 free(_dst); 107 } 108 109 if (size == 0) { 110 _size = DEFAULT_COPY_SIZE; 111 } else { 112 _size = size; 113 } 114 115 _src = reinterpret_cast<char*>(memalign(64, _size)); 116 if (!_src) { 117 perror("Failed to allocate memory for test."); 118 return false; 119 } 120 _dst = reinterpret_cast<char*>(memalign(64, _size)); 121 if (!_dst) { 122 perror("Failed to allocate memory for test."); 123 return false; 124 } 125 126 return true; 127 } 128 virtual ~CopyBandwidthBenchmark() { 129 if (_src) { 130 free(_src); 131 _src = NULL; 132 } 133 if (_dst) { 134 free(_dst); 135 _dst = NULL; 136 } 137 } 138 139 bool verify() { 140 memset(_src, 0x23, _size); 141 memset(_dst, 0, _size); 142 bench(1); 143 if (memcmp(_src, _dst, _size) != 0) { 144 printf("Buffers failed to compare after one loop.\n"); 145 return false; 146 } 147 148 memset(_src, 0x23, _size); 149 memset(_dst, 0, _size); 150 _num_loops = 2; 151 bench(2); 152 if (memcmp(_src, _dst, _size) != 0) { 153 printf("Buffers failed to compare after two loops.\n"); 154 return false; 155 } 156 157 return true; 158 } 159 160 protected: 161 char *_src; 162 char *_dst; 163 164 static const unsigned int DEFAULT_COPY_SIZE = 8000; 165 }; 166 167 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark { 168 public: 169 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { } 170 virtual ~CopyLdrdStrdBenchmark() {} 171 172 const char *getName() { return "ldrd/strd"; } 173 174 protected: 175 // Copy using ldrd/strd instructions. 176 void bench(size_t num_loops) { 177 asm volatile( 178 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 179 180 "mov r0, %0\n" 181 "mov r1, %1\n" 182 "mov r2, %2\n" 183 "mov r3, %3\n" 184 185 "0:\n" 186 "mov r4, r2, lsr #6\n" 187 188 "1:\n" 189 "ldrd r6, r7, [r0]\n" 190 "strd r6, r7, [r1]\n" 191 "ldrd r6, r7, [r0, #8]\n" 192 "strd r6, r7, [r1, #8]\n" 193 "ldrd r6, r7, [r0, #16]\n" 194 "strd r6, r7, [r1, #16]\n" 195 "ldrd r6, r7, [r0, #24]\n" 196 "strd r6, r7, [r1, #24]\n" 197 "ldrd r6, r7, [r0, #32]\n" 198 "strd r6, r7, [r1, #32]\n" 199 "ldrd r6, r7, [r0, #40]\n" 200 "strd r6, r7, [r1, #40]\n" 201 "ldrd r6, r7, [r0, #48]\n" 202 "strd r6, r7, [r1, #48]\n" 203 "ldrd r6, r7, [r0, #56]\n" 204 "strd r6, r7, [r1, #56]\n" 205 206 "add r0, r0, #64\n" 207 "add r1, r1, #64\n" 208 "subs r4, r4, #1\n" 209 "bgt 1b\n" 210 211 "sub r0, r0, r2\n" 212 "sub r1, r1, r2\n" 213 "subs r3, r3, #1\n" 214 "bgt 0b\n" 215 216 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 217 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 218 } 219 }; 220 221 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark { 222 public: 223 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { } 224 virtual ~CopyLdmiaStmiaBenchmark() {} 225 226 const char *getName() { return "ldmia/stmia"; } 227 228 protected: 229 // Copy using ldmia/stmia instructions. 230 void bench(size_t num_loops) { 231 asm volatile( 232 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 233 234 "mov r0, %0\n" 235 "mov r1, %1\n" 236 "mov r2, %2\n" 237 "mov r3, %3\n" 238 239 "0:\n" 240 "mov r4, r2, lsr #6\n" 241 242 "1:\n" 243 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 244 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 245 "subs r4, r4, #1\n" 246 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 247 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 248 "bgt 1b\n" 249 250 "sub r0, r0, r2\n" 251 "sub r1, r1, r2\n" 252 "subs r3, r3, #1\n" 253 "bgt 0b\n" 254 255 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 256 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 257 } 258 }; 259 260 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark { 261 public: 262 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { } 263 virtual ~CopyVld1Vst1Benchmark() {} 264 265 const char *getName() { return "vld1/vst1"; } 266 267 bool usesNeon() { return true; } 268 269 protected: 270 // Copy using vld1/vst1 instructions. 271 void bench(size_t num_loops) { 272 #if defined(__ARM_NEON__) 273 asm volatile( 274 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 275 276 "mov r0, %0\n" 277 "mov r1, %1\n" 278 "mov r2, %2\n" 279 "mov r3, %3\n" 280 281 "0:\n" 282 "mov r4, r2, lsr #6\n" 283 284 "1:\n" 285 "vld1.8 {d0-d3}, [r0]!\n" 286 "vld1.8 {d4-d7}, [r0]!\n" 287 "subs r4, r4, #1\n" 288 "vst1.8 {d0-d3}, [r1:128]!\n" 289 "vst1.8 {d4-d7}, [r1:128]!\n" 290 "bgt 1b\n" 291 292 "sub r0, r0, r2\n" 293 "sub r1, r1, r2\n" 294 "subs r3, r3, #1\n" 295 "bgt 0b\n" 296 297 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 298 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 299 #endif 300 } 301 }; 302 303 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark { 304 public: 305 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { } 306 virtual ~CopyVldrVstrBenchmark() {} 307 308 const char *getName() { return "vldr/vstr"; } 309 310 bool usesNeon() { return true; } 311 312 protected: 313 // Copy using vldr/vstr instructions. 314 void bench(size_t num_loops) { 315 #if defined(__ARM_NEON__) 316 asm volatile( 317 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 318 319 "mov r0, %0\n" 320 "mov r1, %1\n" 321 "mov r2, %2\n" 322 "mov r3, %3\n" 323 324 "0:\n" 325 "mov r4, r2, lsr #6\n" 326 327 "1:\n" 328 "vldr d0, [r0, #0]\n" 329 "subs r4, r4, #1\n" 330 "vldr d1, [r0, #8]\n" 331 "vstr d0, [r1, #0]\n" 332 "vldr d0, [r0, #16]\n" 333 "vstr d1, [r1, #8]\n" 334 "vldr d1, [r0, #24]\n" 335 "vstr d0, [r1, #16]\n" 336 "vldr d0, [r0, #32]\n" 337 "vstr d1, [r1, #24]\n" 338 "vldr d1, [r0, #40]\n" 339 "vstr d0, [r1, #32]\n" 340 "vldr d0, [r0, #48]\n" 341 "vstr d1, [r1, #40]\n" 342 "vldr d1, [r0, #56]\n" 343 "vstr d0, [r1, #48]\n" 344 "add r0, r0, #64\n" 345 "vstr d1, [r1, #56]\n" 346 "add r1, r1, #64\n" 347 "bgt 1b\n" 348 349 "sub r0, r0, r2\n" 350 "sub r1, r1, r2\n" 351 "subs r3, r3, #1\n" 352 "bgt 0b\n" 353 354 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 355 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 356 #endif 357 } 358 }; 359 360 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { 361 public: 362 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } 363 virtual ~CopyVldmiaVstmiaBenchmark() {} 364 365 const char *getName() { return "vldmia/vstmia"; } 366 367 bool usesNeon() { return true; } 368 369 protected: 370 // Copy using vldmia/vstmia instructions. 371 void bench(size_t num_loops) { 372 #if defined(__ARM_NEON__) 373 asm volatile( 374 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 375 376 "mov r0, %0\n" 377 "mov r1, %1\n" 378 "mov r2, %2\n" 379 "mov r3, %3\n" 380 381 "0:\n" 382 "mov r4, r2, lsr #6\n" 383 384 "1:\n" 385 "vldmia r0!, {d0-d7}\n" 386 "subs r4, r4, #1\n" 387 "vstmia r1!, {d0-d7}\n" 388 "bgt 1b\n" 389 390 "sub r0, r0, r2\n" 391 "sub r1, r1, r2\n" 392 "subs r3, r3, #1\n" 393 "bgt 0b\n" 394 395 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 396 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 397 #endif 398 } 399 }; 400 401 class MemcpyBenchmark : public CopyBandwidthBenchmark { 402 public: 403 MemcpyBenchmark() : CopyBandwidthBenchmark() { } 404 virtual ~MemcpyBenchmark() {} 405 406 const char *getName() { return "memcpy"; } 407 408 protected: 409 void bench(size_t num_loops) { 410 for (size_t i = 0; i < num_loops; i++) { 411 memcpy(_dst, _src, _size); 412 } 413 } 414 }; 415 416 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark { 417 public: 418 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { } 419 virtual ~SingleBufferBandwidthBenchmark() { 420 if (_buffer) { 421 free(_buffer); 422 _buffer = NULL; 423 } 424 } 425 426 bool setSize(size_t size) { 427 if (_buffer) { 428 free(_buffer); 429 _buffer = NULL; 430 } 431 432 if (_size == 0) { 433 _size = DEFAULT_SINGLE_BUFFER_SIZE; 434 } else { 435 _size = size; 436 } 437 438 _buffer = reinterpret_cast<char*>(memalign(64, _size)); 439 if (!_buffer) { 440 perror("Failed to allocate memory for test."); 441 return false; 442 } 443 memset(_buffer, 0, _size); 444 445 return true; 446 } 447 448 bool verify() { return true; } 449 450 protected: 451 char *_buffer; 452 453 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000; 454 }; 455 456 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark { 457 public: 458 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { } 459 virtual ~WriteBandwidthBenchmark() { } 460 461 bool verify() { 462 memset(_buffer, 0, _size); 463 bench(1); 464 for (size_t i = 0; i < _size; i++) { 465 if (_buffer[i] != 1) { 466 printf("Buffer failed to compare after one loop.\n"); 467 return false; 468 } 469 } 470 471 memset(_buffer, 0, _size); 472 bench(2); 473 for (size_t i = 0; i < _size; i++) { 474 if (_buffer[i] != 2) { 475 printf("Buffer failed to compare after two loops.\n"); 476 return false; 477 } 478 } 479 480 return true; 481 } 482 }; 483 484 class WriteStrdBenchmark : public WriteBandwidthBenchmark { 485 public: 486 WriteStrdBenchmark() : WriteBandwidthBenchmark() { } 487 virtual ~WriteStrdBenchmark() {} 488 489 const char *getName() { return "strd"; } 490 491 protected: 492 // Write a given value using strd. 493 void bench(size_t num_loops) { 494 asm volatile( 495 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 496 497 "mov r0, %0\n" 498 "mov r1, %1\n" 499 "mov r2, %2\n" 500 501 "mov r4, #0\n" 502 "mov r5, #0\n" 503 504 "0:\n" 505 "mov r3, r1, lsr #5\n" 506 507 "add r4, r4, #0x01010101\n" 508 "mov r5, r4\n" 509 510 "1:\n" 511 "subs r3, r3, #1\n" 512 "strd r4, r5, [r0]\n" 513 "strd r4, r5, [r0, #8]\n" 514 "strd r4, r5, [r0, #16]\n" 515 "strd r4, r5, [r0, #24]\n" 516 "add r0, r0, #32\n" 517 "bgt 1b\n" 518 519 "sub r0, r0, r1\n" 520 "subs r2, r2, #1\n" 521 "bgt 0b\n" 522 523 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 524 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 525 } 526 }; 527 528 class WriteStmiaBenchmark : public WriteBandwidthBenchmark { 529 public: 530 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { } 531 virtual ~WriteStmiaBenchmark() {} 532 533 const char *getName() { return "stmia"; } 534 535 protected: 536 // Write a given value using stmia. 537 void bench(size_t num_loops) { 538 asm volatile( 539 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 540 541 "mov r0, %0\n" 542 "mov r1, %1\n" 543 "mov r2, %2\n" 544 545 "mov r4, #0\n" 546 547 "0:\n" 548 "mov r3, r1, lsr #5\n" 549 550 "add r4, r4, #0x01010101\n" 551 "mov r5, r4\n" 552 "mov r6, r4\n" 553 "mov r7, r4\n" 554 "mov r8, r4\n" 555 "mov r9, r4\n" 556 "mov r10, r4\n" 557 "mov r11, r4\n" 558 559 "1:\n" 560 "subs r3, r3, #1\n" 561 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 562 "bgt 1b\n" 563 564 "sub r0, r0, r1\n" 565 "subs r2, r2, #1\n" 566 "bgt 0b\n" 567 568 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 569 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 570 } 571 }; 572 573 class WriteVst1Benchmark : public WriteBandwidthBenchmark { 574 public: 575 WriteVst1Benchmark() : WriteBandwidthBenchmark() { } 576 virtual ~WriteVst1Benchmark() {} 577 578 const char *getName() { return "vst1"; } 579 580 bool usesNeon() { return true; } 581 582 protected: 583 // Write a given value using vst. 584 void bench(size_t num_loops) { 585 #if defined(__ARM_NEON__) 586 asm volatile( 587 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 588 589 "mov r0, %0\n" 590 "mov r1, %1\n" 591 "mov r2, %2\n" 592 "mov r4, #0\n" 593 594 "0:\n" 595 "mov r3, r1, lsr #5\n" 596 597 "add r4, r4, #1\n" 598 "vdup.8 d0, r4\n" 599 "vmov d1, d0\n" 600 "vmov d2, d0\n" 601 "vmov d3, d0\n" 602 603 "1:\n" 604 "subs r3, r3, #1\n" 605 "vst1.8 {d0-d3}, [r0:128]!\n" 606 "bgt 1b\n" 607 608 "sub r0, r0, r1\n" 609 "subs r2, r2, #1\n" 610 "bgt 0b\n" 611 612 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 613 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 614 #endif 615 } 616 }; 617 618 class WriteVstrBenchmark : public WriteBandwidthBenchmark { 619 public: 620 WriteVstrBenchmark() : WriteBandwidthBenchmark() { } 621 virtual ~WriteVstrBenchmark() {} 622 623 const char *getName() { return "vstr"; } 624 625 bool usesNeon() { return true; } 626 627 protected: 628 // Write a given value using vst. 629 void bench(size_t num_loops) { 630 #if defined(__ARM_NEON__) 631 asm volatile( 632 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 633 634 "mov r0, %0\n" 635 "mov r1, %1\n" 636 "mov r2, %2\n" 637 "mov r4, #0\n" 638 639 "0:\n" 640 "mov r3, r1, lsr #5\n" 641 642 "add r4, r4, #1\n" 643 "vdup.8 d0, r4\n" 644 "vmov d1, d0\n" 645 "vmov d2, d0\n" 646 "vmov d3, d0\n" 647 648 "1:\n" 649 "vstr d0, [r0, #0]\n" 650 "subs r3, r3, #1\n" 651 "vstr d1, [r0, #8]\n" 652 "vstr d0, [r0, #16]\n" 653 "vstr d1, [r0, #24]\n" 654 "add r0, r0, #32\n" 655 "bgt 1b\n" 656 657 "sub r0, r0, r1\n" 658 "subs r2, r2, #1\n" 659 "bgt 0b\n" 660 661 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 662 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 663 #endif 664 } 665 }; 666 667 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { 668 public: 669 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } 670 virtual ~WriteVstmiaBenchmark() {} 671 672 const char *getName() { return "vstmia"; } 673 674 bool usesNeon() { return true; } 675 676 protected: 677 // Write a given value using vstmia. 678 void bench(size_t num_loops) { 679 #if defined(__ARM_NEON__) 680 asm volatile( 681 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 682 683 "mov r0, %0\n" 684 "mov r1, %1\n" 685 "mov r2, %2\n" 686 "mov r4, #0\n" 687 688 "0:\n" 689 "mov r3, r1, lsr #5\n" 690 691 "add r4, r4, #1\n" 692 "vdup.8 d0, r4\n" 693 "vmov d1, d0\n" 694 "vmov d2, d0\n" 695 "vmov d3, d0\n" 696 697 "1:\n" 698 "subs r3, r3, #1\n" 699 "vstmia r0!, {d0-d3}\n" 700 "bgt 1b\n" 701 702 "sub r0, r0, r1\n" 703 "subs r2, r2, #1\n" 704 "bgt 0b\n" 705 706 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 707 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 708 #endif 709 } 710 }; 711 712 class MemsetBenchmark : public WriteBandwidthBenchmark { 713 public: 714 MemsetBenchmark() : WriteBandwidthBenchmark() { } 715 virtual ~MemsetBenchmark() {} 716 717 const char *getName() { return "memset"; } 718 719 protected: 720 void bench(size_t num_loops) { 721 for (size_t i = 0; i < num_loops; i++) { 722 memset(_buffer, (i % 255) + 1, _size); 723 } 724 } 725 }; 726 727 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark { 728 public: 729 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { } 730 virtual ~ReadLdrdBenchmark() {} 731 732 const char *getName() { return "ldrd"; } 733 734 protected: 735 // Write a given value using strd. 736 void bench(size_t num_loops) { 737 asm volatile( 738 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 739 740 "mov r0, %0\n" 741 "mov r1, %1\n" 742 "mov r2, %2\n" 743 744 "0:\n" 745 "mov r3, r1, lsr #5\n" 746 747 "1:\n" 748 "subs r3, r3, #1\n" 749 "ldrd r4, r5, [r0]\n" 750 "ldrd r4, r5, [r0, #8]\n" 751 "ldrd r4, r5, [r0, #16]\n" 752 "ldrd r4, r5, [r0, #24]\n" 753 "add r0, r0, #32\n" 754 "bgt 1b\n" 755 756 "sub r0, r0, r1\n" 757 "subs r2, r2, #1\n" 758 "bgt 0b\n" 759 760 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 761 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 762 } 763 }; 764 765 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark { 766 public: 767 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { } 768 virtual ~ReadLdmiaBenchmark() {} 769 770 const char *getName() { return "ldmia"; } 771 772 protected: 773 // Write a given value using stmia. 774 void bench(size_t num_loops) { 775 asm volatile( 776 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 777 778 "mov r0, %0\n" 779 "mov r1, %1\n" 780 "mov r2, %2\n" 781 782 "0:\n" 783 "mov r3, r1, lsr #5\n" 784 785 "1:\n" 786 "subs r3, r3, #1\n" 787 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 788 "bgt 1b\n" 789 790 "sub r0, r0, r1\n" 791 "subs r2, r2, #1\n" 792 "bgt 0b\n" 793 794 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 795 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 796 } 797 }; 798 799 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark { 800 public: 801 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { } 802 virtual ~ReadVld1Benchmark() {} 803 804 const char *getName() { return "vld1"; } 805 806 bool usesNeon() { return true; } 807 808 protected: 809 // Write a given value using vst. 810 void bench(size_t num_loops) { 811 #if defined(__ARM_NEON__) 812 asm volatile( 813 "stmfd sp!, {r0,r1,r2,r3}\n" 814 815 "mov r0, %0\n" 816 "mov r1, %1\n" 817 "mov r2, %2\n" 818 819 "0:\n" 820 "mov r3, r1, lsr #5\n" 821 822 "1:\n" 823 "subs r3, r3, #1\n" 824 "vld1.8 {d0-d3}, [r0:128]!\n" 825 "bgt 1b\n" 826 827 "sub r0, r0, r1\n" 828 "subs r2, r2, #1\n" 829 "bgt 0b\n" 830 831 "ldmfd sp!, {r0,r1,r2,r3}\n" 832 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 833 #endif 834 } 835 }; 836 837 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark { 838 public: 839 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { } 840 virtual ~ReadVldrBenchmark() {} 841 842 const char *getName() { return "vldr"; } 843 844 bool usesNeon() { return true; } 845 846 protected: 847 // Write a given value using vst. 848 void bench(size_t num_loops) { 849 #if defined(__ARM_NEON__) 850 asm volatile( 851 "stmfd sp!, {r0,r1,r2,r3}\n" 852 853 "mov r0, %0\n" 854 "mov r1, %1\n" 855 "mov r2, %2\n" 856 857 "0:\n" 858 "mov r3, r1, lsr #5\n" 859 860 "1:\n" 861 "vldr d0, [r0, #0]\n" 862 "subs r3, r3, #1\n" 863 "vldr d1, [r0, #8]\n" 864 "vldr d0, [r0, #16]\n" 865 "vldr d1, [r0, #24]\n" 866 "add r0, r0, #32\n" 867 "bgt 1b\n" 868 869 "sub r0, r0, r1\n" 870 "subs r2, r2, #1\n" 871 "bgt 0b\n" 872 873 "ldmfd sp!, {r0,r1,r2,r3}\n" 874 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 875 #endif 876 } 877 }; 878 879 880 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { 881 public: 882 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } 883 virtual ~ReadVldmiaBenchmark() {} 884 885 const char *getName() { return "vldmia"; } 886 887 bool usesNeon() { return true; } 888 889 protected: 890 // Write a given value using vstmia. 891 void bench(size_t num_loops) { 892 #if defined(__ARM_NEON__) 893 asm volatile( 894 "stmfd sp!, {r0,r1,r2,r3}\n" 895 896 "mov r0, %0\n" 897 "mov r1, %1\n" 898 "mov r2, %2\n" 899 900 "0:\n" 901 "mov r3, r1, lsr #5\n" 902 903 "1:\n" 904 "subs r3, r3, #1\n" 905 "vldmia r0!, {d0-d3}\n" 906 "bgt 1b\n" 907 908 "sub r0, r0, r1\n" 909 "subs r2, r2, #1\n" 910 "bgt 0b\n" 911 912 "ldmfd sp!, {r0,r1,r2,r3}\n" 913 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 914 #endif 915 } 916 }; 917 918 #endif // __BANDWIDTH_H__ 919